diff options
author | David Wragg <david@rabbitmq.com> | 2010-09-17 17:22:18 +0100 |
---|---|---|
committer | David Wragg <david@rabbitmq.com> | 2010-09-17 17:22:18 +0100 |
commit | 9fd840798eb72a96fbaf726fb9e1f22d9283f088 (patch) | |
tree | 84d7e9c458a6625efcd0bc9aeae4032a3c76b3bc | |
parent | ec0b517396c195534b0190f10ef0387c50279df1 (diff) | |
parent | 582ef11941c9ebcb27fb1e1b4f38e30a372e2118 (diff) | |
download | rabbitmq-server-bug22926.tar.gz |
Merge default into bug22926bug22926
100 files changed, 10867 insertions, 3497 deletions
@@ -11,7 +11,7 @@ syntax: regexp ^dist/ ^include/rabbit_framing\.hrl$ ^include/rabbit_framing_spec\.hrl$ -^src/rabbit_framing\.erl$ +^src/rabbit_framing_amqp.*\.erl$ ^src/.*\_usage.erl$ ^rabbit\.plt$ ^basic.plt$ @@ -1,5 +1,5 @@ This package, the RabbitMQ server is licensed under the MPL. For the MPL, please see LICENSE-MPL-RabbitMQ. -If you have any questions regarding licensing, please contact us at +If you have any questions regarding licensing, please contact us at info@rabbitmq.com. @@ -11,11 +11,11 @@ SOURCE_DIR=src EBIN_DIR=ebin INCLUDE_DIR=include DOCS_DIR=docs -INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit_framing_spec.hrl -SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing.erl $(USAGES_ERL) +INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) $(INCLUDE_DIR)/rabbit_framing.hrl +SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing_amqp_0_9_1.erl $(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl $(USAGES_ERL) BEAM_TARGETS=$(patsubst $(SOURCE_DIR)/%.erl, $(EBIN_DIR)/%.beam, $(SOURCES)) -TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit_framing_spec.hrl $(BEAM_TARGETS) -WEB_URL=http://stage.rabbitmq.com/ +TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS) +WEB_URL=http://www.rabbitmq.com/ MANPAGES=$(patsubst %.xml, %.gz, $(wildcard $(DOCS_DIR)/*.[0-9].xml)) WEB_MANPAGES=$(patsubst %.xml, %.man.xml, $(wildcard $(DOCS_DIR)/*.[0-9].xml) $(DOCS_DIR)/rabbitmq-service.xml) USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml $(DOCS_DIR)/rabbitmq-multi.1.xml @@ -41,10 +41,10 @@ RABBIT_PLT=rabbit.plt ifndef USE_SPECS # our type specs rely on features and bug fixes in dialyzer that are -# only available in R13B01 upwards (R13B01 is eshell 5.7.2) +# only available in R14A upwards (R13B04 is erts 5.7.5) # # NB: the test assumes that version number will only contain single digits -USE_SPECS=$(shell if [ $$(erl -noshell -eval 
'io:format(erlang:system_info(version)), halt().') \> "5.7.1" ]; then echo "true"; else echo "false"; fi) +USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.5" ]; then echo "true"; else echo "false"; fi) endif #other args: +native +"{hipe,[o3,verbose]}" -Ddebug=true +debug_info +no_strict_record_tests @@ -56,7 +56,8 @@ TARGET_SRC_DIR=dist/$(TARBALL_NAME) SIBLING_CODEGEN_DIR=../rabbitmq-codegen/ AMQP_CODEGEN_DIR=$(shell [ -d $(SIBLING_CODEGEN_DIR) ] && echo $(SIBLING_CODEGEN_DIR) || echo codegen) -AMQP_SPEC_JSON_FILES=$(AMQP_CODEGEN_DIR)/amqp-0.8.json $(AMQP_CODEGEN_DIR)/rabbitmq-0.8-extensions.json +AMQP_SPEC_JSON_FILES_0_9_1=$(AMQP_CODEGEN_DIR)/amqp-rabbitmq-0.9.1.json +AMQP_SPEC_JSON_FILES_0_8=$(AMQP_CODEGEN_DIR)/amqp-rabbitmq-0.8.json ERL_CALL=erl_call -sname $(RABBITMQ_NODENAME) -e @@ -99,14 +100,14 @@ $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app $(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl $(DEPS_FILE) erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< -$(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES) - $(PYTHON) codegen.py header $(AMQP_SPEC_JSON_FILES) $@ +$(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_9_1) $(AMQP_SPEC_JSON_FILES_0_8) + $(PYTHON) codegen.py --ignore-conflicts header $(AMQP_SPEC_JSON_FILES_0_9_1) $(AMQP_SPEC_JSON_FILES_0_8) $@ -$(INCLUDE_DIR)/rabbit_framing_spec.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES) - $(PYTHON) codegen.py spec $(AMQP_SPEC_JSON_FILES) $@ +$(SOURCE_DIR)/rabbit_framing_amqp_0_9_1.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_9_1) + $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES_0_9_1) $@ -$(SOURCE_DIR)/rabbit_framing.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES) - $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES) $@ 
+$(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_8) + $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES_0_8) $@ dialyze: $(BEAM_TARGETS) $(BASIC_PLT) $(ERL_EBIN) -eval \ @@ -131,7 +132,7 @@ $(BASIC_PLT): $(BEAM_TARGETS) clean: rm -f $(EBIN_DIR)/*.beam rm -f $(EBIN_DIR)/rabbit.app $(EBIN_DIR)/rabbit.boot $(EBIN_DIR)/rabbit.script $(EBIN_DIR)/rabbit.rel - rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit_framing_spec.hrl $(SOURCE_DIR)/rabbit_framing.erl codegen.pyc + rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(SOURCE_DIR)/rabbit_framing_amqp_*.erl codegen.pyc rm -f $(DOCS_DIR)/*.[0-9].gz $(DOCS_DIR)/*.man.xml $(DOCS_DIR)/*.erl $(USAGES_ERL) rm -f $(RABBIT_PLT) rm -f $(DEPS_FILE) @@ -178,6 +179,14 @@ stop-rabbit-on-node: all force-snapshot: all echo "rabbit_persister:force_snapshot()." | $(ERL_CALL) +set-memory-alarm: all + echo "alarm_handler:set_alarm({vm_memory_high_watermark, []})." | \ + $(ERL_CALL) + +clear-memory-alarm: all + echo "alarm_handler:clear_alarm(vm_memory_high_watermark)." | \ + $(ERL_CALL) + stop-node: -$(ERL_CALL) -q @@ -205,7 +214,7 @@ srcdist: distclean >> $(TARGET_SRC_DIR)/INSTALL cp README.in $(TARGET_SRC_DIR)/README elinks -dump -no-references -no-numbering $(WEB_URL)build-server.html \ - >> $(TARGET_SRC_DIR)/BUILD + >> $(TARGET_SRC_DIR)/README sed -i.save 's/%%VSN%%/$(VERSION)/' $(TARGET_SRC_DIR)/ebin/rabbit_app.in && rm -f $(TARGET_SRC_DIR)/ebin/rabbit_app.in.save cp -r $(AMQP_CODEGEN_DIR)/* $(TARGET_SRC_DIR)/codegen/ @@ -226,9 +235,10 @@ distclean: clean # xmlto can not read from standard input, so we mess with a tmp file. 
%.gz: %.xml $(DOCS_DIR)/examples-to-end.xsl - xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \ - xmlto man -o $(DOCS_DIR) --stringparam man.indent.verbatims=0 $<.tmp && \ - gzip -f $(DOCS_DIR)/`basename $< .xml` + xmlto --version | grep -E '^xmlto version 0\.0\.([0-9]|1[1-8])$$' >/dev/null || opt='--stringparam man.indent.verbatims=0' ; \ + xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \ + xmlto -o $(DOCS_DIR) $$opt man $<.tmp && \ + gzip -f $(DOCS_DIR)/`basename $< .xml` rm -f $<.tmp # Use tmp files rather than a pipeline so that we get meaningful errors @@ -259,7 +269,7 @@ install: all docs_all install_dirs cp -r ebin include LICENSE LICENSE-MPL-RabbitMQ INSTALL $(TARGET_DIR) chmod 0755 scripts/* - for script in rabbitmq-env rabbitmq-server rabbitmqctl rabbitmq-multi rabbitmq-activate-plugins rabbitmq-deactivate-plugins; do \ + for script in rabbitmq-env rabbitmq-server rabbitmqctl rabbitmq-multi; do \ cp scripts/$$script $(TARGET_DIR)/sbin; \ [ -e $(SBIN_DIR)/$$script ] || ln -s $(SCRIPTS_REL_PATH)/$$script $(SBIN_DIR)/$$script; \ done @@ -269,6 +279,8 @@ install: all docs_all install_dirs cp $$manpage $(MAN_DIR)/man$$section; \ done; \ done + mkdir -p $(TARGET_DIR)/plugins + echo Put your .ez plugin files in this directory. > $(TARGET_DIR)/plugins/README install_dirs: @ OK=true && \ @@ -315,11 +315,16 @@ def genErl(spec): methods = spec.allMethods() printFileHeader() - print """-module(rabbit_framing). --include("rabbit_framing.hrl"). - + module = "rabbit_framing_amqp_%d_%d" % (spec.major, spec.minor) + if spec.revision != 0: + module = "%s_%d" % (module, spec.revision) + if module == "rabbit_framing_amqp_8_0": + module = "rabbit_framing_amqp_0_8" + print "-module(%s)." % module + print """-include("rabbit_framing.hrl"). + +-export([version/0]). -export([lookup_method_name/1]). - -export([method_id/1]). -export([method_has_content/1]). -export([is_method_synchronous/1]). 
@@ -332,26 +337,93 @@ def genErl(spec): -export([lookup_amqp_exception/1]). -export([amqp_exception/1]). -bitvalue(true) -> 1; -bitvalue(false) -> 0; -bitvalue(undefined) -> 0. +""" + print "%% Various types" + print "-ifdef(use_specs)." + + print """-export_type([amqp_table/0, amqp_property_type/0, amqp_method_record/0, + amqp_method_name/0, amqp_method/0, amqp_class_id/0, + amqp_value/0, amqp_array/0, amqp_exception/0, amqp_property_record/0]). + +-type(amqp_field_type() :: + 'longstr' | 'signedint' | 'decimal' | 'timestamp' | + 'table' | 'byte' | 'double' | 'float' | 'long' | + 'short' | 'bool' | 'binary' | 'void'). +-type(amqp_property_type() :: + 'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' | + 'longlongint' | 'timestamp' | 'bit' | 'table'). + +-type(amqp_table() :: [{binary(), amqp_field_type(), amqp_value()}]). +-type(amqp_array() :: [{amqp_field_type(), amqp_value()}]). +-type(amqp_value() :: binary() | % longstr + integer() | % signedint + {non_neg_integer(), non_neg_integer()} | % decimal + amqp_table() | + amqp_array() | + byte() | % byte + float() | % double + integer() | % long + integer() | % short + boolean() | % bool + binary() | % binary + 'undefined' | % void + non_neg_integer() % timestamp + ). 
+""" + + print prettyType("amqp_method_name()", + [m.erlangName() for m in methods]) + print prettyType("amqp_method()", + ["{%s, %s}" % (m.klass.index, m.index) for m in methods], + 6) + print prettyType("amqp_method_record()", + ["#%s{}" % (m.erlangName()) for m in methods]) + fieldNames = set() + for m in methods: + fieldNames.update(m.arguments) + fieldNames = [erlangize(f.name) for f in fieldNames] + print prettyType("amqp_method_field_name()", + fieldNames) + print prettyType("amqp_property_record()", + ["#'P_%s'{}" % erlangize(c.name) for c in spec.allClasses()]) + print prettyType("amqp_exception()", + ["'%s'" % erlangConstantName(c).lower() for (c, v, cls) in spec.constants]) + print prettyType("amqp_exception_code()", + ["%i" % v for (c, v, cls) in spec.constants]) + classIds = set() + for m in spec.allMethods(): + classIds.add(m.klass.index) + print prettyType("amqp_class_id()", + ["%i" % ci for ci in classIds]) + print "-endif. % use_specs" + print """ %% Method signatures -ifdef(use_specs). +-spec(version/0 :: () -> {non_neg_integer(), non_neg_integer(), non_neg_integer()}). -spec(lookup_method_name/1 :: (amqp_method()) -> amqp_method_name()). -spec(method_id/1 :: (amqp_method_name()) -> amqp_method()). -spec(method_has_content/1 :: (amqp_method_name()) -> boolean()). -spec(is_method_synchronous/1 :: (amqp_method_record()) -> boolean()). -spec(method_record/1 :: (amqp_method_name()) -> amqp_method_record()). -spec(method_fieldnames/1 :: (amqp_method_name()) -> [amqp_method_field_name()]). --spec(decode_method_fields/2 :: (amqp_method_name(), binary()) -> amqp_method_record()). +-spec(decode_method_fields/2 :: + (amqp_method_name(), binary()) -> amqp_method_record() | rabbit_types:connection_exit()). -spec(decode_properties/2 :: (non_neg_integer(), binary()) -> amqp_property_record()). -spec(encode_method_fields/1 :: (amqp_method_record()) -> binary()). -spec(encode_properties/1 :: (amqp_method_record()) -> binary()). 
-spec(lookup_amqp_exception/1 :: (amqp_exception()) -> {boolean(), amqp_exception_code(), binary()}). -spec(amqp_exception/1 :: (amqp_exception_code()) -> amqp_exception()). -endif. % use_specs + +bitvalue(true) -> 1; +bitvalue(false) -> 0; +bitvalue(undefined) -> 0. """ + version = "{%d, %d, %d}" % (spec.major, spec.minor, spec.revision) + if version == '{8, 0, 0}': version = '{0, 8, 0}' + print "version() -> %s." % (version) + for m in methods: genLookupMethodName(m) print "lookup_method_name({_ClassId, _MethodId} = Id) -> exit({unknown_method_id, Id})." @@ -410,8 +482,6 @@ def genHrl(spec): methods = spec.allMethods() printFileHeader() - print "-define(PROTOCOL_VERSION_MAJOR, %d)." % (spec.major) - print "-define(PROTOCOL_VERSION_MINOR, %d)." % (spec.minor) print "-define(PROTOCOL_PORT, %d)." % (spec.port) for (c,v,cls) in spec.constants: @@ -425,63 +495,6 @@ def genHrl(spec): for c in spec.allClasses(): print "-record('P_%s', {%s})." % (erlangize(c.name), fieldNameList(c.fields)) - print "-ifdef(use_specs)." - print "%% Various types" - print prettyType("amqp_method_name()", - [m.erlangName() for m in methods]) - print prettyType("amqp_method()", - ["{%s, %s}" % (m.klass.index, m.index) for m in methods], - 6) - print prettyType("amqp_method_record()", - ["#%s{}" % (m.erlangName()) for m in methods]) - fieldNames = set() - for m in methods: - fieldNames.update(m.arguments) - fieldNames = [erlangize(f.name) for f in fieldNames] - print prettyType("amqp_method_field_name()", - fieldNames) - print prettyType("amqp_property_record()", - ["#'P_%s'{}" % erlangize(c.name) for c in spec.allClasses()]) - print prettyType("amqp_exception()", - ["'%s'" % erlangConstantName(c).lower() for (c, v, cls) in spec.constants]) - print prettyType("amqp_exception_code()", - ["%i" % v for (c, v, cls) in spec.constants]) - print "-endif. 
% use_specs" - -def genSpec(spec): - methods = spec.allMethods() - - printFileHeader() - print """% Hard-coded types --type(amqp_field_type() :: - 'longstr' | 'signedint' | 'decimal' | 'timestamp' | - 'table' | 'byte' | 'double' | 'float' | 'long' | - 'short' | 'bool' | 'binary' | 'void'). --type(amqp_property_type() :: - 'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' | - 'longlongint' | 'timestamp' | 'bit' | 'table'). -%% we could make this more precise but ultimately are limited by -%% dialyzer's lack of support for recursive types --type(amqp_table() :: [{binary(), amqp_field_type(), any()}]). -%% TODO: make this more precise --type(amqp_properties() :: tuple()). - --type(channel_number() :: non_neg_integer()). --type(resource_name() :: binary()). --type(routing_key() :: binary()). --type(username() :: binary()). --type(password() :: binary()). --type(vhost() :: binary()). --type(ctag() :: binary()). --type(exchange_type() :: atom()). --type(binding_key() :: binary()). -""" - print "% Auto-generated types" - classIds = set() - for m in spec.allMethods(): - classIds.add(m.klass.index) - print prettyType("amqp_class_id()", - ["%i" % ci for ci in classIds]) def generateErl(specPath): genErl(AmqpSpec(specPath)) @@ -489,11 +502,7 @@ def generateErl(specPath): def generateHrl(specPath): genHrl(AmqpSpec(specPath)) -def generateSpec(specPath): - genSpec(AmqpSpec(specPath)) - if __name__ == "__main__": do_main_dict({"header": generateHrl, - "spec": generateSpec, "body": generateErl}) diff --git a/docs/html-to-website-xml.xsl b/docs/html-to-website-xml.xsl index 662dbea0..c325bb5a 100644 --- a/docs/html-to-website-xml.xsl +++ b/docs/html-to-website-xml.xsl @@ -30,7 +30,7 @@ <code><xsl:value-of select="document($original)/refentry/refnamediv/refname"/>(<xsl:value-of select="document($original)/refentry/refmeta/manvolnum"/>)</code>. </p> <p> - <a href="manpages.html">See a list of all manual pages</a>. 
+ <a href="../manpages.html">See a list of all manual pages</a>. </p> </xsl:when> <xsl:otherwise> diff --git a/docs/rabbitmq-activate-plugins.1.xml b/docs/rabbitmq-activate-plugins.1.xml deleted file mode 100644 index 5f831634..00000000 --- a/docs/rabbitmq-activate-plugins.1.xml +++ /dev/null @@ -1,60 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.docbook.org/xml/4.5/docbookx.dtd"> -<refentry lang="en"> - <refentryinfo> - <productname>RabbitMQ Server</productname> - <authorgroup> - <corpauthor>The RabbitMQ Team <<ulink url="mailto:info@rabbitmq.com"><email>info@rabbitmq.com</email></ulink>></corpauthor> - </authorgroup> - </refentryinfo> - - <refmeta> - <refentrytitle>rabbitmq-activate-plugins</refentrytitle> - <manvolnum>1</manvolnum> - <refmiscinfo class="manual">RabbitMQ Server</refmiscinfo> - </refmeta> - - <refnamediv> - <refname>rabbitmq-activate-plugins</refname> - <refpurpose>command line tool for activating plugins in a RabbitMQ broker</refpurpose> - </refnamediv> - - <refsynopsisdiv> - <cmdsynopsis> - <command>rabbitmq-activate-plugins</command> - </cmdsynopsis> - </refsynopsisdiv> - - <refsect1> - <title>Description</title> - <para> - RabbitMQ is an implementation of AMQP, the emerging standard for high -performance enterprise messaging. The RabbitMQ server is a robust and -scalable implementation of an AMQP broker. - </para> - <para> - rabbitmq-activate-plugins is a command line tool for activating -plugins installed into the broker's plugins directory. - </para> - <para role="example-prefix"> - For example: - </para> - <screen role="example"> - rabbitmq-activate-plugins - </screen> - <para role="example"> - This command activates all of the installed plugins in the current RabbitMQ install. 
- </para> - </refsect1> - - <refsect1> - <title>See also</title> - <para> - <citerefentry><refentrytitle>rabbitmq.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmq-multi</refentrytitle><manvolnum>1</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmq-server</refentrytitle><manvolnum>1</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmqctl</refentrytitle><manvolnum>1</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmq-deactivate-plugins</refentrytitle><manvolnum>1</manvolnum></citerefentry> - </para> - </refsect1> -</refentry> diff --git a/docs/rabbitmq-deactivate-plugins.1.xml b/docs/rabbitmq-deactivate-plugins.1.xml deleted file mode 100644 index bbf1207e..00000000 --- a/docs/rabbitmq-deactivate-plugins.1.xml +++ /dev/null @@ -1,60 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.docbook.org/xml/4.5/docbookx.dtd"> -<refentry lang="en"> - <refentryinfo> - <productname>RabbitMQ Server</productname> - <authorgroup> - <corpauthor>The RabbitMQ Team <<ulink url="mailto:info@rabbitmq.com"><email>info@rabbitmq.com</email></ulink>></corpauthor> - </authorgroup> - </refentryinfo> - - <refmeta> - <refentrytitle>rabbitmq-deactivate-plugins</refentrytitle> - <manvolnum>1</manvolnum> - <refmiscinfo class="manual">RabbitMQ Server</refmiscinfo> - </refmeta> - - <refnamediv> - <refname>rabbitmq-deactivate-plugins</refname> - <refpurpose>command line tool for deactivating plugins in a RabbitMQ broker</refpurpose> - </refnamediv> - - <refsynopsisdiv> - <cmdsynopsis> - <command>rabbitmq-deactivate-plugins</command> - </cmdsynopsis> - </refsynopsisdiv> - - <refsect1> - <title>Description</title> - <para> - RabbitMQ is an implementation of AMQP, the emerging standard for high -performance enterprise messaging. The RabbitMQ server is a robust and -scalable implementation of an AMQP broker. 
- </para> - <para> -rabbitmq-deactivate-plugins is a command line tool for deactivating -plugins installed into the broker. - </para> - <para role="example-prefix"> - For example: - </para> - <screen role="example"> - rabbitmq-deactivate-plugins - </screen> - <para role="example"> - This command deactivates all of the installed plugins in the current RabbitMQ install. - </para> - </refsect1> - - <refsect1> - <title>See also</title> - <para> - <citerefentry><refentrytitle>rabbitmq.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmq-multi</refentrytitle><manvolnum>1</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmq-server</refentrytitle><manvolnum>1</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmqctl</refentrytitle><manvolnum>1</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmq-activate-plugins</refentrytitle><manvolnum>1</manvolnum></citerefentry> - </para> - </refsect1> -</refentry> diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index a2038cf0..5179eb25 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -88,9 +88,6 @@ </listitem> </varlistentry> </variablelist> - <para> - Flags must precede all other parameters to <command>rabbitmqctl</command>. - </para> </refsect1> <refsect1> @@ -270,7 +267,7 @@ <title>Cluster management</title> <variablelist> - <varlistentry> + <varlistentry id="cluster"> <term><cmdsynopsis><command>cluster</command> <arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> <listitem> <variablelist> @@ -281,7 +278,8 @@ </variablelist> <para> Instruct the node to become member of a cluster with the - specified nodes. + specified nodes. To cluster with currently offline nodes, + use <link linkend="force_cluster"><command>force_cluster</command></link>. </para> <para> Cluster nodes can be of two types: disk or ram. 
Disk nodes @@ -334,6 +332,29 @@ </para> </listitem> </varlistentry> + <varlistentry id="force_cluster"> + <term><cmdsynopsis><command>force_cluster</command> <arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> + <listitem> + <variablelist> + <varlistentry> + <term>clusternode</term> + <listitem><para>Subset of the nodes of the cluster to which this node should be connected.</para></listitem> + </varlistentry> + </variablelist> + <para> + Instruct the node to become member of a cluster with the + specified nodes. This will succeed even if the specified nodes + are offline. For a more detailed description, see + <link linkend="cluster"><command>cluster</command>.</link> + </para> + <para> + Note that this variant of the cluster command just + ignores the current status of the specified nodes. + Clustering may still fail for a variety of other + reasons. + </para> + </listitem> + </varlistentry> </variablelist> </refsect2> @@ -396,7 +417,8 @@ <screen role="example">rabbitmqctl add_user tonyg changeit</screen> <para role="example"> This command instructs the RabbitMQ broker to create a - user named <command>tonyg</command> with (initial) password + (non-administrative) user named <command>tonyg</command> with + (initial) password <command>changeit</command>. 
</para> </listitem> @@ -444,13 +466,57 @@ </varlistentry> <varlistentry> + <term><cmdsynopsis><command>set_admin</command> <arg choice="req"><replaceable>username</replaceable></arg></cmdsynopsis></term> + <listitem> + <variablelist> + <varlistentry> + <term>username</term> + <listitem><para>The name of the user whose administrative + status is to be set.</para></listitem> + </varlistentry> + </variablelist> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl set_admin tonyg</screen> + <para role="example"> + This command instructs the RabbitMQ broker to ensure the user + named <command>tonyg</command> is an administrator. This has no + effect when the user logs in via AMQP, but can be used to permit + the user to manage users, virtual hosts and permissions when the + user logs in via some other means (for example with the + management plugin). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><cmdsynopsis><command>clear_admin</command> <arg choice="req"><replaceable>username</replaceable></arg></cmdsynopsis></term> + <listitem> + <variablelist> + <varlistentry> + <term>username</term> + <listitem><para>The name of the user whose administrative + status is to be cleared.</para></listitem> + </varlistentry> + </variablelist> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl clear_admin tonyg</screen> + <para role="example"> + This command instructs the RabbitMQ broker to ensure the user + named <command>tonyg</command> is not an administrator. + </para> + </listitem> + </varlistentry> + + <varlistentry> <term><cmdsynopsis><command>list_users</command></cmdsynopsis></term> <listitem> <para>Lists users</para> <para role="example-prefix">For example:</para> <screen role="example">rabbitmqctl list_users</screen> <para role="example"> - This command instructs the RabbitMQ broker to list all users. + This command instructs the RabbitMQ broker to list all + users. 
Each result row will contain the user name and + the administrator status of the user, in that order. </para> </listitem> </varlistentry> @@ -523,7 +589,7 @@ </varlistentry> <varlistentry> - <term><cmdsynopsis><command>set_permissions</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg> <arg choice="req"><replaceable>username</replaceable></arg> <arg choice="req"><replaceable>configure</replaceable></arg> <arg choice="req"><replaceable>write</replaceable></arg> <arg choice="req"><replaceable>read</replaceable></arg></cmdsynopsis></term> + <term><cmdsynopsis><command>set_permissions</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg> <arg choice="opt">-s <replaceable>scope</replaceable></arg> <arg choice="req"><replaceable>user</replaceable></arg> <arg choice="req"><replaceable>conf</replaceable></arg> <arg choice="req"><replaceable>write</replaceable></arg> <arg choice="req"><replaceable>read</replaceable></arg></cmdsynopsis></term> <listitem> <variablelist> <varlistentry> @@ -531,11 +597,21 @@ <listitem><para>The name of the virtual host to which to grant the user access, defaulting to <command>/</command>.</para></listitem> </varlistentry> <varlistentry> - <term>username</term> + <term>scope</term> + <listitem><para>Scope of the permissions: either + <command>client</command> (the default) or + <command>all</command>. 
This determines whether + permissions are checked for server-generated resource + names (<command>all</command>) or only for + client-specified resource names + (<command>client</command>).</para></listitem> + </varlistentry> + <varlistentry> + <term>user</term> <listitem><para>The name of the user to grant access to the specified virtual host.</para></listitem> </varlistentry> <varlistentry> - <term>configure</term> + <term>conf</term> <listitem><para>A regular expression matching resource names for which the user is granted configure permissions.</para></listitem> </varlistentry> <varlistentry> @@ -603,10 +679,12 @@ <para role="example-prefix">For example:</para> <screen role="example">rabbitmqctl list_permissions -p /myvhost</screen> <para role="example"> - This command instructs the RabbitMQ broker to list all the - users which have been granted access to the virtual host - called <command>/myvhost</command>, and the permissions they - have for operations on resources in that virtual host. + This command instructs the RabbitMQ broker to list all + the users which have been granted access to the virtual + host called <command>/myvhost</command>, and the + permissions they have for operations on resources in + that virtual host. Note that an empty string means no + permissions granted. 
</para> </listitem> </varlistentry> @@ -671,7 +749,7 @@ <variablelist> <varlistentry> <term>name</term> - <listitem><para>The name of the queue with non-ASCII characters URL-escaped.</para></listitem> + <listitem><para>The name of the queue with non-ASCII characters escaped as in C.</para></listitem> </varlistentry> <varlistentry> <term>durable</term> @@ -762,7 +840,7 @@ <variablelist> <varlistentry> <term>name</term> - <listitem><para>The name of the exchange with non-ASCII characters URL-escaped.</para></listitem> + <listitem><para>The name of the exchange with non-ASCII characters escaped as in C.</para></listitem> </varlistentry> <varlistentry> <term>type</term> @@ -797,22 +875,58 @@ </para> </listitem> </varlistentry> - </variablelist> - <variablelist> - <varlistentry> - <term><cmdsynopsis><command>list_bindings</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg></cmdsynopsis></term> + <varlistentry role="usage-has-option-list"> + <term><cmdsynopsis><command>list_bindings</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg> <arg choice="opt" role="usage-option-list"><replaceable>bindinginfoitem</replaceable> ...</arg></cmdsynopsis></term> <listitem> <para> - By default the bindings for the <command>/</command> virtual - host are returned. The "-p" flag can be used to override - this default. Each result row will contain an exchange - name, queue name, routing key and binding arguments, in - that order. Non-ASCII characters will be URL-encoded. + Returns binding details. By default the bindings for + the <command>/</command> virtual host are returned. The + "-p" flag can be used to override this default. </para> - <para role="usage"> - The output format for "list_bindings" is a list of rows containing - exchange name, queue name, routing key and arguments, in that order. + <para> + The <command>bindinginfoitem</command> parameter is used + to indicate which binding information items to include + in the results. 
The column order in the results will + match the order of the parameters. + <command>bindinginfoitem</command> can take any value + from the list that follows: + </para> + <variablelist> + <varlistentry> + <term>exchange_name</term> + <listitem><para>The name of the exchange to which the + binding is attached. with non-ASCII characters + escaped as in C.</para></listitem> + </varlistentry> + <varlistentry> + <term>queue_name</term> + <listitem><para>The name of the queue to which the + binding is attached. with non-ASCII characters + escaped as in C.</para></listitem> + </varlistentry> + <varlistentry> + <term>routing_key</term> + <listitem><para>The binding's routing key, with + non-ASCII characters escaped as in C.</para></listitem> + </varlistentry> + <varlistentry> + <term>arguments</term> + <listitem><para>The binding's arguments.</para></listitem> + </varlistentry> + </variablelist> + <para> + If no <command>bindinginfoitem</command>s are specified then + all above items are displayed. + </para> + <para role="example-prefix"> + For example: + </para> + <screen role="example">rabbitmqctl list_bindings -p /myvhost exchange_name queue_name</screen> + <para role="example"> + This command displays the exchange name and queue name + of the bindings in the virtual host + named <command>/myvhost</command>. </para> </listitem> </varlistentry> @@ -862,12 +976,16 @@ <listitem><para>Number of channels using the connection.</para></listitem> </varlistentry> <varlistentry> + <term>protocol</term> + <listitem><para>Version of the AMQP protocol in use (currently one of <command>{0,9,1}</command> or <command>{0,8,0}</command>). 
Note that if a client requests an AMQP 0-9 connection, we treat it as AMQP 0-9-1.</para></listitem> + </varlistentry> + <varlistentry> <term>user</term> <listitem><para>Username associated with the connection.</para></listitem> </varlistentry> <varlistentry> <term>vhost</term> - <listitem><para>Virtual host name with non-ASCII characters URL-escaped.</para></listitem> + <listitem><para>Virtual host name with non-ASCII characters escaped as in C.</para></listitem> </varlistentry> <varlistentry> <term>timeout</term> diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index ce94cafe..4be09c5a 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -18,10 +18,15 @@ {ssl_listeners, []}, {ssl_options, []}, {vm_memory_high_watermark, 0.4}, - {backing_queue_module, rabbit_invariable_queue}, + {msg_store_index_module, rabbit_msg_store_ets_index}, + {backing_queue_module, rabbit_variable_queue}, {persister_max_wrap_entries, 500}, {persister_hibernate_after, 10000}, + {msg_store_file_size_limit, 16777216}, + {queue_index_max_journal_entries, 262144}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, + {default_user_is_admin, true}, {default_vhost, <<"/">>}, - {default_permissions, [<<".*">>, <<".*">>, <<".*">>]}]}]}. + {default_permissions, [<<".*">>, <<".*">>, <<".*">>]}, + {collect_statistics, none}]}]}. diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 06297c69..24aa8d98 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -29,14 +29,15 @@ %% Contributor(s): ______________________________________. %% --record(user, {username, password}). --record(permission, {configure, write, read}). +-record(user, {username, password, is_admin}). +-record(permission, {scope, configure, write, read}). -record(user_vhost, {username, virtual_host}). -record(user_permission, {user_vhost, permission}). -record(vhost, {virtual_host, dummy}). --record(connection, {user, timeout_sec, frame_max, vhost, client_properties}). 
+-record(connection, {protocol, user, timeout_sec, frame_max, vhost, + client_properties}). -record(content, {class_id, @@ -44,6 +45,7 @@ properties_bin, %% either 'none', or an encoded properties binary %% Note: at most one of properties and properties_bin can be %% 'none' at once. + protocol, %% The protocol under which properties_bin was encoded payload_fragments_rev %% list of binaries, in reverse order (!) }). @@ -68,123 +70,22 @@ -record(ssl_socket, {tcp, ssl}). -record(delivery, {mandatory, immediate, txn, sender, message}). - -record(amqp_error, {name, explanation, method = none}). -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --include("rabbit_framing_spec.hrl"). - --type(maybe(T) :: T | 'none'). --type(erlang_node() :: atom()). --type(node_type() :: disc_only | disc | ram | unknown). --type(ssl_socket() :: #ssl_socket{}). --type(socket() :: port() | ssl_socket()). --type(thunk(T) :: fun(() -> T)). --type(info_key() :: atom()). --type(info() :: {info_key(), any()}). --type(regexp() :: binary()). --type(file_path() :: string()). - -%% this is really an abstract type, but dialyzer does not support them --type(guid() :: binary()). --type(txn() :: guid()). --type(pkey() :: guid()). --type(r(Kind) :: - #resource{virtual_host :: vhost(), - kind :: Kind, - name :: resource_name()}). --type(queue_name() :: r('queue')). --type(exchange_name() :: r('exchange')). --type(user() :: - #user{username :: username(), - password :: password()}). --type(permission() :: - #permission{configure :: regexp(), - write :: regexp(), - read :: regexp()}). --type(amqqueue() :: - #amqqueue{name :: queue_name(), - durable :: boolean(), - auto_delete :: boolean(), - exclusive_owner :: maybe(pid()), - arguments :: amqp_table(), - pid :: maybe(pid())}). --type(exchange() :: - #exchange{name :: exchange_name(), - type :: exchange_type(), - durable :: boolean(), - auto_delete :: boolean(), - arguments :: amqp_table()}). 
--type(binding() :: - #binding{exchange_name :: exchange_name(), - queue_name :: queue_name(), - key :: binding_key()}). -%% TODO: make this more precise by tying specific class_ids to -%% specific properties --type(undecoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: 'none', - properties_bin :: binary(), - payload_fragments_rev :: [binary()]} | - #content{class_id :: amqp_class_id(), - properties :: amqp_properties(), - properties_bin :: 'none', - payload_fragments_rev :: [binary()]}). --type(unencoded_content() :: undecoded_content()). --type(decoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: amqp_properties(), - properties_bin :: maybe(binary()), - payload_fragments_rev :: [binary()]}). --type(encoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: maybe(amqp_properties()), - properties_bin :: binary(), - payload_fragments_rev :: [binary()]}). --type(content() :: undecoded_content() | decoded_content()). --type(basic_message() :: - #basic_message{exchange_name :: exchange_name(), - routing_key :: routing_key(), - content :: content(), - guid :: guid(), - is_persistent :: boolean()}). --type(message() :: basic_message()). --type(delivery() :: - #delivery{mandatory :: boolean(), - immediate :: boolean(), - txn :: maybe(txn()), - sender :: pid(), - message :: message()}). -%% this really should be an abstract type --type(msg_id() :: non_neg_integer()). --type(qmsg() :: {queue_name(), pid(), msg_id(), boolean(), message()}). --type(listener() :: - #listener{node :: erlang_node(), - protocol :: atom(), - host :: string() | atom(), - port :: non_neg_integer()}). --type(not_found() :: {'error', 'not_found'}). --type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). --type(amqp_error() :: - #amqp_error{name :: atom(), - explanation :: string(), - method :: atom()}). - --endif. +-record(event, {type, props, timestamp}). 
%%---------------------------------------------------------------------------- -define(COPYRIGHT_MESSAGE, "Copyright (C) 2007-2010 LShift Ltd., Cohesive Financial Technologies LLC., and Rabbit Technologies Ltd."). -define(INFORMATION_MESSAGE, "Licensed under the MPL. See http://www.rabbitmq.com/"). +-define(PROTOCOL_VERSION, "AMQP 0-9-1 / 0-9 / 0-8"). -define(ERTS_MINIMUM, "5.6.3"). -define(MAX_WAIT, 16#ffffffff). -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). +-define(STATS_INTERVAL, 5000). -ifdef(debug). -define(LOGDEBUG0(F), rabbit_log:debug(F)). diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 55cd126e..005994f0 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -31,33 +31,34 @@ -type(fetch_result() :: %% Message, IsDelivered, AckTag, Remaining_Len - ('empty'|{basic_message(), boolean(), ack(), non_neg_integer()})). + ('empty'|{rabbit_types:basic_message(), boolean(), ack(), non_neg_integer()})). -type(is_durable() :: boolean()). -type(attempt_recovery() :: boolean()). -type(purged_msg_count() :: non_neg_integer()). -type(ack_required() :: boolean()). --spec(start/1 :: ([queue_name()]) -> 'ok'). --spec(init/3 :: (queue_name(), is_durable(), attempt_recovery()) -> state()). +-spec(start/1 :: ([rabbit_amqqueue:name()]) -> 'ok'). +-spec(stop/0 :: () -> 'ok'). +-spec(init/3 :: (rabbit_amqqueue:name(), is_durable(), attempt_recovery()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}). --spec(publish/2 :: (basic_message(), state()) -> state()). +-spec(publish/2 :: (rabbit_types:basic_message(), state()) -> state()). -spec(publish_delivered/3 :: - (ack_required(), basic_message(), state()) -> {ack(), state()}). + (ack_required(), rabbit_types:basic_message(), state()) -> {ack(), state()}). 
-spec(fetch/2 :: (ack_required(), state()) -> {fetch_result(), state()}). -spec(ack/2 :: ([ack()], state()) -> state()). --spec(tx_publish/3 :: (txn(), basic_message(), state()) -> state()). --spec(tx_ack/3 :: (txn(), [ack()], state()) -> state()). --spec(tx_rollback/2 :: (txn(), state()) -> {[ack()], state()}). --spec(tx_commit/3 :: (txn(), fun (() -> any()), state()) -> {[ack()], state()}). +-spec(tx_publish/3 :: (rabbit_types:txn(), rabbit_types:basic_message(), state()) -> state()). +-spec(tx_ack/3 :: (rabbit_types:txn(), [ack()], state()) -> state()). +-spec(tx_rollback/2 :: (rabbit_types:txn(), state()) -> {[ack()], state()}). +-spec(tx_commit/3 :: (rabbit_types:txn(), fun (() -> any()), state()) -> {[ack()], state()}). -spec(requeue/2 :: ([ack()], state()) -> state()). -spec(len/1 :: (state()) -> non_neg_integer()). -spec(is_empty/1 :: (state()) -> boolean()). -spec(set_ram_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). -spec(ram_duration/1 :: (state()) -> {number(), state()}). --spec(needs_sync/1 :: (state()) -> boolean()). --spec(sync/1 :: (state()) -> state()). +-spec(needs_idle_timeout/1 :: (state()) -> boolean()). +-spec(idle_timeout/1 :: (state()) -> state()). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/include/rabbit_exchange_type_spec.hrl b/include/rabbit_exchange_type_spec.hrl index cb564365..cecd666b 100644 --- a/include/rabbit_exchange_type_spec.hrl +++ b/include/rabbit_exchange_type_spec.hrl @@ -31,13 +31,20 @@ -ifdef(use_specs). -spec(description/0 :: () -> [{atom(), any()}]). --spec(publish/2 :: (exchange(), delivery()) -> {routing_result(), [pid()]}). --spec(validate/1 :: (exchange()) -> 'ok'). --spec(create/1 :: (exchange()) -> 'ok'). --spec(recover/2 :: (exchange(), list(binding())) -> 'ok'). --spec(delete/2 :: (exchange(), list(binding())) -> 'ok'). --spec(add_binding/2 :: (exchange(), binding()) -> 'ok'). 
--spec(remove_bindings/2 :: (exchange(), list(binding())) -> 'ok'). --spec(assert_args_equivalence/2 :: (exchange(), amqp_table()) -> 'ok'). +-spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) + -> {rabbit_router:routing_result(), [pid()]}). +-spec(validate/1 :: (rabbit_types:exchange()) -> 'ok'). +-spec(create/1 :: (rabbit_types:exchange()) -> 'ok'). +-spec(recover/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(delete/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(add_binding/2 :: (rabbit_types:exchange(), + rabbit_types:binding()) -> 'ok'). +-spec(remove_bindings/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(assert_args_equivalence/2 :: + (rabbit_types:exchange(), rabbit_framing:amqp_table()) + -> 'ok' | rabbit_types:connection_exit()). -endif. diff --git a/src/rabbit_tracer.erl b/include/rabbit_msg_store.hrl index 484249b1..d96fa758 100644 --- a/src/rabbit_tracer.erl +++ b/include/rabbit_msg_store.hrl @@ -29,22 +29,13 @@ %% Contributor(s): ______________________________________. %% --module(rabbit_tracer). --export([start/0]). +-include("rabbit.hrl"). --import(erlang). +-ifdef(use_specs). -start() -> - spawn(fun mainloop/0), - ok. +-type(msg() :: any()). -mainloop() -> - erlang:trace(new, true, [all]), - mainloop1(). +-endif. -mainloop1() -> - receive - Msg -> - rabbit_log:info("TRACE: ~p~n", [Msg]) - end, - mainloop1(). +-record(msg_location, + {guid, ref_count, file, offset, total_size}). diff --git a/src/rabbit_load.erl b/include/rabbit_msg_store_index.hrl index 4f467162..fba0b7cd 100644 --- a/src/rabbit_load.erl +++ b/include/rabbit_msg_store_index.hrl @@ -29,51 +29,31 @@ %% Contributor(s): ______________________________________. %% --module(rabbit_load). - --export([local_load/0, remote_loads/0, pick/0]). - --define(FUDGE_FACTOR, 0.98). --define(TIMEOUT, 100). +-include("rabbit_msg_store.hrl"). 
%%---------------------------------------------------------------------------- -ifdef(use_specs). --type(erlang_node() :: atom()). --type(load() :: {{non_neg_integer(), integer() | 'unknown'}, erlang_node()}). --spec(local_load/0 :: () -> load()). --spec(remote_loads/0 :: () -> [load()]). --spec(pick/0 :: () -> erlang_node()). +-type(dir() :: any()). +-type(index_state() :: any()). +-type(keyvalue() :: any()). +-type(fieldpos() :: non_neg_integer()). +-type(fieldvalue() :: any()). + +-spec(new/1 :: (dir()) -> index_state()). +-spec(recover/1 :: (dir()) -> rabbit_types:ok_or_error2(index_state(), any())). +-spec(lookup/2 :: + (rabbit_guid:guid(), index_state()) -> ('not_found' | keyvalue())). +-spec(insert/2 :: (keyvalue(), index_state()) -> 'ok'). +-spec(update/2 :: (keyvalue(), index_state()) -> 'ok'). +-spec(update_fields/3 :: (rabbit_guid:guid(), ({fieldpos(), fieldvalue()} | + [{fieldpos(), fieldvalue()}]), + index_state()) -> 'ok'). +-spec(delete/2 :: (rabbit_guid:guid(), index_state()) -> 'ok'). +-spec(delete_by_file/2 :: (fieldvalue(), index_state()) -> 'ok'). +-spec(terminate/1 :: (index_state()) -> any()). -endif. %%---------------------------------------------------------------------------- - -local_load() -> - LoadAvg = case whereis(cpu_sup) of - undefined -> unknown; - _ -> case cpu_sup:avg1() of - L when is_integer(L) -> L; - {error, timeout} -> unknown - end - end, - {{statistics(run_queue), LoadAvg}, node()}. - -remote_loads() -> - {ResL, _BadNodes} = - rpc:multicall(nodes(), ?MODULE, local_load, [], ?TIMEOUT), - ResL. - -pick() -> - RemoteLoads = remote_loads(), - {{RunQ, LoadAvg}, Node} = local_load(), - %% add bias towards current node; we rely on Erlang's term order - %% of SomeFloat < local_unknown < unknown. - AdjustedLoadAvg = case LoadAvg of - unknown -> local_unknown; - _ -> LoadAvg * ?FUDGE_FACTOR - end, - Loads = [{{RunQ, AdjustedLoadAvg}, Node} | RemoteLoads], - {_, SelectedNode} = lists:min(Loads), - SelectedNode. 
diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec index c5950be4..eb0a2a51 100644 --- a/packaging/RPMS/Fedora/rabbitmq-server.spec +++ b/packaging/RPMS/Fedora/rabbitmq-server.spec @@ -31,6 +31,7 @@ scalable implementation of an AMQP broker. %define _rabbit_wrapper %{_builddir}/`basename %{S:2}` %define _rabbit_asroot_wrapper %{_builddir}/`basename %{S:4}` %define _rabbit_server_ocf %{_builddir}/`basename %{S:5}` +%define _plugins_state_dir %{_localstatedir}/lib/rabbitmq/plugins %define _maindir %{buildroot}%{_rabbit_erllibdir} @@ -58,8 +59,6 @@ install -p -D -m 0755 %{S:1} %{buildroot}%{_initrddir}/rabbitmq-server install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmqctl install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmq-server install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmq-multi -install -p -D -m 0755 %{_rabbit_asroot_wrapper} %{buildroot}%{_sbindir}/rabbitmq-activate-plugins -install -p -D -m 0755 %{_rabbit_asroot_wrapper} %{buildroot}%{_sbindir}/rabbitmq-deactivate-plugins install -p -D -m 0755 %{_rabbit_server_ocf} %{buildroot}%{_exec_prefix}/lib/ocf/resource.d/rabbitmq/rabbitmq-server install -p -D -m 0644 %{S:3} %{buildroot}%{_sysconfdir}/logrotate.d/rabbitmq-server @@ -108,7 +107,7 @@ if [ $1 = 0 ]; then fi # Clean out plugin activation state, both on uninstall and upgrade -rm -rf %{_rabbit_erllibdir}/priv +rm -rf %{_plugins_state_dir} for ext in rel script boot ; do rm -f %{_rabbit_erllibdir}/ebin/rabbit.$ext done @@ -128,6 +127,15 @@ done rm -rf %{buildroot} %changelog +* Tue Sep 14 2010 marek@rabbitmq.com 2.1.0-1 +- New Upstream Release + +* Mon Aug 23 2010 mikeb@rabbitmq.com 2.0.0-1 +- New Upstream Release + +* Wed Jul 14 2010 Emile Joubert <emile@rabbitmq.com> 1.8.1-1 +- New Upstream Release + * Tue Jun 15 2010 Matthew Sackman <matthew@rabbitmq.com> 1.8.0-1 - New Upstream Release diff --git a/packaging/common/rabbitmq-server.ocf 
b/packaging/common/rabbitmq-server.ocf index db0ed70b..b969535a 100755 --- a/packaging/common/rabbitmq-server.ocf +++ b/packaging/common/rabbitmq-server.ocf @@ -40,7 +40,6 @@ ## OCF_RESKEY_nodename ## OCF_RESKEY_ip ## OCF_RESKEY_port -## OCF_RESKEY_cluster_config_file ## OCF_RESKEY_config_file ## OCF_RESKEY_log_base ## OCF_RESKEY_mnesia_base @@ -117,14 +116,6 @@ The IP Port for rabbitmq-server to listen on <content type="integer" default="" /> </parameter> -<parameter name="cluster_config_file" unique="0" required="0"> -<longdesc lang="en"> -Location of the cluster config file -</longdesc> -<shortdesc lang="en">Cluster config file path</shortdesc> -<content type="string" default="" /> -</parameter> - <parameter name="config_file" unique="0" required="0"> <longdesc lang="en"> Location of the config file @@ -184,7 +175,6 @@ RABBITMQ_CTL=$OCF_RESKEY_ctl RABBITMQ_NODENAME=$OCF_RESKEY_nodename RABBITMQ_NODE_IP_ADDRESS=$OCF_RESKEY_ip RABBITMQ_NODE_PORT=$OCF_RESKEY_port -RABBITMQ_CLUSTER_CONFIG_FILE=$OCF_RESKEY_cluster_config_file RABBITMQ_CONFIG_FILE=$OCF_RESKEY_config_file RABBITMQ_LOG_BASE=$OCF_RESKEY_log_base RABBITMQ_MNESIA_BASE=$OCF_RESKEY_mnesia_base @@ -195,7 +185,6 @@ RABBITMQ_SERVER_START_ARGS=$OCF_RESKEY_server_start_args export_vars() { [ ! -z $RABBITMQ_NODE_IP_ADDRESS ] && export RABBITMQ_NODE_IP_ADDRESS [ ! -z $RABBITMQ_NODE_PORT ] && export RABBITMQ_NODE_PORT - [ ! -z $RABBITMQ_CLUSTER_CONFIG_FILE ] && export RABBITMQ_CLUSTER_CONFIG_FILE [ ! -z $RABBITMQ_CONFIG_FILE ] && export RABBITMQ_CONFIG_FILE [ ! -z $RABBITMQ_LOG_BASE ] && export RABBITMQ_LOG_BASE [ ! -z $RABBITMQ_MNESIA_BASE ] && export RABBITMQ_MNESIA_BASE @@ -215,11 +204,6 @@ rabbit_validate_partial() { } rabbit_validate_full() { - if [ ! -z $RABBITMQ_CLUSTER_CONFIG_FILE ] && [ ! -e $RABBITMQ_CLUSTER_CONFIG_FILE ]; then - ocf_log err "rabbitmq-server cluster_config_file $RABBITMQ_CLUSTER_CONFIG_FILE does not exist or is not a file"; - exit $OCF_ERR_INSTALLED; - fi - if [ ! 
-z $RABBITMQ_CONFIG_FILE ] && [ ! -e $RABBITMQ_CONFIG_FILE ]; then ocf_log err "rabbitmq-server config_file $RABBITMQ_CONFIG_FILE does not exist or is not a file"; exit $OCF_ERR_INSTALLED; diff --git a/packaging/debs/Debian/debian/changelog b/packaging/debs/Debian/debian/changelog index 3c0d6937..9927cfbc 100644 --- a/packaging/debs/Debian/debian/changelog +++ b/packaging/debs/Debian/debian/changelog @@ -1,3 +1,21 @@ +rabbitmq-server (2.1.0-1) lucid; urgency=low + + * New Upstream Release + + -- Marek Majkowski <marek@rabbitmq.com> Tue, 14 Sep 2010 14:20:17 +0100 + +rabbitmq-server (2.0.0-1) karmic; urgency=low + + * New Upstream Release + + -- Michael Bridgen <mikeb@rabbitmq.com> Mon, 23 Aug 2010 14:55:39 +0100 + +rabbitmq-server (1.8.1-1) lucid; urgency=low + + * New Upstream Release + + -- Emile Joubert <emile@rabbitmq.com> Wed, 14 Jul 2010 15:05:24 +0100 + rabbitmq-server (1.8.0-1) intrepid; urgency=low * New Upstream Release diff --git a/packaging/debs/Debian/debian/control b/packaging/debs/Debian/debian/control index a44f49a0..4afc66ac 100644 --- a/packaging/debs/Debian/debian/control +++ b/packaging/debs/Debian/debian/control @@ -1,7 +1,7 @@ Source: rabbitmq-server Section: net Priority: extra -Maintainer: Tony Garnock-Jones <tonyg@rabbitmq.com> +Maintainer: RabbitMQ Team <packaging@rabbitmq.com> Build-Depends: cdbs, debhelper (>= 5), erlang-dev, python-simplejson, xmlto, xsltproc Standards-Version: 3.8.0 diff --git a/packaging/debs/Debian/debian/postrm.in b/packaging/debs/Debian/debian/postrm.in index 5290de9b..c4aeeebe 100644 --- a/packaging/debs/Debian/debian/postrm.in +++ b/packaging/debs/Debian/debian/postrm.in @@ -20,10 +20,7 @@ set -e remove_plugin_traces() { # Remove traces of plugins - rm -rf @RABBIT_LIB@/priv @RABBIT_LIB@/plugins - for ext in rel script boot ; do - rm -f @RABBIT_LIB@/ebin/rabbit.$ext - done + rm -rf /var/lib/rabbitmq/plugins-scratch } case "$1" in diff --git a/packaging/debs/Debian/debian/rules b/packaging/debs/Debian/debian/rules 
index 19166514..6b6df33b 100644 --- a/packaging/debs/Debian/debian/rules +++ b/packaging/debs/Debian/debian/rules @@ -17,8 +17,5 @@ install/rabbitmq-server:: for script in rabbitmqctl rabbitmq-server rabbitmq-multi; do \ install -p -D -m 0755 debian/rabbitmq-script-wrapper $(DEB_DESTDIR)usr/sbin/$$script; \ done - for script in rabbitmq-activate-plugins rabbitmq-deactivate-plugins; do \ - install -p -D -m 0755 debian/rabbitmq-asroot-script-wrapper $(DEB_DESTDIR)usr/sbin/$$script; \ - done sed -e 's|@RABBIT_LIB@|/usr/lib/rabbitmq/lib/rabbitmq_server-$(DEB_UPSTREAM_VERSION)|g' <debian/postrm.in >debian/postrm install -p -D -m 0755 debian/rabbitmq-server.ocf $(DEB_DESTDIR)usr/lib/ocf/resource.d/rabbitmq/rabbitmq-server diff --git a/packaging/generic-unix/Makefile b/packaging/generic-unix/Makefile index 4eade6c7..c4e01f4a 100644 --- a/packaging/generic-unix/Makefile +++ b/packaging/generic-unix/Makefile @@ -4,7 +4,6 @@ TARGET_DIR=rabbitmq_server-$(VERSION) TARGET_TARBALL=rabbitmq-server-generic-unix-$(VERSION) dist: - $(MAKE) -C ../.. VERSION=$(VERSION) srcdist tar -zxvf ../../dist/$(SOURCE_DIR).tar.gz $(MAKE) -C $(SOURCE_DIR) \ diff --git a/packaging/macports/Makefile b/packaging/macports/Makefile index 4ad4c30b..3a22eef0 100644 --- a/packaging/macports/Makefile +++ b/packaging/macports/Makefile @@ -31,11 +31,18 @@ $(DEST)/Portfile: Portfile.in -f checksums.sed <$^ >$@ rm checksums.sed +# The purpose of the intricate substitution below is to set up similar +# environment vars to the ones that su will on Linux. On OS X, we +# have to use the -m option to su in order to be able to set the shell +# (which for the rabbitmq user would otherwise be /dev/null). But the +# -m option means that *all* environment vars get preserved. Erlang +# needs vars such as HOME to be set. So we have to set them +# explicitly. 
macports: dirs $(DEST)/Portfile for f in rabbitmq-asroot-script-wrapper rabbitmq-script-wrapper ; do \ cp $(COMMON_DIR)/$$f $(DEST)/files ; \ done - sed -i -e 's|@SU_RABBITMQ_SH_C@|SHELL=/bin/sh su -m rabbitmq -c|' \ + sed -i -e 's|@SU_RABBITMQ_SH_C@|SHELL=/bin/sh HOME=/var/lib/rabbitmq USER=rabbitmq LOGNAME=rabbitmq PATH="$$(eval `PATH=MACPORTS_PREFIX/bin /usr/libexec/path_helper -s`; echo $$PATH)" su -m rabbitmq -c|' \ $(DEST)/files/rabbitmq-script-wrapper cp patch-org.macports.rabbitmq-server.plist.diff $(DEST)/files if [ -n "$(MACPORTS_USERHOST)" ] ; then \ @@ -52,4 +59,4 @@ macports: dirs $(DEST)/Portfile fi clean: - rm -rf $(DEST) checksums.sed + rm -rf $(MACPORTS_DIR) checksums.sed diff --git a/packaging/macports/Portfile.in b/packaging/macports/Portfile.in index 188a81c0..f30460d3 100644 --- a/packaging/macports/Portfile.in +++ b/packaging/macports/Portfile.in @@ -75,39 +75,24 @@ post-destroot { reinplace -E "s:(/etc/rabbitmq/rabbitmq.conf):${prefix}\\1:g" \ ${realsbin}/rabbitmq-env - reinplace -E "s:(CLUSTER_CONFIG_FILE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(LOG_BASE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(MNESIA_BASE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(PIDS_FILE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl + foreach var {CONFIG_FILE CLUSTER_CONFIG_FILE LOG_BASE MNESIA_BASE PIDS_FILE} { + reinplace -E "s:^($var)=/:\\1=${prefix}/:" \ + ${realsbin}/rabbitmq-multi \ + ${realsbin}/rabbitmq-server \ + ${realsbin}/rabbitmqctl + } xinstall -m 555 ${filespath}/rabbitmq-script-wrapper \ ${wrappersbin}/rabbitmq-multi - xinstall -m 555 ${filespath}/rabbitmq-asroot-script-wrapper \ - ${wrappersbin}/rabbitmq-activate-plugins + 
reinplace -E "s:MACPORTS_PREFIX/bin:${prefix}/bin:" \ + ${wrappersbin}/rabbitmq-multi reinplace -E "s:/usr/lib/rabbitmq/bin/:${prefix}/lib/rabbitmq/bin/:" \ ${wrappersbin}/rabbitmq-multi reinplace -E "s:/var/lib/rabbitmq:${prefix}/var/lib/rabbitmq:" \ ${wrappersbin}/rabbitmq-multi - reinplace -E "s:/usr/lib/rabbitmq/bin/:${prefix}/lib/rabbitmq/bin/:" \ - ${wrappersbin}/rabbitmq-activate-plugins - reinplace -E "s:/var/lib/rabbitmq:${prefix}/var/lib/rabbitmq:" \ - ${wrappersbin}/rabbitmq-activate-plugins file copy ${wrappersbin}/rabbitmq-multi ${wrappersbin}/rabbitmq-server file copy ${wrappersbin}/rabbitmq-multi ${wrappersbin}/rabbitmqctl - file copy ${wrappersbin}/rabbitmq-activate-plugins ${wrappersbin}/rabbitmq-deactivate-plugins } pre-install { diff --git a/packaging/windows/Makefile b/packaging/windows/Makefile index 50ce1637..abe174e0 100644 --- a/packaging/windows/Makefile +++ b/packaging/windows/Makefile @@ -4,7 +4,6 @@ TARGET_DIR=rabbitmq_server-$(VERSION) TARGET_ZIP=rabbitmq-server-windows-$(VERSION) dist: - $(MAKE) -C ../.. VERSION=$(VERSION) srcdist tar -zxvf ../../dist/$(SOURCE_DIR).tar.gz $(MAKE) -C $(SOURCE_DIR) @@ -13,8 +12,6 @@ dist: mv $(SOURCE_DIR)/scripts/rabbitmq-service.bat $(SOURCE_DIR)/sbin mv $(SOURCE_DIR)/scripts/rabbitmqctl.bat $(SOURCE_DIR)/sbin mv $(SOURCE_DIR)/scripts/rabbitmq-multi.bat $(SOURCE_DIR)/sbin - mv $(SOURCE_DIR)/scripts/rabbitmq-activate-plugins.bat $(SOURCE_DIR)/sbin - mv $(SOURCE_DIR)/scripts/rabbitmq-deactivate-plugins.bat $(SOURCE_DIR)/sbin rm -rf $(SOURCE_DIR)/scripts rm -rf $(SOURCE_DIR)/codegen* $(SOURCE_DIR)/Makefile rm -f $(SOURCE_DIR)/README @@ -22,6 +19,8 @@ dist: mv $(SOURCE_DIR) $(TARGET_DIR) mkdir -p $(TARGET_DIR) + mkdir -p $(TARGET_DIR)/plugins + echo Put your .ez plugin files in this directory > $(TARGET_DIR)/plugins/README xmlto -o . 
xhtml-nochunks ../../docs/rabbitmq-service.xml elinks -dump -no-references -no-numbering rabbitmq-service.html \ > $(TARGET_DIR)/readme-service.txt diff --git a/scripts/rabbitmq-activate-plugins b/scripts/rabbitmq-activate-plugins deleted file mode 100755 index 00ee6c61..00000000 --- a/scripts/rabbitmq-activate-plugins +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/sh -## The contents of this file are subject to the Mozilla Public License -## Version 1.1 (the "License"); you may not use this file except in -## compliance with the License. You may obtain a copy of the License at -## http://www.mozilla.org/MPL/ -## -## Software distributed under the License is distributed on an "AS IS" -## basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -## License for the specific language governing rights and limitations -## under the License. -## -## The Original Code is RabbitMQ. -## -## The Initial Developers of the Original Code are LShift Ltd, -## Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -## -## Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -## Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -## are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -## Technologies LLC, and Rabbit Technologies Ltd. -## -## Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift -## Ltd. Portions created by Cohesive Financial Technologies LLC are -## Copyright (C) 2007-2010 Cohesive Financial Technologies -## LLC. Portions created by Rabbit Technologies Ltd are Copyright -## (C) 2007-2010 Rabbit Technologies Ltd. -## -## All Rights Reserved. -## -## Contributor(s): ______________________________________. -## - -. 
`dirname $0`/rabbitmq-env - -RABBITMQ_EBIN=${RABBITMQ_HOME}/ebin -[ "x" = "x$RABBITMQ_PLUGINS_DIR" ] && RABBITMQ_PLUGINS_DIR="${RABBITMQ_HOME}/plugins" -[ "x" = "x$RABBITMQ_PLUGINS_EXPAND_DIR" ] && RABBITMQ_PLUGINS_EXPAND_DIR="${RABBITMQ_HOME}/priv/plugins" - -exec erl \ - -pa "$RABBITMQ_EBIN" \ - -rabbit plugins_dir "\"$RABBITMQ_PLUGINS_DIR\"" \ - -rabbit plugins_expand_dir "\"$RABBITMQ_PLUGINS_EXPAND_DIR\"" \ - -rabbit rabbit_ebin "\"$RABBITMQ_EBIN\"" \ - -noinput \ - -hidden \ - -s rabbit_plugin_activator \ - -extra "$@" diff --git a/scripts/rabbitmq-activate-plugins.bat b/scripts/rabbitmq-activate-plugins.bat deleted file mode 100644 index 3c9a057c..00000000 --- a/scripts/rabbitmq-activate-plugins.bat +++ /dev/null @@ -1,67 +0,0 @@ -@echo off
-REM The contents of this file are subject to the Mozilla Public License
-REM Version 1.1 (the "License"); you may not use this file except in
-REM compliance with the License. You may obtain a copy of the License at
-REM http://www.mozilla.org/MPL/
-REM
-REM Software distributed under the License is distributed on an "AS IS"
-REM basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
-REM License for the specific language governing rights and limitations
-REM under the License.
-REM
-REM The Original Code is RabbitMQ.
-REM
-REM The Initial Developers of the Original Code are LShift Ltd,
-REM Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
-REM
-REM Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
-REM Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
-REM are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
-REM Technologies LLC, and Rabbit Technologies Ltd.
-REM
-REM Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
-REM Ltd. Portions created by Cohesive Financial Technologies LLC are
-REM Copyright (C) 2007-2010 Cohesive Financial Technologies
-REM LLC. Portions created by Rabbit Technologies Ltd are Copyright
-REM (C) 2007-2010 Rabbit Technologies Ltd.
-REM
-REM All Rights Reserved.
-REM
-REM Contributor(s): ______________________________________.
-REM
-
-setlocal
-
-rem Preserve values that might contain exclamation marks before
-rem enabling delayed expansion
-set TDP0=%~dp0
-set STAR=%*
-setlocal enabledelayedexpansion
-
-if not exist "!ERLANG_HOME!\bin\erl.exe" (
- echo.
- echo ******************************
- echo ERLANG_HOME not set correctly.
- echo ******************************
- echo.
- echo Please either set ERLANG_HOME to point to your Erlang installation or place the
- echo RabbitMQ server distribution in the Erlang lib folder.
- echo.
- exit /B
-)
-
-set RABBITMQ_PLUGINS_DIR=!TDP0!..\plugins
-set RABBITMQ_PLUGINS_EXPAND_DIR=!TDP0!..\priv\plugins
-set RABBITMQ_EBIN_DIR=!TDP0!..\ebin
-
-"!ERLANG_HOME!\bin\erl.exe" ^
--pa "!RABBITMQ_EBIN_DIR!" ^
--noinput -hidden ^
--s rabbit_plugin_activator ^
--rabbit plugins_dir \""!RABBITMQ_PLUGINS_DIR:\=/!"\" ^
--rabbit plugins_expand_dir \""!RABBITMQ_PLUGINS_EXPAND_DIR:\=/!"\" ^
--rabbit rabbit_ebin \""!RABBITMQ_EBIN_DIR:\=/!"\" ^
--extra !STAR!
-
-endlocal
-endlocal
diff --git a/scripts/rabbitmq-deactivate-plugins b/scripts/rabbitmq-deactivate-plugins deleted file mode 100755 index 3fd71bfa..00000000 --- a/scripts/rabbitmq-deactivate-plugins +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh -## The contents of this file are subject to the Mozilla Public License -## Version 1.1 (the "License"); you may not use this file except in -## compliance with the License. You may obtain a copy of the License at -## http://www.mozilla.org/MPL/ -## -## Software distributed under the License is distributed on an "AS IS" -## basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -## License for the specific language governing rights and limitations -## under the License. -## -## The Original Code is RabbitMQ. -## -## The Initial Developers of the Original Code are LShift Ltd, -## Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -## -## Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -## Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -## are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -## Technologies LLC, and Rabbit Technologies Ltd. -## -## Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift -## Ltd. Portions created by Cohesive Financial Technologies LLC are -## Copyright (C) 2007-2010 Cohesive Financial Technologies -## LLC. Portions created by Rabbit Technologies Ltd are Copyright -## (C) 2007-2010 Rabbit Technologies Ltd. -## -## All Rights Reserved. -## -## Contributor(s): ______________________________________. -## - -. `dirname $0`/rabbitmq-env - -RABBITMQ_EBIN=${RABBITMQ_HOME}/ebin - -rm -f ${RABBITMQ_EBIN}/rabbit.rel ${RABBITMQ_EBIN}/rabbit.script ${RABBITMQ_EBIN}/rabbit.boot diff --git a/scripts/rabbitmq-deactivate-plugins.bat b/scripts/rabbitmq-deactivate-plugins.bat deleted file mode 100644 index 1bc3f88e..00000000 --- a/scripts/rabbitmq-deactivate-plugins.bat +++ /dev/null @@ -1,45 +0,0 @@ -@echo off
-REM The contents of this file are subject to the Mozilla Public License
-REM Version 1.1 (the "License"); you may not use this file except in
-REM compliance with the License. You may obtain a copy of the License at
-REM http://www.mozilla.org/MPL/
-REM
-REM Software distributed under the License is distributed on an "AS IS"
-REM basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
-REM License for the specific language governing rights and limitations
-REM under the License.
-REM
-REM The Original Code is RabbitMQ.
-REM
-REM The Initial Developers of the Original Code are LShift Ltd,
-REM Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
-REM
-REM Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
-REM Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
-REM are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
-REM Technologies LLC, and Rabbit Technologies Ltd.
-REM
-REM Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
-REM Ltd. Portions created by Cohesive Financial Technologies LLC are
-REM Copyright (C) 2007-2010 Cohesive Financial Technologies
-REM LLC. Portions created by Rabbit Technologies Ltd are Copyright
-REM (C) 2007-2010 Rabbit Technologies Ltd.
-REM
-REM All Rights Reserved.
-REM
-REM Contributor(s): ______________________________________.
-REM
-
-setlocal
-
-rem Preserve values that might contain exclamation marks before
-rem enabling delayed expansion
-set TDP0=%~dp0
-setlocal enabledelayedexpansion
-
-set RABBITMQ_EBIN_DIR=!TDP0!..\ebin
-
-del /f "!RABBITMQ_EBIN_DIR!"\rabbit.rel "!RABBITMQ_EBIN_DIR!"\rabbit.script "!RABBITMQ_EBIN_DIR!"\rabbit.boot
-
-endlocal
-endlocal
diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 2261b56e..8e26663a 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -68,6 +68,8 @@ fi [ "x" = "x$RABBITMQ_MNESIA_DIR" ] && RABBITMQ_MNESIA_DIR=${MNESIA_DIR} [ "x" = "x$RABBITMQ_MNESIA_DIR" ] && RABBITMQ_MNESIA_DIR=${RABBITMQ_MNESIA_BASE}/${RABBITMQ_NODENAME} +[ "x" = "x$RABBITMQ_PLUGINS_DIR" ] && RABBITMQ_PLUGINS_DIR="${RABBITMQ_HOME}/plugins" + ## Log rotation [ "x" = "x$RABBITMQ_LOGS" ] && RABBITMQ_LOGS=${LOGS} [ "x" = "x$RABBITMQ_LOGS" ] && RABBITMQ_LOGS="${RABBITMQ_LOG_BASE}/${RABBITMQ_NODENAME}.log" @@ -79,23 +81,29 @@ fi [ -f "${RABBITMQ_LOGS}" ] && cat "${RABBITMQ_LOGS}" >> "${RABBITMQ_LOGS}${RABBITMQ_BACKUP_EXTENSION}" [ -f "${RABBITMQ_SASL_LOGS}" ] && cat "${RABBITMQ_SASL_LOGS}" >> "${RABBITMQ_SASL_LOGS}${RABBITMQ_BACKUP_EXTENSION}" -if [ -f "$RABBITMQ_CLUSTER_CONFIG_FILE" ]; then - RABBITMQ_CLUSTER_CONFIG_OPTION="-rabbit cluster_config \"$RABBITMQ_CLUSTER_CONFIG_FILE\"" -else - RABBITMQ_CLUSTER_CONFIG_OPTION="" -fi - RABBITMQ_START_RABBIT= -[ "x" = "x$RABBITMQ_ALLOW_INPUT" ] && RABBITMQ_START_RABBIT='-noinput' +[ "x" = "x$RABBITMQ_ALLOW_INPUT" ] && RABBITMQ_START_RABBIT='-noinput' RABBITMQ_EBIN_ROOT="${RABBITMQ_HOME}/ebin" -if [ -f "${RABBITMQ_EBIN_ROOT}/rabbit.boot" ] && [ "x" = "x$RABBITMQ_NODE_ONLY" ]; then - RABBITMQ_BOOT_FILE="${RABBITMQ_EBIN_ROOT}/rabbit" - RABBITMQ_EBIN_PATH="" +if [ "x" = "x$RABBITMQ_NODE_ONLY" ]; then + if erl \ + -pa "$RABBITMQ_EBIN_ROOT" \ + -rabbit plugins_dir "\"$RABBITMQ_PLUGINS_DIR\"" \ + -rabbit plugins_expand_dir "\"${RABBITMQ_MNESIA_DIR}/plugins-scratch\"" \ + -rabbit rabbit_ebin "\"$RABBITMQ_EBIN_ROOT\"" \ + -noinput \ + -hidden \ + -s rabbit_plugin_activator \ + -extra "$@" + then + RABBITMQ_BOOT_FILE="${RABBITMQ_MNESIA_DIR}/plugins-scratch/rabbit" + RABBITMQ_EBIN_PATH="" + else + exit 1 + fi else RABBITMQ_BOOT_FILE=start_sasl RABBITMQ_EBIN_PATH="-pa ${RABBITMQ_EBIN_ROOT}" - [ "x" = "x$RABBITMQ_NODE_ONLY" ] && 
RABBITMQ_START_RABBIT="${RABBITMQ_START_RABBIT} -s rabbit" fi RABBITMQ_CONFIG_ARG= [ -f "${RABBITMQ_CONFIG_FILE}.config" ] && RABBITMQ_CONFIG_ARG="-config ${RABBITMQ_CONFIG_FILE}" @@ -124,6 +132,5 @@ exec erl \ -os_mon start_disksup false \ -os_mon start_memsup false \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ - ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ ${RABBITMQ_SERVER_START_ARGS} \ "$@" diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat index a290f935..5bcbc6ba 100644 --- a/scripts/rabbitmq-server.bat +++ b/scripts/rabbitmq-server.bat @@ -103,26 +103,30 @@ if exist "!SASL_LOGS!" ( rem End of log management
-if "!RABBITMQ_CLUSTER_CONFIG_FILE!"=="" (
- set RABBITMQ_CLUSTER_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq_cluster.config
-)
-set CLUSTER_CONFIG=
-if not exist "!RABBITMQ_CLUSTER_CONFIG_FILE!" GOTO L1
-set CLUSTER_CONFIG=-rabbit cluster_config \""!RABBITMQ_CLUSTER_CONFIG_FILE:\=/!"\"
-:L1
-
if "!RABBITMQ_MNESIA_DIR!"=="" (
set RABBITMQ_MNESIA_DIR=!RABBITMQ_MNESIA_BASE!/!RABBITMQ_NODENAME!-mnesia
)
+
+set RABBITMQ_PLUGINS_DIR=!TDP0!..\plugins
set RABBITMQ_EBIN_ROOT=!TDP0!..\ebin
-if exist "!RABBITMQ_EBIN_ROOT!\rabbit.boot" (
- echo Using Custom Boot File "!RABBITMQ_EBIN_ROOT!\rabbit.boot"
- set RABBITMQ_BOOT_FILE=!RABBITMQ_EBIN_ROOT!\rabbit
- set RABBITMQ_EBIN_PATH=
-) else (
- set RABBITMQ_BOOT_FILE=start_sasl
- set RABBITMQ_EBIN_PATH=-pa "!RABBITMQ_EBIN_ROOT!"
+
+"!ERLANG_HOME!\bin\erl.exe" ^
+-pa "!RABBITMQ_EBIN_ROOT!" ^
+-noinput -hidden ^
+-s rabbit_plugin_activator ^
+-rabbit plugins_dir \""!RABBITMQ_PLUGINS_DIR:\=/!"\" ^
+-rabbit plugins_expand_dir \""!RABBITMQ_MNESIA_DIR:\=/!/plugins-scratch"\" ^
+-rabbit rabbit_ebin \""!RABBITMQ_EBIN_ROOT:\=/!"\" ^
+-extra !STAR!
+
+set RABBITMQ_BOOT_FILE=!RABBITMQ_MNESIA_DIR!\plugins-scratch\rabbit
+if not exist "!RABBITMQ_BOOT_FILE!.boot" (
+ echo Custom Boot File "!RABBITMQ_BOOT_FILE!.boot" is missing.
+ exit /B 1
)
+
+set RABBITMQ_EBIN_PATH=
+
if "!RABBITMQ_CONFIG_FILE!"=="" (
set RABBITMQ_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq
)
diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat index bd117b83..4b3961d4 100644 --- a/scripts/rabbitmq-service.bat +++ b/scripts/rabbitmq-service.bat @@ -136,14 +136,6 @@ if exist "!SASL_LOGS!" ( rem End of log management
-if "!RABBITMQ_CLUSTER_CONFIG_FILE!"=="" (
- set RABBITMQ_CLUSTER_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq_cluster.config
-)
-set CLUSTER_CONFIG=
-if not exist "!RABBITMQ_CLUSTER_CONFIG_FILE!" GOTO L1
-set CLUSTER_CONFIG=-rabbit cluster_config \""!RABBITMQ_CLUSTER_CONFIG_FILE:\=/!"\"
-:L1
-
if "!RABBITMQ_MNESIA_DIR!"=="" (
set RABBITMQ_MNESIA_DIR=!RABBITMQ_MNESIA_BASE!/!RABBITMQ_NODENAME!-mnesia
)
@@ -185,15 +177,26 @@ if errorlevel 1 ( echo !RABBITMQ_SERVICENAME! service is already present - only updating service parameters
)
+set RABBITMQ_PLUGINS_DIR=!TDP0!..\plugins
set RABBITMQ_EBIN_ROOT=!TDP0!..\ebin
-if exist "!RABBITMQ_EBIN_ROOT!\rabbit.boot" (
- echo Using Custom Boot File "!RABBITMQ_EBIN_ROOT!\rabbit.boot"
- set RABBITMQ_BOOT_FILE=!RABBITMQ_EBIN_ROOT!\rabbit
- set RABBITMQ_EBIN_PATH=
-) else (
- set RABBITMQ_BOOT_FILE=start_sasl
- set RABBITMQ_EBIN_PATH=-pa "!RABBITMQ_EBIN_ROOT!"
+
+"!ERLANG_HOME!\bin\erl.exe" ^
+-pa "!RABBITMQ_EBIN_ROOT!" ^
+-noinput -hidden ^
+-s rabbit_plugin_activator ^
+-rabbit plugins_dir \""!RABBITMQ_PLUGINS_DIR:\=/!"\" ^
+-rabbit plugins_expand_dir \""!RABBITMQ_MNESIA_DIR:\=/!/plugins-scratch"\" ^
+-rabbit rabbit_ebin \""!RABBITMQ_EBIN_ROOT:\=/!"\" ^
+-extra !STAR!
+
+set RABBITMQ_BOOT_FILE=!RABBITMQ_MNESIA_DIR!\plugins-scratch\rabbit
+if not exist "!RABBITMQ_BOOT_FILE!.boot" (
+ echo Custom Boot File "!RABBITMQ_BOOT_FILE!.boot" is missing.
+ exit /B 1
)
+
+set RABBITMQ_EBIN_PATH=
+
if "!RABBITMQ_CONFIG_FILE!"=="" (
set RABBITMQ_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq
)
@@ -244,6 +247,7 @@ set ERLANG_SERVICE_ARGUMENTS=!ERLANG_SERVICE_ARGUMENTS:"=\"! -sname !RABBITMQ_NODENAME! ^
!CONSOLE_FLAG! ^
-args "!ERLANG_SERVICE_ARGUMENTS!" > NUL
+
goto END
diff --git a/scripts/rabbitmqctl b/scripts/rabbitmqctl index 92e5312b..76ce25fd 100755 --- a/scripts/rabbitmqctl +++ b/scripts/rabbitmqctl @@ -47,4 +47,3 @@ exec erl \ -s rabbit_control \ -nodename $RABBITMQ_NODENAME \ -extra "$@" - diff --git a/src/bpqueue.erl b/src/bpqueue.erl new file mode 100644 index 00000000..49874aa6 --- /dev/null +++ b/src/bpqueue.erl @@ -0,0 +1,286 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(bpqueue). + +%% Block-prefixed queue. From the perspective of the queue interface +%% the datastructure acts like a regular queue where each value is +%% paired with the prefix. 
+%% +%% This is implemented as a queue of queues, which is more space and +%% time efficient, whilst supporting the normal queue interface. Each +%% inner queue has a prefix, which does not need to be unique, and it +%% is guaranteed that no two consecutive blocks have the same +%% prefix. len/1 returns the flattened length of the queue and is +%% O(1). + +-export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2, + foldl/3, foldr/3, from_list/1, to_list/1, map_fold_filter_l/4, + map_fold_filter_r/4]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([bpqueue/0]). + +-type(bpqueue() :: {non_neg_integer(), queue()}). +-type(prefix() :: any()). +-type(value() :: any()). +-type(result() :: ({'empty', bpqueue()} | + {{'value', prefix(), value()}, bpqueue()})). + +-spec(new/0 :: () -> bpqueue()). +-spec(is_empty/1 :: (bpqueue()) -> boolean()). +-spec(len/1 :: (bpqueue()) -> non_neg_integer()). +-spec(in/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(in_r/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(out/1 :: (bpqueue()) -> result()). +-spec(out_r/1 :: (bpqueue()) -> result()). +-spec(join/2 :: (bpqueue(), bpqueue()) -> bpqueue()). +-spec(foldl/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(foldr/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()). +-spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]). +-spec(map_fold_filter_l/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). +-spec(map_fold_filter_r/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). + +-endif. + +%%---------------------------------------------------------------------------- + +new() -> {0, queue:new()}. 
+ +is_empty({0, _Q}) -> true; +is_empty(_BPQ) -> false. + +len({N, _Q}) -> N. + +in(Prefix, Value, {0, Q}) -> + {1, queue:in({Prefix, queue:from_list([Value])}, Q)}; +in(Prefix, Value, BPQ) -> + in1({fun queue:in/2, fun queue:out_r/1}, Prefix, Value, BPQ). + +in_r(Prefix, Value, BPQ = {0, _Q}) -> + in(Prefix, Value, BPQ); +in_r(Prefix, Value, BPQ) -> + in1({fun queue:in_r/2, fun queue:out/1}, Prefix, Value, BPQ). + +in1({In, Out}, Prefix, Value, {N, Q}) -> + {N+1, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, In(Value, InnerQ)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, queue:in(Value, queue:new())}, Q) + end}. + +in_q(Prefix, Queue, BPQ = {0, Q}) -> + case queue:len(Queue) of + 0 -> BPQ; + N -> {N, queue:in({Prefix, Queue}, Q)} + end; +in_q(Prefix, Queue, BPQ) -> + in_q1({fun queue:in/2, fun queue:out_r/1, + fun queue:join/2}, + Prefix, Queue, BPQ). + +in_q_r(Prefix, Queue, BPQ = {0, _Q}) -> + in_q(Prefix, Queue, BPQ); +in_q_r(Prefix, Queue, BPQ) -> + in_q1({fun queue:in_r/2, fun queue:out/1, + fun (T, H) -> queue:join(H, T) end}, + Prefix, Queue, BPQ). + +in_q1({In, Out, Join}, Prefix, Queue, BPQ = {N, Q}) -> + case queue:len(Queue) of + 0 -> BPQ; + M -> {N + M, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, Join(InnerQ, Queue)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, Queue}, Q) + end} + end. + +out({0, _Q} = BPQ) -> {empty, BPQ}; +out(BPQ) -> out1({fun queue:in_r/2, fun queue:out/1}, BPQ). + +out_r({0, _Q} = BPQ) -> {empty, BPQ}; +out_r(BPQ) -> out1({fun queue:in/2, fun queue:out_r/1}, BPQ). + +out1({In, Out}, {N, Q}) -> + {{value, {Prefix, InnerQ}}, Q1} = Out(Q), + {{value, Value}, InnerQ1} = Out(InnerQ), + Q2 = case queue:is_empty(InnerQ1) of + true -> Q1; + false -> In({Prefix, InnerQ1}, Q1) + end, + {{value, Prefix, Value}, {N-1, Q2}}. 
+ +join({0, _Q}, BPQ) -> + BPQ; +join(BPQ, {0, _Q}) -> + BPQ; +join({NHead, QHead}, {NTail, QTail}) -> + {{value, {Prefix, InnerQHead}}, QHead1} = queue:out_r(QHead), + {NHead + NTail, + case queue:out(QTail) of + {{value, {Prefix, InnerQTail}}, QTail1} -> + queue:join( + queue:in({Prefix, queue:join(InnerQHead, InnerQTail)}, QHead1), + QTail1); + {{value, {_Prefix, _InnerQTail}}, _QTail1} -> + queue:join(QHead, QTail) + end}. + +foldl(_Fun, Init, {0, _Q}) -> Init; +foldl( Fun, Init, {_N, Q}) -> fold1(fun queue:out/1, Fun, Init, Q). + +foldr(_Fun, Init, {0, _Q}) -> Init; +foldr( Fun, Init, {_N, Q}) -> fold1(fun queue:out_r/1, Fun, Init, Q). + +fold1(Out, Fun, Init, Q) -> + case Out(Q) of + {empty, _Q} -> + Init; + {{value, {Prefix, InnerQ}}, Q1} -> + fold1(Out, Fun, fold1(Out, Fun, Prefix, Init, InnerQ), Q1) + end. + +fold1(Out, Fun, Prefix, Init, InnerQ) -> + case Out(InnerQ) of + {empty, _Q} -> + Init; + {{value, Value}, InnerQ1} -> + fold1(Out, Fun, Prefix, Fun(Prefix, Value, Init), InnerQ1) + end. + +from_list(List) -> + {FinalPrefix, FinalInnerQ, ListOfPQs1, Len} = + lists:foldl( + fun ({_Prefix, []}, Acc) -> + Acc; + ({Prefix, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix, queue:join(InnerQ, queue:from_list(InnerList)), + ListOfPQs, LenAcc + length(InnerList)}; + ({Prefix1, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix1, queue:from_list(InnerList), + [{Prefix, InnerQ} | ListOfPQs], LenAcc + length(InnerList)} + end, {undefined, queue:new(), [], 0}, List), + ListOfPQs2 = [{FinalPrefix, FinalInnerQ} | ListOfPQs1], + [{undefined, InnerQ1} | Rest] = All = lists:reverse(ListOfPQs2), + {Len, queue:from_list(case queue:is_empty(InnerQ1) of + true -> Rest; + false -> All + end)}. + +to_list({0, _Q}) -> []; +to_list({_N, Q}) -> [{Prefix, queue:to_list(InnerQ)} || + {Prefix, InnerQ} <- queue:to_list(Q)]. 
+ +%% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init} +%% where FilterFun(Prefix) -> boolean() +%% Fun(Value, Init) -> {Prefix, Value, Init} | stop +%% +%% The filter fun allows you to skip very quickly over blocks that +%% you're not interested in. Such blocks appear in the resulting bpq +%% without modification. The Fun is then used both to map the value, +%% which also allows you to change the prefix (and thus block) of the +%% value, and also to modify the Init/Acc (just like a fold). If the +%% Fun returns 'stop' then it is not applied to any further items. +map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter1({fun queue:out/1, fun queue:in/2, + fun in_q/3, fun join/2}, + N, PFilter, Fun, Init, Q, new()). + +map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_r(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter1({fun queue:out_r/1, fun queue:in_r/2, + fun in_q_r/3, fun (T, H) -> join(H, T) end}, + N, PFilter, Fun, Init, Q, new()). + +map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, + Init, Q, QNew) -> + case Out(Q) of + {empty, _Q} -> + {QNew, Init}; + {{value, {Prefix, InnerQ}}, Q1} -> + case PFilter(Prefix) of + true -> + {Init1, QNew1, Cont} = + map_fold_filter2(Funs, Fun, Prefix, Prefix, + Init, InnerQ, QNew, queue:new()), + case Cont of + false -> {Join(QNew1, {Len - len(QNew1), Q1}), Init1}; + true -> map_fold_filter1(Funs, Len, PFilter, Fun, + Init1, Q1, QNew1) + end; + false -> + map_fold_filter1(Funs, Len, PFilter, Fun, + Init, Q1, InQ(Prefix, InnerQ, QNew)) + end + end. 
+ +map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, + Init, InnerQ, QNew, InnerQNew) -> + case Out(InnerQ) of + {empty, _Q} -> + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), true}; + {{value, Value}, InnerQ1} -> + case Fun(Value, Init) of + stop -> + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), false}; + {Prefix1, Value1, Init1} -> + {Prefix2, QNew1, InnerQNew1} = + case Prefix1 =:= Prefix of + true -> {Prefix, QNew, In(Value1, InnerQNew)}; + false -> {Prefix1, InQ(Prefix, InnerQNew, QNew), + In(Value1, queue:new())} + end, + map_fold_filter2(Funs, Fun, OrigPrefix, Prefix2, + Init1, InnerQ1, QNew1, InnerQNew1) + end + end. diff --git a/src/delegate.erl b/src/delegate.erl index 8af28127..c8aa3092 100644 --- a/src/delegate.erl +++ b/src/delegate.erl @@ -44,8 +44,9 @@ -ifdef(use_specs). --spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()}). --spec(invoke_no_result/2 :: (pid() | [pid()], fun ((pid()) -> any())) -> 'ok'). +-spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()} | {'error', any()}). +-spec(invoke_no_result/2 :: + (pid() | [pid()], fun ((pid()) -> any())) -> 'ok'). -spec(invoke/2 :: (pid() | [pid()], fun ((pid()) -> A)) -> A). -spec(process_count/0 :: () -> non_neg_integer()). diff --git a/src/delegate_sup.erl b/src/delegate_sup.erl index 1c1d62a9..ff303ee2 100644 --- a/src/delegate_sup.erl +++ b/src/delegate_sup.erl @@ -43,7 +43,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). -endif. diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 0f648dcd..d2830a25 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -34,13 +34,15 @@ %% A File Handle Cache %% %% This extends a subset of the functionality of the Erlang file -%% module. +%% module. 
In the below, we use "file handle" to specifically refer to +%% file handles, and "file descriptor" to refer to descriptors which +%% are not file handles, e.g. sockets. %% %% Some constraints %% 1) This supports one writer, multiple readers per file. Nothing %% else. %% 2) Do not open the same file from different processes. Bad things -%% may happen. +%% may happen, especially for writes. %% 3) Writes are all appends. You cannot write to the middle of a %% file, although you can truncate and then append if you want. %% 4) Although there is a write buffer, there is no read buffer. Feel @@ -49,10 +51,10 @@ %% %% Some benefits %% 1) You do not have to remember to call sync before close -%% 2) Buffering is much more flexible than with plain file module, and -%% you can control when the buffer gets flushed out. This means that -%% you can rely on reads-after-writes working, without having to call -%% the expensive sync. +%% 2) Buffering is much more flexible than with the plain file module, +%% and you can control when the buffer gets flushed out. This means +%% that you can rely on reads-after-writes working, without having to +%% call the expensive sync. %% 3) Unnecessary calls to position and sync get optimised out. %% 4) You can find out what your 'real' offset is, and what your %% 'virtual' offset is (i.e. where the hdl really is, and where it @@ -60,14 +62,19 @@ %% 5) You can find out what the offset was when you last sync'd. %% %% There is also a server component which serves to limit the number -%% of open file handles in a "soft" way - the server will never -%% prevent a client from opening a handle, but may immediately tell it -%% to close the handle. Thus you can set the limit to zero and it will -%% still all work correctly, it is just that effectively no caching -%% will take place. The operation of limiting is as follows: +%% of open file descriptors. 
This is a hard limit: the server +%% component will ensure that clients do not have more file +%% descriptors open than it's configured to allow. %% -%% On open and close, the client sends messages to the server -%% informing it of opens and closes. This allows the server to keep +%% On open, the client requests permission from the server to open the +%% required number of file handles. The server may ask the client to +%% close other file handles that it has open, or it may queue the +%% request and ask other clients to close file handles they have open +%% in order to satisfy the request. Requests are always satisfied in +%% the order they arrive, even if a latter request (for a small number +%% of file handles) can be satisfied before an earlier request (for a +%% larger number of file handles). On close, the client sends a +%% message to the server. These messages allow the server to keep %% track of the number of open handles. The client also keeps a %% gb_tree which is updated on every use of a file handle, mapping the %% time at which the file handle was last used (timestamp) to the @@ -81,21 +88,38 @@ %% Note that this data can go very out of date, by the client using %% the least recently used handle. %% -%% When the limit is reached, the server calculates the average age of -%% the last reported least recently used file handle of all the -%% clients. It then tells all the clients to close any handles not -%% used for longer than this average, by invoking the callback the -%% client registered. The client should receive this message and pass -%% it into set_maximum_since_use/1. However, it is highly possible -%% this age will be greater than the ages of all the handles the -%% client knows of because the client has used its file handles in the -%% mean time. Thus at this point the client reports to the server the +%% When the limit is exceeded (i.e. 
the number of open file handles is +%% at the limit and there are pending 'open' requests), the server +%% calculates the average age of the last reported least recently used +%% file handle of all the clients. It then tells all the clients to +%% close any handles not used for longer than this average, by +%% invoking the callback the client registered. The client should +%% receive this message and pass it into +%% set_maximum_since_use/1. However, it is highly possible this age +%% will be greater than the ages of all the handles the client knows +%% of because the client has used its file handles in the mean +%% time. Thus at this point the client reports to the server the %% current timestamp at which its least recently used file handle was %% last used. The server will check two seconds later that either it %% is back under the limit, in which case all is well again, or if %% not, it will calculate a new average age. Its data will be much %% more recent now, and so it is very likely that when this is %% communicated to the clients, the clients will close file handles. +%% (In extreme cases, where it's very likely that all clients have +%% used their open handles since they last sent in an update, which +%% would mean that the average will never cause any file handles to +%% be closed, the server can send out an average age of 0, resulting +%% in all available clients closing all their file handles.) +%% +%% Care is taken to ensure that (a) processes which are blocked +%% waiting for file descriptors to become available are not sent +%% requests to close file handles; and (b) given it is known how many +%% file handles a process has open, when the average age is forced to +%% 0, close messages are only sent to enough processes to release the +%% correct number of file handles and the list of processes is +%% randomly shuffled. 
This ensures we don't cause processes to +%% needlessly close file handles, and ensures that we don't always +%% make such requests of the same processes. %% %% The advantage of this scheme is that there is only communication %% from the client to the server on open, close, and when in the @@ -103,11 +127,7 @@ %% communication from the client to the server on normal file handle %% operations. This scheme forms a feed-back loop - the server does %% not care which file handles are closed, just that some are, and it -%% checks this repeatedly when over the limit. Given the guarantees of -%% now(), even if there is just one file handle open, a limit of 1, -%% and one client, it is certain that when the client calculates the -%% age of the handle, it will be greater than when the server -%% calculated it, hence it should be closed. +%% checks this repeatedly when over the limit. %% %% Handles which are closed as a result of the server are put into a %% "soft-closed" state in which the handle is closed (data flushed out @@ -116,13 +136,24 @@ %% do not need to worry about their handles being closed by the server %% - reopening them when necessary is handled transparently. %% -%% The server also supports obtain and release_on_death. obtain/0 -%% blocks until a file descriptor is available. release_on_death/1 -%% takes a pid and monitors the pid, reducing the count by 1 when the -%% pid dies. Thus the assumption is that obtain/0 is called first, and -%% when that returns, release_on_death/1 is called with the pid who -%% "owns" the file descriptor. This is, for example, used to track the -%% use of file descriptors through network sockets. +%% The server also supports obtain and transfer. obtain/0 blocks until +%% a file descriptor is available, at which point the requesting +%% process is considered to 'own' one more descriptor. transfer/1 +%% transfers ownership of a file descriptor between processes. It is +%% non-blocking. 
Obtain is used to obtain permission to accept file +%% descriptors. Obtain has a lower limit, set by the ?OBTAIN_LIMIT/1 +%% macro. File handles can use the entire limit, but will be evicted +%% by obtain calls up to the point at which no more obtain calls can +%% be satisfied by the obtains limit. Thus there will always be some +%% capacity available for file handles. Processes that use obtain are +%% never asked to return them, and they are not managed in any way by +%% the server. It is simply a mechanism to ensure that processes that +%% need file descriptors such as sockets can do so in such a way that +%% the overall number of open file descriptors is managed. +%% +%% The callers of register_callback/3, obtain/0, and the argument of +%% transfer/1 are monitored, reducing the count of handles in use +%% appropriately when the processes terminate. -behaviour(gen_server). @@ -130,17 +161,28 @@ -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). --export([release_on_death/1, obtain/0]). +-export([obtain/0, transfer/1, set_limit/1, get_limit/0]). +-export([ulimit/0]). -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -define(SERVER, ?MODULE). -define(RESERVED_FOR_OTHERS, 100). --define(FILE_HANDLES_LIMIT_WINDOWS, 10000000). + +%% Googling around suggests that Windows has a limit somewhere around +%% 16M, eg +%% http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx +%% however, it turns out that's only available through the win32 +%% API. Via the C Runtime, we have just 512: +%% http://msdn.microsoft.com/en-us/library/6e3b887c%28VS.80%29.aspx +-define(FILE_HANDLES_LIMIT_WINDOWS, 512). -define(FILE_HANDLES_LIMIT_OTHER, 1024). -define(FILE_HANDLES_CHECK_INTERVAL, 2000). +-define(OBTAIN_LIMIT(LIMIT), trunc((LIMIT * 0.9) - 2)). 
+-define(CLIENT_ETS_TABLE, ?MODULE). + %%---------------------------------------------------------------------------- -record(file, @@ -168,13 +210,31 @@ -record(fhc_state, { elders, limit, - count, - obtains, - callbacks, - client_mrefs, + open_count, + open_pending, + obtain_limit, + obtain_count, + obtain_pending, + clients, timer_ref }). +-record(cstate, + { pid, + callback, + opened, + obtained, + blocked, + pending_closes + }). + +-record(pending, + { kind, + pid, + requested, + from + }). + %%---------------------------------------------------------------------------- %% Specs %%---------------------------------------------------------------------------- @@ -182,18 +242,18 @@ -ifdef(use_specs). -type(ref() :: any()). --type(error() :: {'error', any()}). --type(ok_or_error() :: ('ok' | error())). --type(val_or_error(T) :: ({'ok', T} | error())). +-type(ok_or_error() :: 'ok' | {'error', any()}). +-type(val_or_error(T) :: {'ok', T} | {'error', any()}). -type(position() :: ('bof' | 'eof' | non_neg_integer() | - {('bof' |'eof'), non_neg_integer()} | {'cur', integer()})). + {('bof' |'eof'), non_neg_integer()} | + {'cur', integer()})). -type(offset() :: non_neg_integer()). -spec(register_callback/3 :: (atom(), atom(), [any()]) -> 'ok'). -spec(open/3 :: - (string(), [any()], - [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) -> - val_or_error(ref())). + (string(), [any()], + [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) + -> val_or_error(ref())). -spec(close/1 :: (ref()) -> ok_or_error()). -spec(read/2 :: (ref(), non_neg_integer()) -> val_or_error([char()] | binary()) | 'eof'). @@ -210,8 +270,11 @@ -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(delete/1 :: (ref()) -> ok_or_error()). -spec(clear/1 :: (ref()) -> ok_or_error()). --spec(release_on_death/1 :: (pid()) -> 'ok'). -spec(obtain/0 :: () -> 'ok'). +-spec(transfer/1 :: (pid()) -> 'ok'). +-spec(set_limit/1 :: (non_neg_integer()) -> 'ok'). 
+-spec(get_limit/0 :: () -> non_neg_integer()). +-spec(ulimit/0 :: () -> 'infinity' | 'unknown' | non_neg_integer()). -endif. @@ -238,9 +301,9 @@ open(Path, Mode, Options) -> IsWriter = is_writer(Mode1), case IsWriter andalso HasWriter of true -> {error, writer_exists}; - false -> Ref = make_ref(), - case open1(Path1, Mode1, Options, Ref, bof, new) of - {ok, _Handle} -> + false -> {ok, Ref} = new_closed_handle(Path1, Mode1, Options), + case get_or_reopen([{Ref, new}]) of + {ok, [_Handle1]} -> RCount1 = case is_reader(Mode1) of true -> RCount + 1; false -> RCount @@ -251,6 +314,7 @@ open(Path, Mode, Options) -> has_writer = HasWriter1 }), {ok, Ref}; Error -> + erase({Ref, fhc_handle}), Error end end. @@ -301,7 +365,7 @@ append(Ref, Data) -> Size1 = Size + iolist_size(Data), Handle2 = Handle1 #handle { write_buffer = WriteBuffer1, write_buffer_size = Size1 }, - case Limit /= infinity andalso Size1 > Limit of + case Limit =/= infinity andalso Size1 > Limit of true -> {Result, Handle3} = write_buffer(Handle2), {Result, [Handle3]}; false -> {ok, [Handle2]} @@ -375,7 +439,8 @@ copy(Src, Dest, Count) -> {ok, Count1} = Result1 -> {Result1, [SHandle #handle { offset = SOffset + Count1 }, - DHandle #handle { offset = DOffset + Count1 }]}; + DHandle #handle { offset = DOffset + Count1, + is_dirty = true }]}; Error -> {Error, [SHandle, DHandle]} end; @@ -420,29 +485,29 @@ set_maximum_since_use(MaximumAge) -> case lists:foldl( fun ({{Ref, fhc_handle}, Handle = #handle { hdl = Hdl, last_used_at = Then }}, Rep) -> - Age = timer:now_diff(Now, Then), - case Hdl /= closed andalso Age >= MaximumAge of - true -> {Res, Handle1} = soft_close(Handle), - case Res of - ok -> put({Ref, fhc_handle}, Handle1), - false; - _ -> put_handle(Ref, Handle1), - Rep - end; + case Hdl =/= closed andalso + timer:now_diff(Now, Then) >= MaximumAge of + true -> soft_close(Ref, Handle) orelse Rep; false -> Rep end; (_KeyValuePair, Rep) -> Rep - end, true, get()) of - true -> age_tree_change(), ok; - false 
-> ok + end, false, get()) of + false -> age_tree_change(), ok; + true -> ok end. -release_on_death(Pid) when is_pid(Pid) -> - gen_server:cast(?SERVER, {release_on_death, Pid}). - obtain() -> - gen_server:call(?SERVER, obtain, infinity). + gen_server:call(?SERVER, {obtain, self()}, infinity). + +transfer(Pid) -> + gen_server:cast(?SERVER, {transfer, self(), Pid}). + +set_limit(Limit) -> + gen_server:call(?SERVER, {set_limit, Limit}, infinity). + +get_limit() -> + gen_server:call(?SERVER, get_limit, infinity). %%---------------------------------------------------------------------------- %% Internal functions @@ -459,18 +524,9 @@ append_to_write(Mode) -> end. with_handles(Refs, Fun) -> - ResHandles = lists:foldl( - fun (Ref, {ok, HandlesAcc}) -> - case get_or_reopen(Ref) of - {ok, Handle} -> {ok, [Handle | HandlesAcc]}; - Error -> Error - end; - (_Ref, Error) -> - Error - end, {ok, []}, Refs), - case ResHandles of + case get_or_reopen([{Ref, reopen} || Ref <- Refs]) of {ok, Handles} -> - case Fun(lists:reverse(Handles)) of + case Fun(Handles) of {Result, Handles1} when is_list(Handles1) -> lists:zipwith(fun put_handle/2, Refs, Handles1), Result; @@ -499,36 +555,94 @@ with_flushed_handles(Refs, Fun) -> end end). 
-get_or_reopen(Ref) -> - case get({Ref, fhc_handle}) of - undefined -> - {error, not_open, Ref}; - #handle { hdl = closed, offset = Offset, - path = Path, mode = Mode, options = Options } -> - open1(Path, Mode, Options, Ref, Offset, reopen); - Handle -> - {ok, Handle} +get_or_reopen(RefNewOrReopens) -> + case partition_handles(RefNewOrReopens) of + {OpenHdls, []} -> + {ok, [Handle || {_Ref, Handle} <- OpenHdls]}; + {OpenHdls, ClosedHdls} -> + Oldest = oldest(get_age_tree(), fun () -> now() end), + case gen_server:call(?SERVER, {open, self(), length(ClosedHdls), + Oldest}, infinity) of + ok -> + case reopen(ClosedHdls) of + {ok, RefHdls} -> sort_handles(RefNewOrReopens, + OpenHdls, RefHdls, []); + Error -> Error + end; + close -> + [soft_close(Ref, Handle) || + {{Ref, fhc_handle}, Handle = #handle { hdl = Hdl }} <- + get(), + Hdl =/= closed], + get_or_reopen(RefNewOrReopens) + end + end. + +reopen(ClosedHdls) -> reopen(ClosedHdls, get_age_tree(), []). + +reopen([], Tree, RefHdls) -> + put_age_tree(Tree), + {ok, lists:reverse(RefHdls)}; +reopen([{Ref, NewOrReopen, Handle = #handle { hdl = closed, + path = Path, + mode = Mode, + offset = Offset, + last_used_at = undefined }} | + RefNewOrReopenHdls] = ToOpen, Tree, RefHdls) -> + case file:open(Path, case NewOrReopen of + new -> Mode; + reopen -> [read | Mode] + end) of + {ok, Hdl} -> + Now = now(), + {{ok, Offset1}, Handle1} = + maybe_seek(Offset, Handle #handle { hdl = Hdl, + offset = 0, + last_used_at = Now }), + Handle2 = Handle1 #handle { trusted_offset = Offset1 }, + put({Ref, fhc_handle}, Handle2), + reopen(RefNewOrReopenHdls, gb_trees:insert(Now, Ref, Tree), + [{Ref, Handle2} | RefHdls]); + Error -> + %% NB: none of the handles in ToOpen are in the age tree + Oldest = oldest(Tree, fun () -> undefined end), + [gen_server:cast(?SERVER, {close, self(), Oldest}) || _ <- ToOpen], + put_age_tree(Tree), + Error end. 
+partition_handles(RefNewOrReopens) -> + lists:foldr( + fun ({Ref, NewOrReopen}, {Open, Closed}) -> + case get({Ref, fhc_handle}) of + #handle { hdl = closed } = Handle -> + {Open, [{Ref, NewOrReopen, Handle} | Closed]}; + #handle {} = Handle -> + {[{Ref, Handle} | Open], Closed} + end + end, {[], []}, RefNewOrReopens). + +sort_handles([], [], [], Acc) -> + {ok, lists:reverse(Acc)}; +sort_handles([{Ref, _} | RefHdls], [{Ref, Handle} | RefHdlsA], RefHdlsB, Acc) -> + sort_handles(RefHdls, RefHdlsA, RefHdlsB, [Handle | Acc]); +sort_handles([{Ref, _} | RefHdls], RefHdlsA, [{Ref, Handle} | RefHdlsB], Acc) -> + sort_handles(RefHdls, RefHdlsA, RefHdlsB, [Handle | Acc]). + put_handle(Ref, Handle = #handle { last_used_at = Then }) -> Now = now(), age_tree_update(Then, Now, Ref), put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). -with_age_tree(Fun) -> - put(fhc_age_tree, Fun(case get(fhc_age_tree) of - undefined -> gb_trees:empty(); - AgeTree -> AgeTree - end)). +with_age_tree(Fun) -> put_age_tree(Fun(get_age_tree())). -age_tree_insert(Now, Ref) -> - with_age_tree( - fun (Tree) -> - Tree1 = gb_trees:insert(Now, Ref, Tree), - {Oldest, _Ref} = gb_trees:smallest(Tree1), - gen_server:cast(?SERVER, {open, self(), Oldest}), - Tree1 - end). +get_age_tree() -> + case get(fhc_age_tree) of + undefined -> gb_trees:empty(); + AgeTree -> AgeTree + end. + +put_age_tree(Tree) -> put(fhc_age_tree, Tree). age_tree_update(Then, Now, Ref) -> with_age_tree( @@ -540,13 +654,7 @@ age_tree_delete(Then) -> with_age_tree( fun (Tree) -> Tree1 = gb_trees:delete_any(Then, Tree), - Oldest = case gb_trees:is_empty(Tree1) of - true -> - undefined; - false -> - {Oldest1, _Ref} = gb_trees:smallest(Tree1), - Oldest1 - end, + Oldest = oldest(Tree1, fun () -> undefined end), gen_server:cast(?SERVER, {close, self(), Oldest}), Tree1 end). @@ -562,48 +670,53 @@ age_tree_change() -> Tree end). 
-open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> - Mode1 = case NewOrReopen of - new -> Mode; - reopen -> [read | Mode] - end, - case file:open(Path, Mode1) of - {ok, Hdl} -> - WriteBufferSize = - case proplists:get_value(write_buffer, Options, unbuffered) of - unbuffered -> 0; - infinity -> infinity; - N when is_integer(N) -> N - end, - Now = now(), - Handle = #handle { hdl = Hdl, - offset = 0, - trusted_offset = 0, - is_dirty = false, - write_buffer_size = 0, - write_buffer_size_limit = WriteBufferSize, - write_buffer = [], - at_eof = false, - path = Path, - mode = Mode, - options = Options, - is_write = is_writer(Mode), - is_read = is_reader(Mode), - last_used_at = Now }, - {{ok, Offset1}, Handle1} = maybe_seek(Offset, Handle), - Handle2 = Handle1 #handle { trusted_offset = Offset1 }, - put({Ref, fhc_handle}, Handle2), - age_tree_insert(Now, Ref), - {ok, Handle2}; - {error, Reason} -> - {error, Reason} +oldest(Tree, DefaultFun) -> + case gb_trees:is_empty(Tree) of + true -> DefaultFun(); + false -> {Oldest, _Ref} = gb_trees:smallest(Tree), + Oldest + end. + +new_closed_handle(Path, Mode, Options) -> + WriteBufferSize = + case proplists:get_value(write_buffer, Options, unbuffered) of + unbuffered -> 0; + infinity -> infinity; + N when is_integer(N) -> N + end, + Ref = make_ref(), + put({Ref, fhc_handle}, #handle { hdl = closed, + offset = 0, + trusted_offset = 0, + is_dirty = false, + write_buffer_size = 0, + write_buffer_size_limit = WriteBufferSize, + write_buffer = [], + at_eof = false, + path = Path, + mode = Mode, + options = Options, + is_write = is_writer(Mode), + is_read = is_reader(Mode), + last_used_at = undefined }), + {ok, Ref}. + +soft_close(Ref, Handle) -> + {Res, Handle1} = soft_close(Handle), + case Res of + ok -> put({Ref, fhc_handle}, Handle1), + true; + _ -> put_handle(Ref, Handle1), + false end. 
soft_close(Handle = #handle { hdl = closed }) -> {ok, Handle}; soft_close(Handle) -> case write_buffer(Handle) of - {ok, #handle { hdl = Hdl, offset = Offset, is_dirty = IsDirty, + {ok, #handle { hdl = Hdl, + offset = Offset, + is_dirty = IsDirty, last_used_at = Then } = Handle1 } -> ok = case IsDirty of true -> file:sync(Hdl); @@ -611,8 +724,10 @@ soft_close(Handle) -> end, ok = file:close(Hdl), age_tree_delete(Then), - {ok, Handle1 #handle { hdl = closed, trusted_offset = Offset, - is_dirty = false }}; + {ok, Handle1 #handle { hdl = closed, + trusted_offset = Offset, + is_dirty = false, + last_used_at = undefined }}; {_Error, _Handle} = Result -> Result end. @@ -699,116 +814,309 @@ init([]) -> Watermark > 0) -> Watermark; _ -> - ulimit() + case ulimit() of + infinity -> infinity; + unknown -> ?FILE_HANDLES_LIMIT_OTHER; + Lim -> lists:max([2, Lim - ?RESERVED_FOR_OTHERS]) + end end, - error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), - {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, - obtains = [], callbacks = dict:new(), - client_mrefs = dict:new(), timer_ref = undefined }}. - -handle_call(obtain, From, State = #fhc_state { count = Count }) -> - State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = - maybe_reduce(State #fhc_state { count = Count + 1 }), - case Limit /= infinity andalso Count1 >= Limit of - true -> {noreply, State1 #fhc_state { obtains = [From | Obtains], - count = Count1 - 1 }}; - false -> {reply, ok, State1} - end. 
- -handle_cast({register_callback, Pid, MFA}, - State = #fhc_state { callbacks = Callbacks }) -> - {noreply, ensure_mref( - Pid, State #fhc_state { - callbacks = dict:store(Pid, MFA, Callbacks) })}; - -handle_cast({open, Pid, EldestUnusedSince}, State = - #fhc_state { elders = Elders, count = Count }) -> + ObtainLimit = obtain_limit(Limit), + error_logger:info_msg("Limiting to approx ~p file handles (~p sockets)~n", + [Limit, ObtainLimit]), + Clients = ets:new(?CLIENT_ETS_TABLE, [set, private, {keypos, #cstate.pid}]), + {ok, #fhc_state { elders = dict:new(), + limit = Limit, + open_count = 0, + open_pending = pending_new(), + obtain_limit = ObtainLimit, + obtain_count = 0, + obtain_pending = pending_new(), + clients = Clients, + timer_ref = undefined }}. + +handle_call({open, Pid, Requested, EldestUnusedSince}, From, + State = #fhc_state { open_count = Count, + open_pending = Pending, + elders = Elders, + clients = Clients }) + when EldestUnusedSince =/= undefined -> Elders1 = dict:store(Pid, EldestUnusedSince, Elders), - {noreply, maybe_reduce( - ensure_mref(Pid, State #fhc_state { elders = Elders1, - count = Count + 1 }))}; + Item = #pending { kind = open, + pid = Pid, + requested = Requested, + from = From }, + ok = track_client(Pid, Clients), + State1 = State #fhc_state { elders = Elders1 }, + case needs_reduce(State1 #fhc_state { open_count = Count + Requested }) of + true -> case ets:lookup(Clients, Pid) of + [#cstate { opened = 0 }] -> + true = ets:update_element( + Clients, Pid, {#cstate.blocked, true}), + {noreply, + reduce(State1 #fhc_state { + open_pending = pending_in(Item, Pending) })}; + [#cstate { opened = Opened }] -> + true = ets:update_element( + Clients, Pid, + {#cstate.pending_closes, Opened}), + {reply, close, State1} + end; + false -> {noreply, run_pending_item(Item, State1)} + end; + +handle_call({obtain, Pid}, From, State = #fhc_state { obtain_limit = Limit, + obtain_count = Count, + obtain_pending = Pending, + clients = Clients }) + when 
Limit =/= infinity andalso Count >= Limit -> + ok = track_client(Pid, Clients), + true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), + Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, + {noreply, State #fhc_state { obtain_pending = pending_in(Item, Pending) }}; +handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, + obtain_pending = Pending, + clients = Clients }) -> + Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, + ok = track_client(Pid, Clients), + case needs_reduce(State #fhc_state { obtain_count = Count + 1 }) of + true -> + true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), + {noreply, reduce(State #fhc_state { + obtain_pending = pending_in(Item, Pending) })}; + false -> + {noreply, run_pending_item(Item, State)} + end; +handle_call({set_limit, Limit}, _From, State) -> + {reply, ok, maybe_reduce( + process_pending(State #fhc_state { + limit = Limit, + obtain_limit = obtain_limit(Limit) }))}; +handle_call(get_limit, _From, State = #fhc_state { limit = Limit }) -> + {reply, Limit, State}. 
-handle_cast({update, Pid, EldestUnusedSince}, State = - #fhc_state { elders = Elders }) -> +handle_cast({register_callback, Pid, MFA}, + State = #fhc_state { clients = Clients }) -> + ok = track_client(Pid, Clients), + true = ets:update_element(Clients, Pid, {#cstate.callback, MFA}), + {noreply, State}; + +handle_cast({update, Pid, EldestUnusedSince}, + State = #fhc_state { elders = Elders }) + when EldestUnusedSince =/= undefined -> Elders1 = dict:store(Pid, EldestUnusedSince, Elders), %% don't call maybe_reduce from here otherwise we can create a %% storm of messages - {noreply, ensure_mref(Pid, State #fhc_state { elders = Elders1 })}; + {noreply, State #fhc_state { elders = Elders1 }}; -handle_cast({close, Pid, EldestUnusedSince}, State = - #fhc_state { elders = Elders, count = Count }) -> +handle_cast({close, Pid, EldestUnusedSince}, + State = #fhc_state { elders = Elders, clients = Clients }) -> Elders1 = case EldestUnusedSince of undefined -> dict:erase(Pid, Elders); _ -> dict:store(Pid, EldestUnusedSince, Elders) end, - {noreply, process_obtains( - ensure_mref(Pid, State #fhc_state { elders = Elders1, - count = Count - 1 }))}; + ets:update_counter(Clients, Pid, {#cstate.pending_closes, -1, 0, 0}), + {noreply, process_pending( + update_counts(open, Pid, -1, + State #fhc_state { elders = Elders1 }))}; + +handle_cast({transfer, FromPid, ToPid}, State) -> + ok = track_client(ToPid, State#fhc_state.clients), + {noreply, process_pending( + update_counts(obtain, ToPid, +1, + update_counts(obtain, FromPid, -1, State)))}; handle_cast(check_counts, State) -> - {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}; - -handle_cast({release_on_death, Pid}, State) -> - _MRef = erlang:monitor(process, Pid), - {noreply, State}. 
- -handle_info({'DOWN', MRef, process, Pid, _Reason}, State = - #fhc_state { count = Count, callbacks = Callbacks, - client_mrefs = ClientMRefs, elders = Elders }) -> - {noreply, process_obtains( - case dict:find(Pid, ClientMRefs) of - {ok, MRef} -> State #fhc_state { - elders = dict:erase(Pid, Elders), - client_mrefs = dict:erase(Pid, ClientMRefs), - callbacks = dict:erase(Pid, Callbacks) }; - _ -> State #fhc_state { count = Count - 1 } - end)}. - -terminate(_Reason, State) -> + {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}. + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #fhc_state { elders = Elders, + open_count = OpenCount, + open_pending = OpenPending, + obtain_count = ObtainCount, + obtain_pending = ObtainPending, + clients = Clients }) -> + [#cstate { opened = Opened, obtained = Obtained }] = + ets:lookup(Clients, Pid), + true = ets:delete(Clients, Pid), + FilterFun = fun (#pending { pid = Pid1 }) -> Pid1 =/= Pid end, + {noreply, process_pending( + State #fhc_state { + open_count = OpenCount - Opened, + open_pending = filter_pending(FilterFun, OpenPending), + obtain_count = ObtainCount - Obtained, + obtain_pending = filter_pending(FilterFun, ObtainPending), + elders = dict:erase(Pid, Elders) })}. + +terminate(_Reason, State = #fhc_state { clients = Clients }) -> + ets:delete(Clients), State. code_change(_OldVsn, State, _Extra) -> {ok, State}. %%---------------------------------------------------------------------------- +%% pending queue abstraction helpers +%%---------------------------------------------------------------------------- + +queue_fold(Fun, Init, Q) -> + case queue:out(Q) of + {empty, _Q} -> Init; + {{value, V}, Q1} -> queue_fold(Fun, Fun(V, Init), Q1) + end. 
+ +filter_pending(Fun, {Count, Queue}) -> + {Delta, Queue1} = + queue_fold(fun (Item, {DeltaN, QueueN}) -> + case Fun(Item) of + true -> {DeltaN, queue:in(Item, QueueN)}; + false -> {DeltaN - requested(Item), QueueN} + end + end, {0, queue:new()}, Queue), + {Count + Delta, Queue1}. + +pending_new() -> + {0, queue:new()}. + +pending_in(Item = #pending { requested = Requested }, {Count, Queue}) -> + {Count + Requested, queue:in(Item, Queue)}. + +pending_out({0, _Queue} = Pending) -> + {empty, Pending}; +pending_out({N, Queue}) -> + {{value, #pending { requested = Requested }} = Result, Queue1} = + queue:out(Queue), + {Result, {N - Requested, Queue1}}. + +pending_count({Count, _Queue}) -> + Count. + +pending_is_empty({0, _Queue}) -> + true; +pending_is_empty({_N, _Queue}) -> + false. + +%%---------------------------------------------------------------------------- %% server helpers %%---------------------------------------------------------------------------- -process_obtains(State = #fhc_state { obtains = [] }) -> - State; -process_obtains(State = #fhc_state { limit = Limit, count = Count }) - when Limit /= infinity andalso Count >= Limit -> +obtain_limit(infinity) -> infinity; +obtain_limit(Limit) -> case ?OBTAIN_LIMIT(Limit) of + OLimit when OLimit < 0 -> 0; + OLimit -> OLimit + end. + +requested({_Kind, _Pid, Requested, _From}) -> + Requested. + +process_pending(State = #fhc_state { limit = infinity }) -> State; -process_obtains(State = #fhc_state { limit = Limit, count = Count, - obtains = Obtains }) -> - ObtainsLen = length(Obtains), - ObtainableLen = lists:min([ObtainsLen, Limit - Count]), - Take = ObtainsLen - ObtainableLen, - {ObtainsNew, ObtainableRev} = lists:split(Take, Obtains), - [gen_server:reply(From, ok) || From <- ObtainableRev], - State #fhc_state { count = Count + ObtainableLen, obtains = ObtainsNew }. 
- -maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders, - callbacks = Callbacks, timer_ref = TRef }) - when Limit /= infinity andalso Count >= Limit -> +process_pending(State) -> + process_open(process_obtain(State)). + +process_open(State = #fhc_state { limit = Limit, + open_pending = Pending, + open_count = OpenCount, + obtain_count = ObtainCount }) -> + {Pending1, State1} = + process_pending(Pending, Limit - (ObtainCount + OpenCount), State), + State1 #fhc_state { open_pending = Pending1 }. + +process_obtain(State = #fhc_state { limit = Limit, + obtain_pending = Pending, + obtain_limit = ObtainLimit, + obtain_count = ObtainCount, + open_count = OpenCount }) -> + Quota = lists:min([ObtainLimit - ObtainCount, + Limit - (ObtainCount + OpenCount)]), + {Pending1, State1} = process_pending(Pending, Quota, State), + State1 #fhc_state { obtain_pending = Pending1 }. + +process_pending(Pending, Quota, State) when Quota =< 0 -> + {Pending, State}; +process_pending(Pending, Quota, State) -> + case pending_out(Pending) of + {empty, _Pending} -> + {Pending, State}; + {{value, #pending { requested = Requested }}, _Pending1} + when Requested > Quota -> + {Pending, State}; + {{value, #pending { requested = Requested } = Item}, Pending1} -> + process_pending(Pending1, Quota - Requested, + run_pending_item(Item, State)) + end. + +run_pending_item(#pending { kind = Kind, + pid = Pid, + requested = Requested, + from = From }, + State = #fhc_state { clients = Clients }) -> + gen_server:reply(From, ok), + true = ets:update_element(Clients, Pid, {#cstate.blocked, false}), + update_counts(Kind, Pid, Requested, State). + +update_counts(Kind, Pid, Delta, + State = #fhc_state { open_count = OpenCount, + obtain_count = ObtainCount, + clients = Clients }) -> + {OpenDelta, ObtainDelta} = update_counts1(Kind, Pid, Delta, Clients), + State #fhc_state { open_count = OpenCount + OpenDelta, + obtain_count = ObtainCount + ObtainDelta }. 
+ +update_counts1(open, Pid, Delta, Clients) -> + ets:update_counter(Clients, Pid, {#cstate.opened, Delta}), + {Delta, 0}; +update_counts1(obtain, Pid, Delta, Clients) -> + ets:update_counter(Clients, Pid, {#cstate.obtained, Delta}), + {0, Delta}. + +maybe_reduce(State) -> + case needs_reduce(State) of + true -> reduce(State); + false -> State + end. + +needs_reduce(#fhc_state { limit = Limit, + open_count = OpenCount, + open_pending = OpenPending, + obtain_count = ObtainCount, + obtain_limit = ObtainLimit, + obtain_pending = ObtainPending }) -> + Limit =/= infinity + andalso ((OpenCount + ObtainCount > Limit) + orelse (not pending_is_empty(OpenPending)) + orelse (ObtainCount < ObtainLimit + andalso not pending_is_empty(ObtainPending))). + +reduce(State = #fhc_state { open_pending = OpenPending, + obtain_pending = ObtainPending, + elders = Elders, + clients = Clients, + timer_ref = TRef }) -> Now = now(), - {Pids, Sum, ClientCount} = - dict:fold(fun (_Pid, undefined, Accs) -> - Accs; - (Pid, Eldest, {PidsAcc, SumAcc, CountAcc}) -> - {[Pid|PidsAcc], SumAcc + timer:now_diff(Now, Eldest), - CountAcc + 1} + {CStates, Sum, ClientCount} = + dict:fold(fun (Pid, Eldest, {CStatesAcc, SumAcc, CountAcc} = Accs) -> + [#cstate { pending_closes = PendingCloses, + opened = Opened, + blocked = Blocked } = CState] = + ets:lookup(Clients, Pid), + case Blocked orelse PendingCloses =:= Opened of + true -> Accs; + false -> {[CState | CStatesAcc], + SumAcc + timer:now_diff(Now, Eldest), + CountAcc + 1} + end end, {[], 0, 0}, Elders), - case Pids of + case CStates of [] -> ok; - _ -> AverageAge = Sum / ClientCount, - lists:foreach( - fun (Pid) -> - case dict:find(Pid, Callbacks) of - error -> ok; - {ok, {M, F, A}} -> apply(M, F, A ++ [AverageAge]) - end - end, Pids) + _ -> case (Sum / ClientCount) - + (1000 * ?FILE_HANDLES_CHECK_INTERVAL) of + AverageAge when AverageAge > 0 -> + notify_age(CStates, AverageAge); + _ -> + notify_age0(Clients, CStates, + pending_count(OpenPending) + + 
pending_count(ObtainPending)) + end end, case TRef of undefined -> {ok, TRef1} = timer:apply_after( @@ -816,16 +1124,47 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders, gen_server, cast, [?SERVER, check_counts]), State #fhc_state { timer_ref = TRef1 }; _ -> State - end; -maybe_reduce(State) -> - State. + end. -%% Googling around suggests that Windows has a limit somewhere around -%% 16M, eg -%% http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx -%% For everything else, assume ulimit exists. Further googling -%% suggests that BSDs (incl OS X), solaris and linux all agree that -%% ulimit -n is file handles +notify_age(CStates, AverageAge) -> + lists:foreach( + fun (#cstate { callback = undefined }) -> ok; + (#cstate { callback = {M, F, A} }) -> apply(M, F, A ++ [AverageAge]) + end, CStates). + +notify_age0(Clients, CStates, Required) -> + Notifications = + [CState || CState <- CStates, CState#cstate.callback =/= undefined], + {L1, L2} = lists:split(random:uniform(length(Notifications)), + Notifications), + notify(Clients, Required, L2 ++ L1). + +notify(_Clients, _Required, []) -> + ok; +notify(_Clients, Required, _Notifications) when Required =< 0 -> + ok; +notify(Clients, Required, [#cstate{ pid = Pid, + callback = {M, F, A}, + opened = Opened } | Notifications]) -> + apply(M, F, A ++ [0]), + ets:update_element(Clients, Pid, {#cstate.pending_closes, Opened}), + notify(Clients, Required - Opened, Notifications). + +track_client(Pid, Clients) -> + case ets:insert_new(Clients, #cstate { pid = Pid, + callback = undefined, + opened = 0, + obtained = 0, + blocked = false, + pending_closes = 0 }) of + true -> _MRef = erlang:monitor(process, Pid), + ok; + false -> ok + end. + +%% For all unices, assume ulimit exists. 
Further googling suggests +%% that BSDs (incl OS X), solaris and linux all agree that ulimit -n +%% is file handles ulimit() -> case os:type() of {win32, _OsName} -> @@ -839,24 +1178,14 @@ ulimit() -> "unlimited" -> infinity; String = [C|_] when $0 =< C andalso C =< $9 -> - Num = list_to_integer( - lists:takewhile( - fun (D) -> $0 =< D andalso D =< $9 end, String)) - - ?RESERVED_FOR_OTHERS, - lists:max([1, Num]); + list_to_integer( + lists:takewhile( + fun (D) -> $0 =< D andalso D =< $9 end, String)); _ -> %% probably a variant of %% "/bin/sh: line 1: ulimit: command not found\n" - ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS + unknown end; _ -> - ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS - end. - -ensure_mref(Pid, State = #fhc_state { client_mrefs = ClientMRefs }) -> - case dict:find(Pid, ClientMRefs) of - {ok, _MRef} -> State; - error -> MRef = erlang:monitor(process, Pid), - State #fhc_state { - client_mrefs = dict:store(Pid, MRef, ClientMRefs) } + unknown end. diff --git a/src/gatherer.erl b/src/gatherer.erl new file mode 100644 index 00000000..1e03d6c4 --- /dev/null +++ b/src/gatherer.erl @@ -0,0 +1,145 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(gatherer). + +-behaviour(gen_server2). + +-export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). +-spec(stop/1 :: (pid()) -> 'ok'). +-spec(fork/1 :: (pid()) -> 'ok'). +-spec(finish/1 :: (pid()) -> 'ok'). +-spec(in/2 :: (pid(), any()) -> 'ok'). +-spec(out/1 :: (pid()) -> {'value', any()} | 'empty'). + +-endif. + +%%---------------------------------------------------------------------------- + +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + +%%---------------------------------------------------------------------------- + +-record(gstate, { forks, values, blocked }). + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server2:start_link(?MODULE, [], [{timeout, infinity}]). + +stop(Pid) -> + gen_server2:call(Pid, stop, infinity). + +fork(Pid) -> + gen_server2:call(Pid, fork, infinity). + +finish(Pid) -> + gen_server2:cast(Pid, finish). + +in(Pid, Value) -> + gen_server2:cast(Pid, {in, Value}). + +out(Pid) -> + gen_server2:call(Pid, out, infinity). 
+ +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, #gstate { forks = 0, values = queue:new(), blocked = queue:new() }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; + +handle_call(fork, _From, State = #gstate { forks = Forks }) -> + {reply, ok, State #gstate { forks = Forks + 1 }, hibernate}; + +handle_call(out, From, State = #gstate { forks = Forks, + values = Values, + blocked = Blocked }) -> + case queue:out(Values) of + {empty, _} -> + case Forks of + 0 -> {reply, empty, State, hibernate}; + _ -> {noreply, + State #gstate { blocked = queue:in(From, Blocked) }, + hibernate} + end; + {{value, _Value} = V, NewValues} -> + {reply, V, State #gstate { values = NewValues }, hibernate} + end; + +handle_call(Msg, _From, State) -> + {stop, {unexpected_call, Msg}, State}. + +handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> + NewForks = Forks - 1, + NewBlocked = case NewForks of + 0 -> [gen_server2:reply(From, empty) || + From <- queue:to_list(Blocked)], + queue:new(); + _ -> Blocked + end, + {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, + hibernate}; + +handle_cast({in, Value}, State = #gstate { values = Values, + blocked = Blocked }) -> + {noreply, case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in(Value, Values) }; + {{value, From}, NewBlocked} -> + gen_server2:reply(From, {value, Value}), + State #gstate { blocked = NewBlocked } + end, hibernate}; + +handle_cast(Msg, State) -> + {stop, {unexpected_cast, Msg}, State}. + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +terminate(_Reason, State) -> + State. 
diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 547f0a42..b0379b95 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -16,10 +16,12 @@ %% The original code could reorder messages when communicating with a %% process on a remote node that was not currently connected. %% -%% 4) The new functions gen_server2:pcall/3, pcall/4, and pcast/3 -%% allow callers to attach priorities to requests. Requests with -%% higher priorities are processed before requests with lower -%% priorities. The default priority is 0. +%% 4) The callback module can optionally implement prioritise_call/3, +%% prioritise_cast/2 and prioritise_info/2. These functions take +%% Message, From and State or just Message and State and return a +%% single integer representing the priority attached to the message. +%% Messages with higher priorities are processed before requests with +%% lower priorities. The default priority is 0. %% %% 5) The callback module can optionally implement %% handle_pre_hibernate/1 and handle_post_hibernate/1. These will be @@ -64,16 +66,16 @@ %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved via the world wide web at http://www.erlang.org/. -%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% The Initial Developer of the Original Code is Ericsson Utvecklings AB. %% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings %% AB. All Rights Reserved.'' -%% +%% %% $Id$ %% -module(gen_server2). @@ -82,13 +84,13 @@ %%% %%% The idea behind THIS server is that the user module %%% provides (different) functions to handle different -%%% kind of inputs. +%%% kind of inputs. 
%%% If the Parent process terminates the Module:terminate/2 %%% function is called. %%% %%% The user module should export: %%% -%%% init(Args) +%%% init(Args) %%% ==> {ok, State} %%% {ok, State, Timeout} %%% {ok, State, Timeout, Backoff} @@ -101,21 +103,21 @@ %%% {reply, Reply, State, Timeout} %%% {noreply, State} %%% {noreply, State, Timeout} -%%% {stop, Reason, Reply, State} +%%% {stop, Reason, Reply, State} %%% Reason = normal | shutdown | Term terminate(State) is called %%% %%% handle_cast(Msg, State) %%% %%% ==> {noreply, State} %%% {noreply, State, Timeout} -%%% {stop, Reason, State} +%%% {stop, Reason, State} %%% Reason = normal | shutdown | Term terminate(State) is called %%% %%% handle_info(Info, State) Info is e.g. {'EXIT', P, R}, {nodedown, N}, ... %%% %%% ==> {noreply, State} %%% {noreply, State, Timeout} -%%% {stop, Reason, State} +%%% {stop, Reason, State} %%% Reason = normal | shutdown | Term, terminate(State) is called %%% %%% terminate(Reason, State) Let the user module clean up @@ -159,37 +161,41 @@ %% API -export([start/3, start/4, - start_link/3, start_link/4, - call/2, call/3, pcall/3, pcall/4, - cast/2, pcast/3, reply/2, - abcast/2, abcast/3, - multi_call/2, multi_call/3, multi_call/4, - enter_loop/3, enter_loop/4, enter_loop/5, wake_hib/7]). + start_link/3, start_link/4, + call/2, call/3, + cast/2, reply/2, + abcast/2, abcast/3, + multi_call/2, multi_call/3, multi_call/4, + enter_loop/3, enter_loop/4, enter_loop/5, enter_loop/6, wake_hib/1]). -export([behaviour_info/1]). %% System exports -export([system_continue/3, - system_terminate/4, - system_code_change/4, - format_status/2]). + system_terminate/4, + system_code_change/4, + format_status/2]). %% Internal exports -export([init_it/6, print_event/3]). -import(error_logger, [format/2]). +%% State record +-record(gs2_state, {parent, name, state, mod, time, + timeout_state, queue, debug, prioritise_call, + prioritise_cast, prioritise_info}). 
+ %%%========================================================================= %%% Specs. These exist only to shut up dialyzer's warnings %%%========================================================================= -ifdef(use_specs). --spec(handle_common_termination/6 :: - (any(), any(), any(), atom(), any(), any()) -> no_return()). +-spec(handle_common_termination/3 :: + (any(), atom(), #gs2_state{}) -> no_return()). --spec(hibernate/7 :: - (pid(), any(), any(), atom(), any(), queue(), any()) -> no_return()). +-spec(hibernate/1 :: (#gs2_state{}) -> no_return()). -endif. @@ -238,37 +244,21 @@ start_link(Name, Mod, Args, Options) -> %% be monitored. %% If the client is trapping exits and is linked server termination %% is handled here (? Shall we do that here (or rely on timeouts) ?). -%% ----------------------------------------------------------------- +%% ----------------------------------------------------------------- call(Name, Request) -> case catch gen:call(Name, '$gen_call', Request) of - {ok,Res} -> - Res; - {'EXIT',Reason} -> - exit({Reason, {?MODULE, call, [Name, Request]}}) + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, call, [Name, Request]}}) end. call(Name, Request, Timeout) -> case catch gen:call(Name, '$gen_call', Request, Timeout) of - {ok,Res} -> - Res; - {'EXIT',Reason} -> - exit({Reason, {?MODULE, call, [Name, Request, Timeout]}}) - end. - -pcall(Name, Priority, Request) -> - case catch gen:call(Name, '$gen_pcall', {Priority, Request}) of - {ok,Res} -> - Res; - {'EXIT',Reason} -> - exit({Reason, {?MODULE, pcall, [Name, Priority, Request]}}) - end. - -pcall(Name, Priority, Request, Timeout) -> - case catch gen:call(Name, '$gen_pcall', {Priority, Request}, Timeout) of - {ok,Res} -> - Res; - {'EXIT',Reason} -> - exit({Reason, {?MODULE, pcall, [Name, Priority, Request, Timeout]}}) + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, call, [Name, Request, Timeout]}}) end. 
%% ----------------------------------------------------------------- @@ -277,34 +267,18 @@ pcall(Name, Priority, Request, Timeout) -> cast({global,Name}, Request) -> catch global:send(Name, cast_msg(Request)), ok; -cast({Name,Node}=Dest, Request) when is_atom(Name), is_atom(Node) -> +cast({Name,Node}=Dest, Request) when is_atom(Name), is_atom(Node) -> do_cast(Dest, Request); cast(Dest, Request) when is_atom(Dest) -> do_cast(Dest, Request); cast(Dest, Request) when is_pid(Dest) -> do_cast(Dest, Request). -do_cast(Dest, Request) -> +do_cast(Dest, Request) -> do_send(Dest, cast_msg(Request)), ok. - -cast_msg(Request) -> {'$gen_cast',Request}. -pcast({global,Name}, Priority, Request) -> - catch global:send(Name, cast_msg(Priority, Request)), - ok; -pcast({Name,Node}=Dest, Priority, Request) when is_atom(Name), is_atom(Node) -> - do_cast(Dest, Priority, Request); -pcast(Dest, Priority, Request) when is_atom(Dest) -> - do_cast(Dest, Priority, Request); -pcast(Dest, Priority, Request) when is_pid(Dest) -> - do_cast(Dest, Priority, Request). - -do_cast(Dest, Priority, Request) -> - do_send(Dest, cast_msg(Priority, Request)), - ok. - -cast_msg(Priority, Request) -> {'$gen_pcast', {Priority, Request}}. +cast_msg(Request) -> {'$gen_cast',Request}. %% ----------------------------------------------------------------- %% Send a reply to the client. @@ -312,9 +286,9 @@ cast_msg(Priority, Request) -> {'$gen_pcast', {Priority, Request}}. reply({To, Tag}, Reply) -> catch To ! {Tag, Reply}. 
-%% ----------------------------------------------------------------- -%% Asyncronous broadcast, returns nothing, it's just send'n prey -%%----------------------------------------------------------------- +%% ----------------------------------------------------------------- +%% Asyncronous broadcast, returns nothing, it's just send'n pray +%% ----------------------------------------------------------------- abcast(Name, Request) when is_atom(Name) -> do_abcast([node() | nodes()], Name, cast_msg(Request)). @@ -330,36 +304,36 @@ do_abcast([], _,_) -> abcast. %%% Make a call to servers at several nodes. %%% Returns: {[Replies],[BadNodes]} %%% A Timeout can be given -%%% +%%% %%% A middleman process is used in case late answers arrives after %%% the timeout. If they would be allowed to glog the callers message -%%% queue, it would probably become confused. Late answers will +%%% queue, it would probably become confused. Late answers will %%% now arrive to the terminated middleman and so be discarded. %%% ----------------------------------------------------------------- multi_call(Name, Req) when is_atom(Name) -> do_multi_call([node() | nodes()], Name, Req, infinity). -multi_call(Nodes, Name, Req) +multi_call(Nodes, Name, Req) when is_list(Nodes), is_atom(Name) -> do_multi_call(Nodes, Name, Req, infinity). multi_call(Nodes, Name, Req, infinity) -> do_multi_call(Nodes, Name, Req, infinity); -multi_call(Nodes, Name, Req, Timeout) +multi_call(Nodes, Name, Req, Timeout) when is_list(Nodes), is_atom(Name), is_integer(Timeout), Timeout >= 0 -> do_multi_call(Nodes, Name, Req, Timeout). %%----------------------------------------------------------------- -%% enter_loop(Mod, Options, State, <ServerName>, <TimeOut>, <Backoff>) ->_ -%% -%% Description: Makes an existing process into a gen_server. 
-%% The calling process will enter the gen_server receive +%% enter_loop(Mod, Options, State, <ServerName>, <TimeOut>, <Backoff>) ->_ +%% +%% Description: Makes an existing process into a gen_server. +%% The calling process will enter the gen_server receive %% loop and become a gen_server process. -%% The process *must* have been started using one of the -%% start functions in proc_lib, see proc_lib(3). -%% The user is responsible for any initialization of the +%% The process *must* have been started using one of the +%% start functions in proc_lib, see proc_lib(3). +%% The user is responsible for any initialization of the %% process, including registering a name for it. %%----------------------------------------------------------------- enter_loop(Mod, Options, State) -> @@ -386,7 +360,10 @@ enter_loop(Mod, Options, State, ServerName, Timeout, Backoff) -> Debug = debug_options(Name, Options), Queue = priority_queue:new(), Backoff1 = extend_backoff(Backoff), - loop(Parent, Name, State, Mod, Timeout, Backoff1, Queue, Debug). + loop(find_prioritisers( + #gs2_state { parent = Parent, name = Name, state = State, + mod = Mod, time = Timeout, timeout_state = Backoff1, + queue = Queue, debug = Debug })). 
%%%======================================================================== %%% Gen-callback functions @@ -405,39 +382,51 @@ init_it(Starter, Parent, Name0, Mod, Args, Options) -> Name = name(Name0), Debug = debug_options(Name, Options), Queue = priority_queue:new(), + GS2State = find_prioritisers( + #gs2_state { parent = Parent, + name = Name, + mod = Mod, + queue = Queue, + debug = Debug }), case catch Mod:init(Args) of - {ok, State} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, Name, State, Mod, infinity, undefined, Queue, Debug); - {ok, State, Timeout} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, Name, State, Mod, Timeout, undefined, Queue, Debug); - {ok, State, Timeout, Backoff = {backoff, _, _, _}} -> + {ok, State} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(GS2State #gs2_state { state = State, + time = infinity, + timeout_state = undefined }); + {ok, State, Timeout} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(GS2State #gs2_state { state = State, + time = Timeout, + timeout_state = undefined }); + {ok, State, Timeout, Backoff = {backoff, _, _, _}} -> Backoff1 = extend_backoff(Backoff), - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, Name, State, Mod, Timeout, Backoff1, Queue, Debug); - {stop, Reason} -> - %% For consistency, we must make sure that the - %% registered name (if any) is unregistered before - %% the parent process is notified about the failure. - %% (Otherwise, the parent process could get - %% an 'already_started' error if it immediately - %% tried starting the process again.) 
- unregister_name(Name0), - proc_lib:init_ack(Starter, {error, Reason}), - exit(Reason); - ignore -> - unregister_name(Name0), - proc_lib:init_ack(Starter, ignore), - exit(normal); - {'EXIT', Reason} -> - unregister_name(Name0), - proc_lib:init_ack(Starter, {error, Reason}), - exit(Reason); - Else -> - Error = {bad_return_value, Else}, - proc_lib:init_ack(Starter, {error, Error}), - exit(Error) + proc_lib:init_ack(Starter, {ok, self()}), + loop(GS2State #gs2_state { state = State, + time = Timeout, + timeout_state = Backoff1 }); + {stop, Reason} -> + %% For consistency, we must make sure that the + %% registered name (if any) is unregistered before + %% the parent process is notified about the failure. + %% (Otherwise, the parent process could get + %% an 'already_started' error if it immediately + %% tried starting the process again.) + unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + ignore -> + unregister_name(Name0), + proc_lib:init_ack(Starter, ignore), + exit(normal); + {'EXIT', Reason} -> + unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + Else -> + Error = {bad_return_value, Else}, + proc_lib:init_ack(Starter, {error, Error}), + exit(Error) end. name({local,Name}) -> Name; @@ -467,23 +456,24 @@ extend_backoff({backoff, InitialTimeout, MinimumTimeout, DesiredHibPeriod}) -> %%% --------------------------------------------------- %%% The MAIN loop. %%% --------------------------------------------------- -loop(Parent, Name, State, Mod, hibernate, undefined, Queue, Debug) -> - pre_hibernate(Parent, Name, State, Mod, undefined, Queue, Debug); -loop(Parent, Name, State, Mod, Time, TimeoutState, Queue, Debug) -> - process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, - drain(Queue), Debug). +loop(GS2State = #gs2_state { time = hibernate, + timeout_state = undefined }) -> + pre_hibernate(GS2State); +loop(GS2State) -> + process_next_msg(drain(GS2State)). 
-drain(Queue) -> +drain(GS2State) -> receive - Input -> drain(in(Input, Queue)) - after 0 -> Queue + Input -> drain(in(Input, GS2State)) + after 0 -> GS2State end. -process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, Queue, Debug) -> +process_next_msg(GS2State = #gs2_state { time = Time, + timeout_state = TimeoutState, + queue = Queue }) -> case priority_queue:out(Queue) of {{value, Msg}, Queue1} -> - process_msg(Parent, Name, State, Mod, - Time, TimeoutState, Queue1, Debug, Msg); + process_msg(Msg, GS2State #gs2_state { queue = Queue1 }); {empty, Queue1} -> {Time1, HibOnTimeout} = case {Time, TimeoutState} of @@ -504,68 +494,64 @@ process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, Queue, Debug) -> Input -> %% Time could be 'hibernate' here, so *don't* call loop process_next_msg( - Parent, Name, State, Mod, Time, TimeoutState, - drain(in(Input, Queue1)), Debug) + drain(in(Input, GS2State #gs2_state { queue = Queue1 }))) after Time1 -> case HibOnTimeout of true -> pre_hibernate( - Parent, Name, State, Mod, TimeoutState, Queue1, - Debug); + GS2State #gs2_state { queue = Queue1 }); false -> - process_msg( - Parent, Name, State, Mod, Time, TimeoutState, - Queue1, Debug, timeout) + process_msg(timeout, + GS2State #gs2_state { queue = Queue1 }) end end end. -wake_hib(Parent, Name, State, Mod, TS, Queue, Debug) -> +wake_hib(GS2State = #gs2_state { timeout_state = TS }) -> TimeoutState1 = case TS of undefined -> undefined; {SleptAt, TimeoutState} -> adjust_timeout_state(SleptAt, now(), TimeoutState) end, - post_hibernate(Parent, Name, State, Mod, TimeoutState1, - drain(Queue), Debug). + post_hibernate( + drain(GS2State #gs2_state { timeout_state = TimeoutState1 })). 
-hibernate(Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> +hibernate(GS2State = #gs2_state { timeout_state = TimeoutState }) -> TS = case TimeoutState of undefined -> undefined; {backoff, _, _, _, _} -> {now(), TimeoutState} end, - proc_lib:hibernate(?MODULE, wake_hib, [Parent, Name, State, Mod, - TS, Queue, Debug]). + proc_lib:hibernate(?MODULE, wake_hib, + [GS2State #gs2_state { timeout_state = TS }]). -pre_hibernate(Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> +pre_hibernate(GS2State = #gs2_state { state = State, + mod = Mod }) -> case erlang:function_exported(Mod, handle_pre_hibernate, 1) of true -> case catch Mod:handle_pre_hibernate(State) of {hibernate, NState} -> - hibernate(Parent, Name, NState, Mod, TimeoutState, Queue, - Debug); + hibernate(GS2State #gs2_state { state = NState } ); Reply -> - handle_common_termination(Reply, Name, pre_hibernate, - Mod, State, Debug) + handle_common_termination(Reply, pre_hibernate, GS2State) end; false -> - hibernate(Parent, Name, State, Mod, TimeoutState, Queue, Debug) + hibernate(GS2State) end. -post_hibernate(Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> +post_hibernate(GS2State = #gs2_state { state = State, + mod = Mod }) -> case erlang:function_exported(Mod, handle_post_hibernate, 1) of true -> case catch Mod:handle_post_hibernate(State) of {noreply, NState} -> - process_next_msg(Parent, Name, NState, Mod, infinity, - TimeoutState, Queue, Debug); + process_next_msg(GS2State #gs2_state { state = NState, + time = infinity }); {noreply, NState, Time} -> - process_next_msg(Parent, Name, NState, Mod, Time, - TimeoutState, Queue, Debug); + process_next_msg(GS2State #gs2_state { state = NState, + time = Time }); Reply -> - handle_common_termination(Reply, Name, post_hibernate, - Mod, State, Debug) + handle_common_termination(Reply, post_hibernate, GS2State) end; false -> %% use hibernate here, not infinity. 
This matches @@ -574,8 +560,7 @@ post_hibernate(Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> %% still set to hibernate, iff that msg is the very msg %% that woke us up (or the first msg we receive after %% waking up). - process_next_msg(Parent, Name, State, Mod, hibernate, - TimeoutState, Queue, Debug) + process_next_msg(GS2State #gs2_state { time = hibernate }) end. adjust_timeout_state(SleptAt, AwokeAt, {backoff, CurrentTO, MinimumTO, @@ -596,32 +581,40 @@ adjust_timeout_state(SleptAt, AwokeAt, {backoff, CurrentTO, MinimumTO, CurrentTO1 = Base + Extra, {backoff, CurrentTO1, MinimumTO, DesiredHibPeriod, RandomState1}. -in({'$gen_pcast', {Priority, Msg}}, Queue) -> - priority_queue:in({'$gen_cast', Msg}, Priority, Queue); -in({'$gen_pcall', From, {Priority, Msg}}, Queue) -> - priority_queue:in({'$gen_call', From, Msg}, Priority, Queue); -in(Input, Queue) -> - priority_queue:in(Input, Queue). - -process_msg(Parent, Name, State, Mod, Time, TimeoutState, Queue, - Debug, Msg) -> +in({'$gen_cast', Msg}, GS2State = #gs2_state { prioritise_cast = PC, + queue = Queue }) -> + GS2State #gs2_state { queue = priority_queue:in( + {'$gen_cast', Msg}, + PC(Msg, GS2State), Queue) }; +in({'$gen_call', From, Msg}, GS2State = #gs2_state { prioritise_call = PC, + queue = Queue }) -> + GS2State #gs2_state { queue = priority_queue:in( + {'$gen_call', From, Msg}, + PC(Msg, From, GS2State), Queue) }; +in(Input, GS2State = #gs2_state { prioritise_info = PI, queue = Queue }) -> + GS2State #gs2_state { queue = priority_queue:in( + Input, PI(Input, GS2State), Queue) }. 
+ +process_msg(Msg, + GS2State = #gs2_state { parent = Parent, + name = Name, + debug = Debug }) -> case Msg of - {system, From, Req} -> - sys:handle_system_msg( + {system, From, Req} -> + sys:handle_system_msg( Req, From, Parent, ?MODULE, Debug, - [Name, State, Mod, Time, TimeoutState, Queue]); + GS2State); %% gen_server puts Hib on the end as the 7th arg, but that %% version of the function seems not to be documented so %% leaving out for now. - {'EXIT', Parent, Reason} -> - terminate(Reason, Name, Msg, Mod, State, Debug); - _Msg when Debug =:= [] -> - handle_msg(Msg, Parent, Name, State, Mod, TimeoutState, Queue); - _Msg -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, - Name, {in, Msg}), - handle_msg(Msg, Parent, Name, State, Mod, TimeoutState, Queue, - Debug1) + {'EXIT', Parent, Reason} -> + terminate(Reason, Msg, GS2State); + _Msg when Debug =:= [] -> + handle_msg(Msg, GS2State); + _Msg -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, + Name, {in, Msg}), + handle_msg(Msg, GS2State #gs2_state { debug = Debug1 }) end. %%% --------------------------------------------------- @@ -638,35 +631,35 @@ do_multi_call(Nodes, Name, Req, Timeout) -> Tag = make_ref(), Caller = self(), Receiver = - spawn( - fun () -> - %% Middleman process. Should be unsensitive to regular - %% exit signals. The sychronization is needed in case - %% the receiver would exit before the caller started - %% the monitor. - process_flag(trap_exit, true), - Mref = erlang:monitor(process, Caller), - receive - {Caller,Tag} -> - Monitors = send_nodes(Nodes, Name, Tag, Req), - TimerId = erlang:start_timer(Timeout, self(), ok), - Result = rec_nodes(Tag, Monitors, Name, TimerId), - exit({self(),Tag,Result}); - {'DOWN',Mref,_,_,_} -> - %% Caller died before sending us the go-ahead. - %% Give up silently. - exit(normal) - end - end), + spawn( + fun () -> + %% Middleman process. Should be unsensitive to regular + %% exit signals. 
The sychronization is needed in case + %% the receiver would exit before the caller started + %% the monitor. + process_flag(trap_exit, true), + Mref = erlang:monitor(process, Caller), + receive + {Caller,Tag} -> + Monitors = send_nodes(Nodes, Name, Tag, Req), + TimerId = erlang:start_timer(Timeout, self(), ok), + Result = rec_nodes(Tag, Monitors, Name, TimerId), + exit({self(),Tag,Result}); + {'DOWN',Mref,_,_,_} -> + %% Caller died before sending us the go-ahead. + %% Give up silently. + exit(normal) + end + end), Mref = erlang:monitor(process, Receiver), Receiver ! {self(),Tag}, receive - {'DOWN',Mref,_,_,{Receiver,Tag,Result}} -> - Result; - {'DOWN',Mref,_,_,Reason} -> - %% The middleman code failed. Or someone did - %% exit(_, kill) on the middleman process => Reason==killed - exit(Reason) + {'DOWN',Mref,_,_,{Receiver,Tag,Result}} -> + Result; + {'DOWN',Mref,_,_,Reason} -> + %% The middleman code failed. Or someone did + %% exit(_, kill) on the middleman process => Reason==killed + exit(Reason) end. send_nodes(Nodes, Name, Tag, Req) -> @@ -681,7 +674,7 @@ send_nodes([Node|Tail], Name, Tag, Req, Monitors) send_nodes([_Node|Tail], Name, Tag, Req, Monitors) -> %% Skip non-atom Node send_nodes(Tail, Name, Tag, Req, Monitors); -send_nodes([], _Name, _Tag, _Req, Monitors) -> +send_nodes([], _Name, _Tag, _Req, Monitors) -> Monitors. %% Against old nodes: @@ -691,89 +684,89 @@ send_nodes([], _Name, _Tag, _Req, Monitors) -> %% Against contemporary nodes: %% Wait for reply, server 'DOWN', or timeout from TimerId. -rec_nodes(Tag, Nodes, Name, TimerId) -> +rec_nodes(Tag, Nodes, Name, TimerId) -> rec_nodes(Tag, Nodes, Name, [], [], 2000, TimerId). rec_nodes(Tag, [{N,R}|Tail], Name, Badnodes, Replies, Time, TimerId ) -> receive - {'DOWN', R, _, _, _} -> - rec_nodes(Tag, Tail, Name, [N|Badnodes], Replies, Time, TimerId); - {{Tag, N}, Reply} -> %% Tag is bound !!! 
- unmonitor(R), - rec_nodes(Tag, Tail, Name, Badnodes, - [{N,Reply}|Replies], Time, TimerId); - {timeout, TimerId, _} -> - unmonitor(R), - %% Collect all replies that already have arrived - rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + {'DOWN', R, _, _, _} -> + rec_nodes(Tag, Tail, Name, [N|Badnodes], Replies, Time, TimerId); + {{Tag, N}, Reply} -> %% Tag is bound !!! + unmonitor(R), + rec_nodes(Tag, Tail, Name, Badnodes, + [{N,Reply}|Replies], Time, TimerId); + {timeout, TimerId, _} -> + unmonitor(R), + %% Collect all replies that already have arrived + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) end; rec_nodes(Tag, [N|Tail], Name, Badnodes, Replies, Time, TimerId) -> %% R6 node receive - {nodedown, N} -> - monitor_node(N, false), - rec_nodes(Tag, Tail, Name, [N|Badnodes], Replies, 2000, TimerId); - {{Tag, N}, Reply} -> %% Tag is bound !!! - receive {nodedown, N} -> ok after 0 -> ok end, - monitor_node(N, false), - rec_nodes(Tag, Tail, Name, Badnodes, - [{N,Reply}|Replies], 2000, TimerId); - {timeout, TimerId, _} -> - receive {nodedown, N} -> ok after 0 -> ok end, - monitor_node(N, false), - %% Collect all replies that already have arrived - rec_nodes_rest(Tag, Tail, Name, [N | Badnodes], Replies) + {nodedown, N} -> + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, [N|Badnodes], Replies, 2000, TimerId); + {{Tag, N}, Reply} -> %% Tag is bound !!! + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, Badnodes, + [{N,Reply}|Replies], 2000, TimerId); + {timeout, TimerId, _} -> + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + %% Collect all replies that already have arrived + rec_nodes_rest(Tag, Tail, Name, [N | Badnodes], Replies) after Time -> - case rpc:call(N, erlang, whereis, [Name]) of - Pid when is_pid(Pid) -> % It exists try again. 
- rec_nodes(Tag, [N|Tail], Name, Badnodes, - Replies, infinity, TimerId); - _ -> % badnode - receive {nodedown, N} -> ok after 0 -> ok end, - monitor_node(N, false), - rec_nodes(Tag, Tail, Name, [N|Badnodes], - Replies, 2000, TimerId) - end + case rpc:call(N, erlang, whereis, [Name]) of + Pid when is_pid(Pid) -> % It exists try again. + rec_nodes(Tag, [N|Tail], Name, Badnodes, + Replies, infinity, TimerId); + _ -> % badnode + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, [N|Badnodes], + Replies, 2000, TimerId) + end end; rec_nodes(_, [], _, Badnodes, Replies, _, TimerId) -> case catch erlang:cancel_timer(TimerId) of - false -> % It has already sent it's message - receive - {timeout, TimerId, _} -> ok - after 0 -> - ok - end; - _ -> % Timer was cancelled, or TimerId was 'undefined' - ok + false -> % It has already sent it's message + receive + {timeout, TimerId, _} -> ok + after 0 -> + ok + end; + _ -> % Timer was cancelled, or TimerId was 'undefined' + ok end, {Replies, Badnodes}. %% Collect all replies that already have arrived rec_nodes_rest(Tag, [{N,R}|Tail], Name, Badnodes, Replies) -> receive - {'DOWN', R, _, _, _} -> - rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies); - {{Tag, N}, Reply} -> %% Tag is bound !!! - unmonitor(R), - rec_nodes_rest(Tag, Tail, Name, Badnodes, [{N,Reply}|Replies]) + {'DOWN', R, _, _, _} -> + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies); + {{Tag, N}, Reply} -> %% Tag is bound !!! + unmonitor(R), + rec_nodes_rest(Tag, Tail, Name, Badnodes, [{N,Reply}|Replies]) after 0 -> - unmonitor(R), - rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + unmonitor(R), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) end; rec_nodes_rest(Tag, [N|Tail], Name, Badnodes, Replies) -> %% R6 node receive - {nodedown, N} -> - monitor_node(N, false), - rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies); - {{Tag, N}, Reply} -> %% Tag is bound !!! 
- receive {nodedown, N} -> ok after 0 -> ok end, - monitor_node(N, false), - rec_nodes_rest(Tag, Tail, Name, Badnodes, [{N,Reply}|Replies]) + {nodedown, N} -> + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies); + {{Tag, N}, Reply} -> %% Tag is bound !!! + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, Badnodes, [{N,Reply}|Replies]) after 0 -> - receive {nodedown, N} -> ok after 0 -> ok end, - monitor_node(N, false), - rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) end; rec_nodes_rest(_Tag, [], _Name, Badnodes, Replies) -> {Replies, Badnodes}. @@ -785,28 +778,28 @@ rec_nodes_rest(_Tag, [], _Name, Badnodes, Replies) -> start_monitor(Node, Name) when is_atom(Node), is_atom(Name) -> if node() =:= nonode@nohost, Node =/= nonode@nohost -> - Ref = make_ref(), - self() ! {'DOWN', Ref, process, {Name, Node}, noconnection}, - {Node, Ref}; + Ref = make_ref(), + self() ! {'DOWN', Ref, process, {Name, Node}, noconnection}, + {Node, Ref}; true -> - case catch erlang:monitor(process, {Name, Node}) of - {'EXIT', _} -> - %% Remote node is R6 - monitor_node(Node, true), - Node; - Ref when is_reference(Ref) -> - {Node, Ref} - end + case catch erlang:monitor(process, {Name, Node}) of + {'EXIT', _} -> + %% Remote node is R6 + monitor_node(Node, true), + Node; + Ref when is_reference(Ref) -> + {Node, Ref} + end end. %% Cancels a monitor started with Ref=erlang:monitor(_, _). unmonitor(Ref) when is_reference(Ref) -> erlang:demonitor(Ref), receive - {'DOWN', Ref, _, _, _} -> - true + {'DOWN', Ref, _, _, _} -> + true after 0 -> - true + true end. %%% --------------------------------------------------- @@ -818,130 +811,114 @@ dispatch({'$gen_cast', Msg}, Mod, State) -> dispatch(Info, Mod, State) -> Mod:handle_info(Info, State). 
-handle_msg({'$gen_call', From, Msg}, - Parent, Name, State, Mod, TimeoutState, Queue) -> - case catch Mod:handle_call(Msg, From, State) of - {reply, Reply, NState} -> - reply(From, Reply), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); - {reply, Reply, NState, Time1} -> - reply(From, Reply), - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); - {noreply, NState} -> - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); - {noreply, NState, Time1} -> - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); - {stop, Reason, Reply, NState} -> - {'EXIT', R} = - (catch terminate(Reason, Name, Msg, Mod, NState, [])), - reply(From, Reply), - exit(R); - Other -> handle_common_reply(Other, Parent, Name, Msg, Mod, State, - TimeoutState, Queue) - end; -handle_msg(Msg, - Parent, Name, State, Mod, TimeoutState, Queue) -> - Reply = (catch dispatch(Msg, Mod, State)), - handle_common_reply(Reply, Parent, Name, Msg, Mod, State, - TimeoutState, Queue). - -handle_msg({'$gen_call', From, Msg}, - Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> +common_reply(_Name, From, Reply, _NState, [] = _Debug) -> + reply(From, Reply), + []; +common_reply(Name, From, Reply, NState, Debug) -> + reply(Name, From, Reply, NState, Debug). + +common_debug([] = _Debug, _Func, _Info, _Event) -> + []; +common_debug(Debug, Func, Info, Event) -> + sys:handle_debug(Debug, Func, Info, Event). 
+ +handle_msg({'$gen_call', From, Msg}, GS2State = #gs2_state { mod = Mod, + state = State, + name = Name, + debug = Debug }) -> case catch Mod:handle_call(Msg, From, State) of - {reply, Reply, NState} -> - Debug1 = reply(Name, From, Reply, NState, Debug), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, - Debug1); - {reply, Reply, NState, Time1} -> - Debug1 = reply(Name, From, Reply, NState, Debug), - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); - {noreply, NState} -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {noreply, NState}), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, - Debug1); - {noreply, NState, Time1} -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {noreply, NState}), - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); - {stop, Reason, Reply, NState} -> - {'EXIT', R} = - (catch terminate(Reason, Name, Msg, Mod, NState, Debug)), - reply(Name, From, Reply, NState, Debug), - exit(R); - Other -> - handle_common_reply(Other, Parent, Name, Msg, Mod, State, - TimeoutState, Queue, Debug) + {reply, Reply, NState} -> + Debug1 = common_reply(Name, From, Reply, NState, Debug), + loop(GS2State #gs2_state { state = NState, + time = infinity, + debug = Debug1 }); + {reply, Reply, NState, Time1} -> + Debug1 = common_reply(Name, From, Reply, NState, Debug), + loop(GS2State #gs2_state { state = NState, + time = Time1, + debug = Debug1}); + {noreply, NState} -> + Debug1 = common_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(GS2State #gs2_state {state = NState, + time = infinity, + debug = Debug1}); + {noreply, NState, Time1} -> + Debug1 = common_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(GS2State #gs2_state {state = NState, + time = Time1, + debug = Debug1}); + {stop, Reason, Reply, NState} -> + {'EXIT', R} = + (catch terminate(Reason, Msg, + GS2State #gs2_state { state = NState })), + 
reply(Name, From, Reply, NState, Debug), + exit(R); + Other -> + handle_common_reply(Other, Msg, GS2State) end; -handle_msg(Msg, - Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> +handle_msg(Msg, GS2State = #gs2_state { mod = Mod, state = State }) -> Reply = (catch dispatch(Msg, Mod, State)), - handle_common_reply(Reply, Parent, Name, Msg, Mod, State, - TimeoutState, Queue, Debug). + handle_common_reply(Reply, Msg, GS2State). -handle_common_reply(Reply, Parent, Name, Msg, Mod, State, - TimeoutState, Queue) -> +handle_common_reply(Reply, Msg, GS2State = #gs2_state { name = Name, + debug = Debug}) -> case Reply of - {noreply, NState} -> - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); - {noreply, NState, Time1} -> - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); + {noreply, NState} -> + Debug1 = common_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(GS2State #gs2_state { state = NState, + time = infinity, + debug = Debug1 }); + {noreply, NState, Time1} -> + Debug1 = common_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(GS2State #gs2_state { state = NState, + time = Time1, + debug = Debug1 }); _ -> - handle_common_termination(Reply, Name, Msg, Mod, State, []) + handle_common_termination(Reply, Msg, GS2State) end. 
-handle_common_reply(Reply, Parent, Name, Msg, Mod, State, TimeoutState, Queue, - Debug) -> +handle_common_termination(Reply, Msg, GS2State) -> case Reply of - {noreply, NState} -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {noreply, NState}), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, - Debug1); - {noreply, NState, Time1} -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {noreply, NState}), - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); + {stop, Reason, NState} -> + terminate(Reason, Msg, GS2State #gs2_state { state = NState }); + {'EXIT', What} -> + terminate(What, Msg, GS2State); _ -> - handle_common_termination(Reply, Name, Msg, Mod, State, Debug) - end. - -handle_common_termination(Reply, Name, Msg, Mod, State, Debug) -> - case Reply of - {stop, Reason, NState} -> - terminate(Reason, Name, Msg, Mod, NState, Debug); - {'EXIT', What} -> - terminate(What, Name, Msg, Mod, State, Debug); - _ -> - terminate({bad_return_value, Reply}, Name, Msg, Mod, State, Debug) + terminate({bad_return_value, Reply}, Msg, GS2State) end. reply(Name, {To, Tag}, Reply, State, Debug) -> reply({To, Tag}, Reply), - sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {out, Reply, To, State} ). + sys:handle_debug( + Debug, {?MODULE, print_event}, Name, {out, Reply, To, State}). %%----------------------------------------------------------------- %% Callback functions for system messages handling. %%----------------------------------------------------------------- -system_continue(Parent, Debug, [Name, State, Mod, Time, TimeoutState, Queue]) -> - loop(Parent, Name, State, Mod, Time, TimeoutState, Queue, Debug). +system_continue(Parent, Debug, GS2State) -> + loop(GS2State #gs2_state { parent = Parent, debug = Debug }). -ifdef(use_specs). -spec system_terminate(_, _, _, [_]) -> no_return(). -endif. 
-system_terminate(Reason, _Parent, Debug, [Name, State, Mod, _Time, - _TimeoutState, _Queue]) -> - terminate(Reason, Name, [], Mod, State, Debug). +system_terminate(Reason, _Parent, Debug, GS2State) -> + terminate(Reason, [], GS2State #gs2_state { debug = Debug }). -system_code_change([Name, State, Mod, Time, TimeoutState, Queue], _Module, - OldVsn, Extra) -> +system_code_change(GS2State = #gs2_state { mod = Mod, + state = State }, + _Module, OldVsn, Extra) -> case catch Mod:code_change(OldVsn, State, Extra) of - {ok, NewState} -> - {ok, [Name, NewState, Mod, Time, TimeoutState, Queue]}; - Else -> + {ok, NewState} -> + NewGS2State = find_prioritisers( + GS2State #gs2_state { state = NewState }), + {ok, [NewGS2State]}; + Else -> Else end. @@ -951,18 +928,18 @@ system_code_change([Name, State, Mod, Time, TimeoutState, Queue], _Module, %%----------------------------------------------------------------- print_event(Dev, {in, Msg}, Name) -> case Msg of - {'$gen_call', {From, _Tag}, Call} -> - io:format(Dev, "*DBG* ~p got call ~p from ~w~n", - [Name, Call, From]); - {'$gen_cast', Cast} -> - io:format(Dev, "*DBG* ~p got cast ~p~n", - [Name, Cast]); - _ -> - io:format(Dev, "*DBG* ~p got ~p~n", [Name, Msg]) + {'$gen_call', {From, _Tag}, Call} -> + io:format(Dev, "*DBG* ~p got call ~p from ~w~n", + [Name, Call, From]); + {'$gen_cast', Cast} -> + io:format(Dev, "*DBG* ~p got cast ~p~n", + [Name, Cast]); + _ -> + io:format(Dev, "*DBG* ~p got ~p~n", [Name, Msg]) end; print_event(Dev, {out, Msg, To, State}, Name) -> - io:format(Dev, "*DBG* ~p sent ~p to ~w, new state ~w~n", - [Name, Msg, To, State]); + io:format(Dev, "*DBG* ~p sent ~p to ~w, new state ~w~n", + [Name, Msg, To, State]); print_event(Dev, {noreply, State}, Name) -> io:format(Dev, "*DBG* ~p new state ~w~n", [Name, State]); print_event(Dev, Event, Name) -> @@ -973,56 +950,61 @@ print_event(Dev, Event, Name) -> %%% Terminate the server. 
%%% --------------------------------------------------- -terminate(Reason, Name, Msg, Mod, State, Debug) -> +terminate(Reason, Msg, #gs2_state { name = Name, + mod = Mod, + state = State, + debug = Debug }) -> case catch Mod:terminate(Reason, State) of - {'EXIT', R} -> - error_info(R, Name, Msg, State, Debug), - exit(R); - _ -> - case Reason of - normal -> - exit(normal); - shutdown -> - exit(shutdown); - {shutdown,_}=Shutdown -> - exit(Shutdown); - _ -> - error_info(Reason, Name, Msg, State, Debug), - exit(Reason) - end + {'EXIT', R} -> + error_info(R, Reason, Name, Msg, State, Debug), + exit(R); + _ -> + case Reason of + normal -> + exit(normal); + shutdown -> + exit(shutdown); + {shutdown,_}=Shutdown -> + exit(Shutdown); + _ -> + error_info(Reason, undefined, Name, Msg, State, Debug), + exit(Reason) + end end. -error_info(_Reason, application_controller, _Msg, _State, _Debug) -> +error_info(_Reason, _RootCause, application_controller, _Msg, _State, _Debug) -> %% OTP-5811 Don't send an error report if it's the system process %% application_controller which is terminating - let init take care %% of it instead ok; -error_info(Reason, Name, Msg, State, Debug) -> - Reason1 = - case Reason of - {undef,[{M,F,A}|MFAs]} -> - case code:is_loaded(M) of - false -> - {'module could not be loaded',[{M,F,A}|MFAs]}; - _ -> - case erlang:function_exported(M, F, length(A)) of - true -> - Reason; - false -> - {'function not exported',[{M,F,A}|MFAs]} - end - end; - _ -> - Reason - end, - format("** Generic server ~p terminating \n" - "** Last message in was ~p~n" - "** When Server state == ~p~n" - "** Reason for termination == ~n** ~p~n", - [Name, Msg, State, Reason1]), +error_info(Reason, RootCause, Name, Msg, State, Debug) -> + Reason1 = error_reason(Reason), + Fmt = + "** Generic server ~p terminating~n" + "** Last message in was ~p~n" + "** When Server state == ~p~n" + "** Reason for termination == ~n** ~p~n", + case RootCause of + undefined -> format(Fmt, [Name, Msg, State, 
Reason1]); + _ -> format(Fmt ++ "** In 'terminate' callback " + "with reason ==~n** ~p~n", + [Name, Msg, State, Reason1, + error_reason(RootCause)]) + end, sys:print_log(Debug), ok. +error_reason({undef,[{M,F,A}|MFAs]} = Reason) -> + case code:is_loaded(M) of + false -> {'module could not be loaded',[{M,F,A}|MFAs]}; + _ -> case erlang:function_exported(M, F, length(A)) of + true -> Reason; + false -> {'function not exported',[{M,F,A}|MFAs]} + end + end; +error_reason(Reason) -> + Reason. + %%% --------------------------------------------------- %%% Misc. functions. %%% --------------------------------------------------- @@ -1036,74 +1018,109 @@ opt(_, []) -> debug_options(Name, Opts) -> case opt(debug, Opts) of - {ok, Options} -> dbg_options(Name, Options); - _ -> dbg_options(Name, []) + {ok, Options} -> dbg_options(Name, Options); + _ -> dbg_options(Name, []) end. dbg_options(Name, []) -> - Opts = - case init:get_argument(generic_debug) of - error -> - []; - _ -> - [log, statistics] - end, + Opts = + case init:get_argument(generic_debug) of + error -> + []; + _ -> + [log, statistics] + end, dbg_opts(Name, Opts); dbg_options(Name, Opts) -> dbg_opts(Name, Opts). dbg_opts(Name, Opts) -> case catch sys:debug_options(Opts) of - {'EXIT',_} -> - format("~p: ignoring erroneous debug options - ~p~n", - [Name, Opts]), - []; - Dbg -> - Dbg + {'EXIT',_} -> + format("~p: ignoring erroneous debug options - ~p~n", + [Name, Opts]), + []; + Dbg -> + Dbg end. 
get_proc_name(Pid) when is_pid(Pid) -> Pid; get_proc_name({local, Name}) -> case process_info(self(), registered_name) of - {registered_name, Name} -> - Name; - {registered_name, _Name} -> - exit(process_not_registered); - [] -> - exit(process_not_registered) - end; + {registered_name, Name} -> + Name; + {registered_name, _Name} -> + exit(process_not_registered); + [] -> + exit(process_not_registered) + end; get_proc_name({global, Name}) -> case global:safe_whereis_name(Name) of - undefined -> - exit(process_not_registered_globally); - Pid when Pid =:= self() -> - Name; - _Pid -> - exit(process_not_registered_globally) + undefined -> + exit(process_not_registered_globally); + Pid when Pid =:= self() -> + Name; + _Pid -> + exit(process_not_registered_globally) end. get_parent() -> case get('$ancestors') of - [Parent | _] when is_pid(Parent)-> + [Parent | _] when is_pid(Parent)-> Parent; [Parent | _] when is_atom(Parent)-> name_to_pid(Parent); - _ -> - exit(process_was_not_started_by_proc_lib) + _ -> + exit(process_was_not_started_by_proc_lib) end. name_to_pid(Name) -> case whereis(Name) of - undefined -> - case global:safe_whereis_name(Name) of - undefined -> - exit(could_not_find_registerd_name); - Pid -> - Pid - end; - Pid -> - Pid + undefined -> + case global:safe_whereis_name(Name) of + undefined -> + exit(could_not_find_registerd_name); + Pid -> + Pid + end; + Pid -> + Pid + end. + +find_prioritisers(GS2State = #gs2_state { mod = Mod }) -> + PrioriCall = function_exported_or_default( + Mod, 'prioritise_call', 3, + fun (_Msg, _From, _State) -> 0 end), + PrioriCast = function_exported_or_default(Mod, 'prioritise_cast', 2, + fun (_Msg, _State) -> 0 end), + PrioriInfo = function_exported_or_default(Mod, 'prioritise_info', 2, + fun (_Msg, _State) -> 0 end), + GS2State #gs2_state { prioritise_call = PrioriCall, + prioritise_cast = PrioriCast, + prioritise_info = PrioriInfo }. 
+ +function_exported_or_default(Mod, Fun, Arity, Default) -> + case erlang:function_exported(Mod, Fun, Arity) of + true -> case Arity of + 2 -> fun (Msg, GS2State = #gs2_state { state = State }) -> + case catch Mod:Fun(Msg, State) of + Res when is_integer(Res) -> + Res; + Err -> + handle_common_termination(Err, Msg, GS2State) + end + end; + 3 -> fun (Msg, From, GS2State = #gs2_state { state = State }) -> + case catch Mod:Fun(Msg, From, State) of + Res when is_integer(Res) -> + Res; + Err -> + handle_common_termination(Err, Msg, GS2State) + end + end + end; + false -> Default end. %%----------------------------------------------------------------- @@ -1113,25 +1130,23 @@ format_status(Opt, StatusData) -> [PDict, SysState, Parent, Debug, [Name, State, Mod, _Time, _TimeoutState, Queue]] = StatusData, NameTag = if is_pid(Name) -> - pid_to_list(Name); - is_atom(Name) -> - Name - end, + pid_to_list(Name); + is_atom(Name) -> + Name + end, Header = lists:concat(["Status for generic server ", NameTag]), Log = sys:get_debug(log, Debug, []), - Specfic = - case erlang:function_exported(Mod, format_status, 2) of - true -> - case catch Mod:format_status(Opt, [PDict, State]) of - {'EXIT', _} -> [{data, [{"State", State}]}]; - Else -> Else - end; - _ -> - [{data, [{"State", State}]}] - end, + Specfic = + case erlang:function_exported(Mod, format_status, 2) of + true -> case catch Mod:format_status(Opt, [PDict, State]) of + {'EXIT', _} -> [{data, [{"State", State}]}]; + Else -> Else + end; + _ -> [{data, [{"State", State}]}] + end, [{header, Header}, {data, [{"Status", SysState}, - {"Parent", Parent}, - {"Logged events", Log}, + {"Parent", Parent}, + {"Logged events", Log}, {"Queued messages", priority_queue:to_list(Queue)}]} | Specfic]. diff --git a/src/pg_local.erl b/src/pg_local.erl index 1501331d..49fa873a 100644 --- a/src/pg_local.erl +++ b/src/pg_local.erl @@ -36,8 +36,8 @@ -export([join/2, leave/2, get_members/1]). -export([sync/0]). 
%% intended for testing only; not part of official API --export([start/0,start_link/0,init/1,handle_call/3,handle_cast/2,handle_info/2, - terminate/2]). +-export([start/0, start_link/0, init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2]). %%---------------------------------------------------------------------------- @@ -45,8 +45,8 @@ -type(name() :: term()). --spec(start_link/0 :: () -> {'ok', pid()} | {'error', term()}). --spec(start/0 :: () -> {'ok', pid()} | {'error', term()}). +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). +-spec(start/0 :: () -> {'ok', pid()} | {'error', any()}). -spec(join/2 :: (name(), pid()) -> 'ok'). -spec(leave/2 :: (name(), pid()) -> 'ok'). -spec(get_members/1 :: (name()) -> [pid()]). diff --git a/src/rabbit.erl b/src/rabbit.erl index 6cf6d7d5..8c36a9f0 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -33,7 +33,8 @@ -behaviour(application). --export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, rotate_logs/1]). +-export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, + rotate_logs/1]). -export([start/2, stop/1]). @@ -82,9 +83,10 @@ {requires, external_infrastructure}, {enables, kernel_ready}]}). --rabbit_boot_step({rabbit_hooks, - [{description, "internal event notification system"}, - {mfa, {rabbit_hooks, start, []}}, +-rabbit_boot_step({rabbit_event, + [{description, "statistics event manager"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_event]}}, {requires, external_infrastructure}, {enables, kernel_ready}]}). @@ -183,18 +185,19 @@ -ifdef(use_specs). --type(log_location() :: 'tty' | 'undefined' | string()). -type(file_suffix() :: binary()). +%% this really should be an abstract type +-type(log_location() :: 'tty' | 'undefined' | file:filename()). -spec(prepare/0 :: () -> 'ok'). -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_halt/0 :: () -> 'ok'). --spec(rotate_logs/1 :: (file_suffix()) -> 'ok' | {'error', any()}). 
--spec(status/0 :: () -> - [{running_applications, [{atom(), string(), string()}]} | - {nodes, [{node_type(), [erlang_node()]}]} | - {running_nodes, [erlang_node()]}]). +-spec(rotate_logs/1 :: (file_suffix()) -> rabbit_types:ok_or_error(any())). +-spec(status/0 :: + () -> [{running_applications, [{atom(), string(), string()}]} | + {nodes, [{rabbit_mnesia:node_type(), [node()]}]} | + {running_nodes, [node()]}]). -spec(log_location/1 :: ('sasl' | 'kernel') -> log_location()). -endif. @@ -202,8 +205,7 @@ %%---------------------------------------------------------------------------- prepare() -> - ok = ensure_working_log_handlers(), - ok = rabbit_mnesia:ensure_mnesia_dir(). + ok = ensure_working_log_handlers(). start() -> try @@ -424,9 +426,9 @@ print_banner() -> "| ~s +---+ |~n" "| |~n" "+-------------------+~n" - "AMQP ~p-~p~n~s~n~s~n~n", + "~s~n~s~n~s~n~n", [Product, string:right([$v|Version], ProductLen), - ?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR, + ?PROTOCOL_VERSION, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]), Settings = [{"node", node()}, {"app descriptor", app_location()}, @@ -487,11 +489,16 @@ maybe_insert_default_data() -> insert_default_data() -> {ok, DefaultUser} = application:get_env(default_user), {ok, DefaultPass} = application:get_env(default_pass), + {ok, DefaultAdmin} = application:get_env(default_user_is_admin), {ok, DefaultVHost} = application:get_env(default_vhost), {ok, [DefaultConfigurePerm, DefaultWritePerm, DefaultReadPerm]} = application:get_env(default_permissions), ok = rabbit_access_control:add_vhost(DefaultVHost), ok = rabbit_access_control:add_user(DefaultUser, DefaultPass), + case DefaultAdmin of + true -> rabbit_access_control:set_admin(DefaultUser); + _ -> ok + end, ok = rabbit_access_control:set_permissions(DefaultUser, DefaultVHost, DefaultConfigurePerm, DefaultWritePerm, diff --git a/src/rabbit_access_control.erl b/src/rabbit_access_control.erl index a445f441..73fd6f0e 100644 --- a/src/rabbit_access_control.erl +++ 
b/src/rabbit_access_control.erl @@ -35,38 +35,68 @@ -export([check_login/2, user_pass_login/2, check_vhost_access/2, check_resource_access/3]). --export([add_user/2, delete_user/1, change_password/2, list_users/0, - lookup_user/1]). --export([add_vhost/1, delete_vhost/1, list_vhosts/0]). --export([set_permissions/5, clear_permissions/2, - list_vhost_permissions/1, list_user_permissions/1]). +-export([add_user/2, delete_user/1, change_password/2, set_admin/1, + clear_admin/1, list_users/0, lookup_user/1]). +-export([add_vhost/1, delete_vhost/1, vhost_exists/1, list_vhosts/0]). +-export([set_permissions/5, set_permissions/6, clear_permissions/2, + list_permissions/0, list_vhost_permissions/1, list_user_permissions/1, + list_user_vhost_permissions/2]). %%---------------------------------------------------------------------------- -ifdef(use_specs). +-export_type([username/0, password/0]). + -type(permission_atom() :: 'configure' | 'read' | 'write'). +-type(username() :: binary()). +-type(password() :: binary()). +-type(regexp() :: binary()). +-type(scope() :: binary()). +-type(scope_atom() :: 'client' | 'all'). --spec(check_login/2 :: (binary(), binary()) -> user()). --spec(user_pass_login/2 :: (username(), password()) -> user()). --spec(check_vhost_access/2 :: (user(), vhost()) -> 'ok'). +-spec(check_login/2 :: + (binary(), binary()) -> rabbit_types:user() | + rabbit_types:channel_exit()). +-spec(user_pass_login/2 :: + (username(), password()) + -> rabbit_types:user() | rabbit_types:channel_exit()). +-spec(check_vhost_access/2 :: + (rabbit_types:user(), rabbit_types:vhost()) + -> 'ok' | rabbit_types:channel_exit()). -spec(check_resource_access/3 :: - (username(), r(atom()), permission_atom()) -> 'ok'). + (username(), rabbit_types:r(atom()), permission_atom()) + -> 'ok' | rabbit_types:channel_exit()). -spec(add_user/2 :: (username(), password()) -> 'ok'). -spec(delete_user/1 :: (username()) -> 'ok'). -spec(change_password/2 :: (username(), password()) -> 'ok'). 
+-spec(set_admin/1 :: (username()) -> 'ok'). +-spec(clear_admin/1 :: (username()) -> 'ok'). -spec(list_users/0 :: () -> [username()]). --spec(lookup_user/1 :: (username()) -> {'ok', user()} | not_found()). --spec(add_vhost/1 :: (vhost()) -> 'ok'). --spec(delete_vhost/1 :: (vhost()) -> 'ok'). --spec(list_vhosts/0 :: () -> [vhost()]). --spec(set_permissions/5 :: - (username(), vhost(), regexp(), regexp(), regexp()) -> 'ok'). --spec(clear_permissions/2 :: (username(), vhost()) -> 'ok'). +-spec(lookup_user/1 :: + (username()) -> rabbit_types:ok(rabbit_types:user()) + | rabbit_types:error('not_found')). +-spec(add_vhost/1 :: (rabbit_types:vhost()) -> 'ok'). +-spec(delete_vhost/1 :: (rabbit_types:vhost()) -> 'ok'). +-spec(vhost_exists/1 :: (rabbit_types:vhost()) -> boolean()). +-spec(list_vhosts/0 :: () -> [rabbit_types:vhost()]). +-spec(set_permissions/5 ::(username(), rabbit_types:vhost(), regexp(), + regexp(), regexp()) -> 'ok'). +-spec(set_permissions/6 ::(scope(), username(), rabbit_types:vhost(), + regexp(), regexp(), regexp()) -> 'ok'). +-spec(clear_permissions/2 :: (username(), rabbit_types:vhost()) -> 'ok'). +-spec(list_permissions/0 :: + () -> [{username(), rabbit_types:vhost(), regexp(), regexp(), regexp(), + scope_atom()}]). -spec(list_vhost_permissions/1 :: - (vhost()) -> [{username(), regexp(), regexp(), regexp()}]). + (rabbit_types:vhost()) -> [{username(), regexp(), regexp(), regexp(), + scope_atom()}]). -spec(list_user_permissions/1 :: - (username()) -> [{vhost(), regexp(), regexp(), regexp()}]). + (username()) -> [{rabbit_types:vhost(), regexp(), regexp(), regexp(), + scope_atom()}]). +-spec(list_user_vhost_permissions/2 :: + (username(), rabbit_types:vhost()) -> [{regexp(), regexp(), regexp(), + scope_atom()}]). -endif. 
@@ -121,7 +151,7 @@ internal_lookup_vhost_access(Username, VHostPath) -> rabbit_misc:execute_mnesia_transaction( fun () -> case mnesia:read({rabbit_user_permission, - #user_vhost{username = Username, + #user_vhost{username = Username, virtual_host = VHostPath}}) of [] -> not_found; [R] -> {ok, R} @@ -149,24 +179,29 @@ check_resource_access(Username, check_resource_access(Username, R#resource{name = <<"amq.default">>}, Permission); -check_resource_access(_Username, - #resource{name = <<"amq.gen",_/binary>>}, - _Permission) -> - ok; check_resource_access(Username, R = #resource{virtual_host = VHostPath, name = Name}, Permission) -> Res = case mnesia:dirty_read({rabbit_user_permission, - #user_vhost{username = Username, + #user_vhost{username = Username, virtual_host = VHostPath}}) of [] -> false; [#user_permission{permission = P}] -> - case regexp:match( - binary_to_list(Name), - binary_to_list(element(permission_index(Permission), P))) of - {match, _, _} -> true; - nomatch -> false + case {Name, P} of + {<<"amq.gen",_/binary>>, #permission{scope = client}} -> + true; + _ -> + PermRegexp = + case element(permission_index(Permission), P) of + %% <<"^$">> breaks Emacs' erlang mode + <<"">> -> <<$^, $$>>; + RE -> RE + end, + case re:run(Name, PermRegexp, [{capture, none}]) of + match -> true; + nomatch -> false + end end end, if Res -> ok; @@ -182,7 +217,8 @@ add_user(Username, Password) -> [] -> ok = mnesia:write(rabbit_user, #user{username = Username, - password = Password}, + password = Password, + is_admin = false}, write); _ -> mnesia:abort({user_already_exists, Username}) @@ -212,20 +248,39 @@ delete_user(Username) -> R. 
change_password(Username, Password) -> - R = rabbit_misc:execute_mnesia_transaction( - rabbit_misc:with_user( - Username, - fun () -> - ok = mnesia:write(rabbit_user, - #user{username = Username, - password = Password}, - write) - end)), + R = update_user(Username, fun(User) -> + User#user{password = Password} + end), rabbit_log:info("Changed password for user ~p~n", [Username]), R. +set_admin(Username) -> + set_admin(Username, true). + +clear_admin(Username) -> + set_admin(Username, false). + +set_admin(Username, IsAdmin) -> + R = update_user(Username, fun(User) -> + User#user{is_admin = IsAdmin} + end), + rabbit_log:info("Set user admin flag for user ~p to ~p~n", + [Username, IsAdmin]), + R. + +update_user(Username, Fun) -> + rabbit_misc:execute_mnesia_transaction( + rabbit_misc:with_user( + Username, + fun () -> + {ok, User} = lookup_user(Username), + ok = mnesia:write(rabbit_user, Fun(User), write) + end)). + list_users() -> - mnesia:dirty_all_keys(rabbit_user). + [{Username, IsAdmin} || + #user{username = Username, is_admin = IsAdmin} <- + mnesia:dirty_match_object(rabbit_user, #user{_ = '_'})]. lookup_user(Username) -> rabbit_misc:dirty_read({rabbit_user, Username}). @@ -275,74 +330,100 @@ delete_vhost(VHostPath) -> R. internal_delete_vhost(VHostPath) -> - lists:foreach(fun (#exchange{name=Name}) -> + lists:foreach(fun (#exchange{name = Name}) -> ok = rabbit_exchange:delete(Name, false) end, rabbit_exchange:list(VHostPath)), - lists:foreach(fun ({Username, _, _, _}) -> + lists:foreach(fun ({Username, _, _, _, _}) -> ok = clear_permissions(Username, VHostPath) end, list_vhost_permissions(VHostPath)), ok = mnesia:delete({rabbit_vhost, VHostPath}), ok. +vhost_exists(VHostPath) -> + mnesia:dirty_read({rabbit_vhost, VHostPath}) /= []. + list_vhosts() -> mnesia:dirty_all_keys(rabbit_vhost). 
validate_regexp(RegexpBin) -> Regexp = binary_to_list(RegexpBin), - case regexp:parse(Regexp) of + case re:compile(Regexp) of {ok, _} -> ok; {error, Reason} -> throw({error, {invalid_regexp, Regexp, Reason}}) end. set_permissions(Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm) -> + set_permissions(<<"client">>, Username, VHostPath, ConfigurePerm, + WritePerm, ReadPerm). + +set_permissions(ScopeBin, Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm) -> lists:map(fun validate_regexp/1, [ConfigurePerm, WritePerm, ReadPerm]), + Scope = case ScopeBin of + <<"client">> -> client; + <<"all">> -> all; + _ -> throw({error, {invalid_scope, ScopeBin}}) + end, rabbit_misc:execute_mnesia_transaction( rabbit_misc:with_user_and_vhost( Username, VHostPath, fun () -> ok = mnesia:write( rabbit_user_permission, #user_permission{user_vhost = #user_vhost{ - username = Username, + username = Username, virtual_host = VHostPath}, permission = #permission{ + scope = Scope, configure = ConfigurePerm, - write = WritePerm, - read = ReadPerm}}, + write = WritePerm, + read = ReadPerm}}, write) end)). + clear_permissions(Username, VHostPath) -> rabbit_misc:execute_mnesia_transaction( rabbit_misc:with_user_and_vhost( Username, VHostPath, fun () -> ok = mnesia:delete({rabbit_user_permission, - #user_vhost{username = Username, + #user_vhost{username = Username, virtual_host = VHostPath}}) end)). +list_permissions() -> + [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm, Scope} || + {Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm, Scope} <- + list_permissions(match_user_vhost('_', '_'))]. + list_vhost_permissions(VHostPath) -> - [{Username, ConfigurePerm, WritePerm, ReadPerm} || - {Username, _, ConfigurePerm, WritePerm, ReadPerm} <- + [{Username, ConfigurePerm, WritePerm, ReadPerm, Scope} || + {Username, _, ConfigurePerm, WritePerm, ReadPerm, Scope} <- list_permissions(rabbit_misc:with_vhost( VHostPath, match_user_vhost('_', VHostPath)))]. 
list_user_permissions(Username) -> - [{VHostPath, ConfigurePerm, WritePerm, ReadPerm} || - {_, VHostPath, ConfigurePerm, WritePerm, ReadPerm} <- + [{VHostPath, ConfigurePerm, WritePerm, ReadPerm, Scope} || + {_, VHostPath, ConfigurePerm, WritePerm, ReadPerm, Scope} <- list_permissions(rabbit_misc:with_user( Username, match_user_vhost(Username, '_')))]. +list_user_vhost_permissions(Username, VHostPath) -> + [{ConfigurePerm, WritePerm, ReadPerm, Scope} || + {_, _, ConfigurePerm, WritePerm, ReadPerm, Scope} <- + list_permissions(rabbit_misc:with_user_and_vhost( + Username, VHostPath, + match_user_vhost(Username, VHostPath)))]. + list_permissions(QueryThunk) -> - [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm} || - #user_permission{user_vhost = #user_vhost{username = Username, + [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm, Scope} || + #user_permission{user_vhost = #user_vhost{username = Username, virtual_host = VHostPath}, - permission = #permission{ - configure = ConfigurePerm, - write = WritePerm, - read = ReadPerm}} <- + permission = #permission{ scope = Scope, + configure = ConfigurePerm, + write = WritePerm, + read = ReadPerm}} <- %% TODO: use dirty ops instead rabbit_misc:execute_mnesia_transaction(QueryThunk)]. @@ -350,7 +431,7 @@ match_user_vhost(Username, VHostPath) -> fun () -> mnesia:match_object( rabbit_user_permission, #user_permission{user_vhost = #user_vhost{ - username = Username, + username = Username, virtual_host = VHostPath}, permission = '_'}, read) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 0aa7445a..3e677c38 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -31,16 +31,17 @@ -module(rabbit_amqqueue). --export([start/0, declare/5, delete/3, purge/1]). +-export([start/0, stop/0, declare/5, delete/3, purge/1]). -export([internal_declare/2, internal_delete/1, maybe_run_queue_via_backing_queue/2, update_ram_duration/1, set_ram_duration_target/2, - set_maximum_since_use/2]). 
+ set_maximum_since_use/2, maybe_expire/1]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, assert_equivalence/5, check_exclusive_access/2, with_exclusive_access_or_die/3, - stat/1, stat_all/0, deliver/2, requeue/3, ack/4]). + stat/1, deliver/2, requeue/3, ack/4, reject/4]). -export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). +-export([emit_stats/1]). -export([consumers/1, consumers_all/1]). -export([basic_get/3, basic_consume/7, basic_cancel/4]). -export([notify_sent/2, unblock/2, flush_all/2]). @@ -55,70 +56,112 @@ -include("rabbit.hrl"). -include_lib("stdlib/include/qlc.hrl"). +-define(EXPIRES_TYPES, [byte, short, signedint, long]). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(qstats() :: {'ok', queue_name(), non_neg_integer(), non_neg_integer()}). --type(qlen() :: {'ok', non_neg_integer()}). --type(qfun(A) :: fun ((amqqueue()) -> A)). +-export_type([name/0, qmsg/0]). + +-type(name() :: rabbit_types:r('queue')). + +-type(qlen() :: rabbit_types:ok(non_neg_integer())). +-type(qfun(A) :: fun ((rabbit_types:amqqueue()) -> A)). +-type(qmsg() :: {name(), pid(), msg_id(), boolean(), rabbit_types:message()}). +-type(msg_id() :: non_neg_integer()). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). -spec(start/0 :: () -> 'ok'). --spec(declare/5 :: (queue_name(), boolean(), boolean(), amqp_table(), - maybe(pid())) -> {'new' | 'existing', amqqueue()}). --spec(lookup/1 :: (queue_name()) -> {'ok', amqqueue()} | not_found()). --spec(with/2 :: (queue_name(), qfun(A)) -> A | not_found()). --spec(with_or_die/2 :: (queue_name(), qfun(A)) -> A). --spec(assert_equivalence/5 :: (amqqueue(), boolean(), boolean(), amqp_table(), - maybe(pid)) -> ok). --spec(check_exclusive_access/2 :: (amqqueue(), pid()) -> 'ok'). --spec(with_exclusive_access_or_die/3 :: (queue_name(), pid(), qfun(A)) -> A). --spec(list/1 :: (vhost()) -> [amqqueue()]). 
--spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (amqqueue()) -> [info()]). --spec(info/2 :: (amqqueue(), [info_key()]) -> [info()]). --spec(info_all/1 :: (vhost()) -> [[info()]]). --spec(info_all/2 :: (vhost(), [info_key()]) -> [[info()]]). --spec(consumers/1 :: (amqqueue()) -> [{pid(), ctag(), boolean()}]). +-spec(stop/0 :: () -> 'ok'). +-spec(declare/5 :: + (name(), boolean(), boolean(), + rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) + -> {'new' | 'existing', rabbit_types:amqqueue()} | + rabbit_types:channel_exit()). +-spec(lookup/1 :: + (name()) -> rabbit_types:ok(rabbit_types:amqqueue()) | + rabbit_types:error('not_found')). +-spec(with/2 :: (name(), qfun(A)) -> A | rabbit_types:error('not_found')). +-spec(with_or_die/2 :: + (name(), qfun(A)) -> A | rabbit_types:channel_exit()). +-spec(assert_equivalence/5 :: + (rabbit_types:amqqueue(), boolean(), boolean(), + rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) + -> 'ok' | rabbit_types:channel_exit() | + rabbit_types:connection_exit()). +-spec(check_exclusive_access/2 :: + (rabbit_types:amqqueue(), pid()) + -> 'ok' | rabbit_types:channel_exit()). +-spec(with_exclusive_access_or_die/3 :: + (name(), pid(), qfun(A)) -> A | rabbit_types:channel_exit()). +-spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:amqqueue()]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (rabbit_types:amqqueue()) -> [rabbit_types:info()]). +-spec(info/2 :: + (rabbit_types:amqqueue(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). +-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]). +-spec(info_all/2 :: (rabbit_types:vhost(), [rabbit_types:info_key()]) + -> [[rabbit_types:info()]]). +-spec(consumers/1 :: + (rabbit_types:amqqueue()) + -> [{pid(), rabbit_types:ctag(), boolean()}]). -spec(consumers_all/1 :: - (vhost()) -> [{queue_name(), pid(), ctag(), boolean()}]). --spec(stat/1 :: (amqqueue()) -> qstats()). --spec(stat_all/0 :: () -> [qstats()]). 
+ (rabbit_types:vhost()) + -> [{name(), pid(), rabbit_types:ctag(), boolean()}]). +-spec(stat/1 :: + (rabbit_types:amqqueue()) + -> {'ok', non_neg_integer(), non_neg_integer()}). +-spec(emit_stats/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(delete/3 :: - (amqqueue(), 'false', 'false') -> qlen(); - (amqqueue(), 'true' , 'false') -> qlen() | {'error', 'in_use'}; - (amqqueue(), 'false', 'true' ) -> qlen() | {'error', 'not_empty'}; - (amqqueue(), 'true' , 'true' ) -> qlen() | - {'error', 'in_use'} | - {'error', 'not_empty'}). --spec(purge/1 :: (amqqueue()) -> qlen()). --spec(deliver/2 :: (pid(), delivery()) -> boolean()). + (rabbit_types:amqqueue(), 'false', 'false') + -> qlen(); + (rabbit_types:amqqueue(), 'true' , 'false') + -> qlen() | rabbit_types:error('in_use'); + (rabbit_types:amqqueue(), 'false', 'true' ) + -> qlen() | rabbit_types:error('not_empty'); + (rabbit_types:amqqueue(), 'true' , 'true' ) + -> qlen() | + rabbit_types:error('in_use') | + rabbit_types:error('not_empty')). +-spec(purge/1 :: (rabbit_types:amqqueue()) -> qlen()). +-spec(deliver/2 :: (pid(), rabbit_types:delivery()) -> boolean()). -spec(requeue/3 :: (pid(), [msg_id()], pid()) -> 'ok'). --spec(ack/4 :: (pid(), maybe(txn()), [msg_id()], pid()) -> 'ok'). --spec(commit_all/3 :: ([pid()], txn(), pid()) -> ok_or_errors()). --spec(rollback_all/3 :: ([pid()], txn(), pid()) -> 'ok'). +-spec(ack/4 :: + (pid(), rabbit_types:maybe(rabbit_types:txn()), [msg_id()], pid()) + -> 'ok'). +-spec(reject/4 :: (pid(), [msg_id()], boolean(), pid()) -> 'ok'). +-spec(commit_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> ok_or_errors()). +-spec(rollback_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> 'ok'). -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()). -spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()). 
--spec(basic_get/3 :: (amqqueue(), pid(), boolean()) -> +-spec(basic_get/3 :: (rabbit_types:amqqueue(), pid(), boolean()) -> {'ok', non_neg_integer(), qmsg()} | 'empty'). -spec(basic_consume/7 :: - (amqqueue(), boolean(), pid(), pid() | 'undefined', ctag(), - boolean(), any()) -> - 'ok' | {'error', 'exclusive_consume_unavailable'}). --spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). + (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined', + rabbit_types:ctag(), boolean(), any()) + -> rabbit_types:ok_or_error('exclusive_consume_unavailable')). +-spec(basic_cancel/4 :: + (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). --spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue() | 'not_found'). --spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). --spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). +-spec(internal_declare/2 :: + (rabbit_types:amqqueue(), boolean()) + -> rabbit_types:amqqueue() | 'not_found'). +-spec(internal_delete/1 :: + (name()) -> rabbit_types:ok_or_error('not_found') | + rabbit_types:connection_exit()). +-spec(maybe_run_queue_via_backing_queue/2 :: + (pid(), (fun ((A) -> A))) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). -spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). --spec(on_node_down/1 :: (erlang_node()) -> 'ok'). --spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). +-spec(maybe_expire/1 :: (pid()) -> 'ok'). +-spec(on_node_down/1 :: (node()) -> 'ok'). +-spec(pseudo_queue/2 :: (binary(), pid()) -> rabbit_types:amqqueue()). -endif. 
@@ -126,7 +169,7 @@ start() -> DurableQueues = find_durable_queues(), - {ok, BQ} = application:get_env(backing_queue_module), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), ok = BQ:start([QName || #amqqueue{name = QName} <- DurableQueues]), {ok,_} = supervisor:start_child( rabbit_sup, @@ -136,6 +179,12 @@ start() -> _RealDurableQueues = recover_durable_queues(DurableQueues), ok. +stop() -> + ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), + ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), + ok = BQ:stop(). + find_durable_queues() -> Node = node(), %% TODO: use dirty ops instead @@ -148,9 +197,11 @@ find_durable_queues() -> recover_durable_queues(DurableQueues) -> Qs = [start_queue_process(Q) || Q <- DurableQueues], - [Q || Q <- Qs, gen_server2:call(Q#amqqueue.pid, {init, true}) == Q]. + [Q || Q <- Qs, + gen_server2:call(Q#amqqueue.pid, {init, true}, infinity) == Q]. declare(QueueName, Durable, AutoDelete, Args, Owner) -> + ok = check_declare_arguments(QueueName, Args), Q = start_queue_process(#amqqueue{name = QueueName, durable = Durable, auto_delete = AutoDelete, @@ -198,11 +249,12 @@ start_queue_process(Q) -> Q#amqqueue{pid = Pid}. add_default_binding(#amqqueue{name = QueueName}) -> - Exchange = rabbit_misc:r(QueueName, exchange, <<>>), + ExchangeName = rabbit_misc:r(QueueName, exchange, <<>>), RoutingKey = QueueName#resource.name, - rabbit_exchange:add_binding(Exchange, QueueName, RoutingKey, [], - fun (_X, _Q) -> ok end), - ok. + rabbit_binding:add(#binding{exchange_name = ExchangeName, + queue_name = QueueName, + key = RoutingKey, + args = []}). lookup(Name) -> rabbit_misc:dirty_read({rabbit_queue, Name}). @@ -218,13 +270,15 @@ with(Name, F) -> with_or_die(Name, F) -> with(Name, F, fun () -> rabbit_misc:not_found(Name) end). 
-assert_equivalence(#amqqueue{durable = Durable, auto_delete = AutoDelete} = Q, - Durable, AutoDelete, _Args, Owner) -> +assert_equivalence(#amqqueue{durable = Durable, + auto_delete = AutoDelete} = Q, + Durable, AutoDelete, RequiredArgs, Owner) -> + assert_args_equivalence(Q, RequiredArgs), check_exclusive_access(Q, Owner, strict); assert_equivalence(#amqqueue{name = QueueName}, - _Durable, _AutoDelete, _Args, _Owner) -> + _Durable, _AutoDelete, _RequiredArgs, _Owner) -> rabbit_misc:protocol_error( - precondition_failed, "parameters for ~s not equivalent", + not_allowed, "parameters for ~s not equivalent", [rabbit_misc:rs(QueueName)]). check_exclusive_access(Q, Owner) -> check_exclusive_access(Q, Owner, lax). @@ -243,6 +297,31 @@ with_exclusive_access_or_die(Name, ReaderPid, F) -> with_or_die(Name, fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end). +assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, + RequiredArgs) -> + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName, + [<<"x-expires">>]). + +check_declare_arguments(QueueName, Args) -> + [case Fun(rabbit_misc:table_lookup(Args, Key)) of + ok -> ok; + {error, Error} -> rabbit_misc:protocol_error( + precondition_failed, + "invalid arg '~s' for ~s: ~w", + [Key, rabbit_misc:rs(QueueName), Error]) + end || {Key, Fun} <- [{<<"x-expires">>, fun check_expires_argument/1}]], + ok. + +check_expires_argument(undefined) -> + ok; +check_expires_argument({Type, Expires}) when Expires > 0 -> + case lists:member(Type, ?EXPIRES_TYPES) of + true -> ok; + false -> {error, {expires_not_of_acceptable_type, Type, Expires}} + end; +check_expires_argument({_Type, _Expires}) -> + {error, expires_zero_or_less}. + list(VHostPath) -> mnesia:dirty_match_object( rabbit_queue, @@ -253,10 +332,10 @@ info_keys() -> rabbit_amqqueue_process:info_keys(). map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). info(#amqqueue{ pid = QPid }) -> - delegate_pcall(QPid, 9, info, infinity). 
+ delegate_call(QPid, info, infinity). info(#amqqueue{ pid = QPid }, Items) -> - case delegate_pcall(QPid, 9, {info, Items}, infinity) of + case delegate_call(QPid, {info, Items}, infinity) of {ok, Res} -> Res; {error, Error} -> throw(Error) end. @@ -266,7 +345,7 @@ info_all(VHostPath) -> map(VHostPath, fun (Q) -> info(Q) end). info_all(VHostPath, Items) -> map(VHostPath, fun (Q) -> info(Q, Items) end). consumers(#amqqueue{ pid = QPid }) -> - delegate_pcall(QPid, 9, consumers, infinity). + delegate_call(QPid, consumers, infinity). consumers_all(VHostPath) -> lists:concat( @@ -277,8 +356,8 @@ consumers_all(VHostPath) -> stat(#amqqueue{pid = QPid}) -> delegate_call(QPid, stat, infinity). -stat_all() -> - lists:map(fun stat/1, rabbit_misc:dirty_read_all(rabbit_queue)). +emit_stats(#amqqueue{pid = QPid}) -> + delegate_cast(QPid, emit_stats). delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> delegate_call(QPid, {delete, IfUnused, IfEmpty}, infinity). @@ -301,11 +380,13 @@ requeue(QPid, MsgIds, ChPid) -> delegate_call(QPid, {requeue, MsgIds, ChPid}, infinity). ack(QPid, Txn, MsgIds, ChPid) -> - delegate_pcast(QPid, 7, {ack, Txn, MsgIds, ChPid}). + delegate_cast(QPid, {ack, Txn, MsgIds, ChPid}). + +reject(QPid, MsgIds, Requeue, ChPid) -> + delegate_cast(QPid, {reject, MsgIds, Requeue, ChPid}). commit_all(QPids, Txn, ChPid) -> safe_delegate_call_ok( - fun (QPid) -> exit({queue_disappeared, QPid}) end, fun (QPid) -> gen_server2:call(QPid, {commit, Txn, ChPid}, infinity) end, QPids). @@ -315,9 +396,6 @@ rollback_all(QPids, Txn, ChPid) -> notify_down_all(QPids, ChPid) -> safe_delegate_call_ok( - %% we don't care if the queue process has terminated in the - %% meantime - fun (_) -> ok end, fun (QPid) -> gen_server2:call(QPid, {notify_down, ChPid}, infinity) end, QPids). @@ -341,10 +419,10 @@ basic_cancel(#amqqueue{pid = QPid}, ChPid, ConsumerTag, OkMsg) -> infinity). notify_sent(QPid, ChPid) -> - delegate_pcast(QPid, 7, {notify_sent, ChPid}). 
+ delegate_cast(QPid, {notify_sent, ChPid}). unblock(QPid, ChPid) -> - delegate_pcast(QPid, 7, {unblock, ChPid}). + delegate_cast(QPid, {unblock, ChPid}). flush_all(QPids, ChPid) -> delegate:invoke_no_result( @@ -356,7 +434,7 @@ internal_delete1(QueueName) -> %% we want to execute some things, as %% decided by rabbit_exchange, after the %% transaction. - rabbit_exchange:delete_queue_bindings(QueueName). + rabbit_binding:remove_for_queue(QueueName). internal_delete(QueueName) -> case @@ -374,17 +452,19 @@ internal_delete(QueueName) -> end. maybe_run_queue_via_backing_queue(QPid, Fun) -> - gen_server2:pcall(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}, - infinity). + gen_server2:call(QPid, {maybe_run_queue_via_backing_queue, Fun}, infinity). update_ram_duration(QPid) -> - gen_server2:pcast(QPid, 8, update_ram_duration). + gen_server2:cast(QPid, update_ram_duration). set_ram_duration_target(QPid, Duration) -> - gen_server2:pcast(QPid, 8, {set_ram_duration_target, Duration}). + gen_server2:cast(QPid, {set_ram_duration_target, Duration}). set_maximum_since_use(QPid, Age) -> - gen_server2:pcast(QPid, 8, {set_maximum_since_use, Age}). + gen_server2:cast(QPid, {set_maximum_since_use, Age}). + +maybe_expire(QPid) -> + gen_server2:cast(QPid, maybe_expire). on_node_down(Node) -> [Hook() || @@ -398,7 +478,7 @@ on_node_down(Node) -> ok. delete_queue(QueueName) -> - Post = rabbit_exchange:delete_transient_queue_bindings(QueueName), + Post = rabbit_binding:remove_transient_for_queue(QueueName), ok = mnesia:delete({rabbit_queue, QueueName}), Post. @@ -409,11 +489,11 @@ pseudo_queue(QueueName, Pid) -> arguments = [], pid = Pid}. 
-safe_delegate_call_ok(H, F, Pids) -> +safe_delegate_call_ok(F, Pids) -> {_, Bad} = delegate:invoke(Pids, fun (Pid) -> rabbit_misc:with_exit_handler( - fun () -> H(Pid) end, + fun () -> ok end, fun () -> F(Pid) end) end), case Bad of @@ -424,11 +504,6 @@ safe_delegate_call_ok(H, F, Pids) -> delegate_call(Pid, Msg, Timeout) -> delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, Timeout) end). -delegate_pcall(Pid, Pri, Msg, Timeout) -> - delegate:invoke(Pid, - fun (P) -> gen_server2:pcall(P, Pri, Msg, Timeout) end). - -delegate_pcast(Pid, Pri, Msg) -> - delegate:invoke_no_result(Pid, - fun (P) -> gen_server2:pcast(P, Pri, Msg) end). +delegate_cast(Pid, Msg) -> + delegate:invoke(Pid, fun (P) -> gen_server2:cast(P, Msg) end). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 70e6e755..91877efb 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -35,14 +35,15 @@ -behaviour(gen_server2). --define(UNSENT_MESSAGE_LIMIT, 100). +-define(UNSENT_MESSAGE_LIMIT, 100). -define(SYNC_INTERVAL, 5). %% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). -export([start_link/1, info_keys/0]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, - handle_info/2, handle_pre_hibernate/1]). + handle_info/2, handle_pre_hibernate/1, prioritise_call/3, + prioritise_cast/2]). -import(queue). -import(erlang). @@ -56,8 +57,11 @@ backing_queue_state, active_consumers, blocked_consumers, + expires, sync_timer_ref, - rate_timer_ref + rate_timer_ref, + expiry_timer_ref, + stats_timer }). -record(consumer, {tag, ack_required}). @@ -72,13 +76,8 @@ txn, unsent_message_count}). --define(INFO_KEYS, - [name, - durable, - auto_delete, - arguments, - pid, - owner_pid, +-define(STATISTICS_KEYS, + [pid, exclusive_consumer_pid, exclusive_consumer_tag, messages_ready, @@ -89,6 +88,17 @@ backing_queue_status ]). 
+-define(CREATION_EVENT_KEYS, + [pid, + name, + durable, + auto_delete, + arguments, + owner_pid + ]). + +-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). + %%---------------------------------------------------------------------------- start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). @@ -102,15 +112,18 @@ init(Q) -> process_flag(trap_exit, true), {ok, BQ} = application:get_env(backing_queue_module), - {ok, #q{q = Q#amqqueue{pid = self()}, - exclusive_consumer = none, - has_had_consumers = false, - backing_queue = BQ, + {ok, #q{q = Q#amqqueue{pid = self()}, + exclusive_consumer = none, + has_had_consumers = false, + backing_queue = BQ, backing_queue_state = undefined, - active_consumers = queue:new(), - blocked_consumers = queue:new(), - sync_timer_ref = undefined, - rate_timer_ref = undefined}, hibernate, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + expires = undefined, + sync_timer_ref = undefined, + rate_timer_ref = undefined, + expiry_timer_ref = undefined, + stats_timer = rabbit_event:init_stats_timer()}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. terminate(shutdown, State = #q{backing_queue = BQ}) -> @@ -132,6 +145,12 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- +init_expires(State = #q{q = #amqqueue{arguments = Arguments}}) -> + case rabbit_misc:table_lookup(Arguments, <<"x-expires">>) of + {_Type, Expires} -> ensure_expiry_timer(State#q{expires = Expires}); + undefined -> State + end. 
+ declare(Recover, From, State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined}) -> @@ -145,7 +164,9 @@ declare(Recover, From, self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), BQS = BQ:init(QName, IsDurable, Recover), - noreply(State#q{backing_queue_state = BQS}); + rabbit_event:notify(queue_created, + infos(?CREATION_EVENT_KEYS, State)), + noreply(init_expires(State#q{backing_queue_state = BQS})); Q1 -> {stop, normal, {existing, Q1}, State} end. @@ -163,6 +184,7 @@ terminate_shutdown(Fun, State) -> BQ:tx_rollback(Txn, BQSN), BQSN1 end, BQS, all_ch_record()), + rabbit_event:notify(queue_deleted, [{pid, self()}]), State1#q{backing_queue_state = Fun(BQS1)} end. @@ -179,16 +201,17 @@ noreply(NewState) -> next_state(State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = ensure_rate_timer(State), - case BQ:needs_sync(BQS)of - true -> {ensure_sync_timer(State1), 0}; - false -> {stop_sync_timer(State1), hibernate} + State2 = ensure_stats_timer(State1), + case BQ:needs_idle_timeout(BQS) of + true -> {ensure_sync_timer(State2), 0}; + false -> {stop_sync_timer(State2), hibernate} end. ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, rabbit_amqqueue, maybe_run_queue_via_backing_queue, - [self(), fun (BQS) -> BQ:sync(BQS) end]), + [self(), fun (BQS) -> BQ:idle_timeout(BQS) end]), State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> State. @@ -218,6 +241,39 @@ stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), State#q{rate_timer_ref = undefined}. +stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) -> + State; +stop_expiry_timer(State = #q{expiry_timer_ref = TRef}) -> + {ok, cancel} = timer:cancel(TRef), + State#q{expiry_timer_ref = undefined}. 
+ +%% We only wish to expire where there are no consumers *and* when +%% basic.get hasn't been called for the configured period. +ensure_expiry_timer(State = #q{expires = undefined}) -> + State; +ensure_expiry_timer(State = #q{expires = Expires}) -> + case is_unused(State) of + true -> + NewState = stop_expiry_timer(State), + {ok, TRef} = timer:apply_after( + Expires, rabbit_amqqueue, maybe_expire, [self()]), + NewState#q{expiry_timer_ref = TRef}; + false -> + State + end. + +ensure_stats_timer(State = #q{stats_timer = StatsTimer, + q = Q}) -> + State#q{stats_timer = rabbit_event:ensure_stats_timer( + StatsTimer, + fun() -> emit_stats(State) end, + fun() -> rabbit_amqqueue:emit_stats(Q) end)}. + +stop_stats_timer(State = #q{stats_timer = StatsTimer}) -> + State#q{stats_timer = rabbit_event:stop_stats_timer( + StatsTimer, + fun() -> emit_stats(State) end)}. + assert_invariant(#q{active_consumers = AC, backing_queue = BQ, backing_queue_state = BQS}) -> true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)). @@ -439,7 +495,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> _ -> rollback_transaction(Txn, ChPid, State1) end, - {ok, requeue_and_run(sets:to_list(ChAckTags), State2)} + {ok, requeue_and_run(sets:to_list(ChAckTags), + ensure_expiry_timer(State2))} end end. @@ -528,8 +585,34 @@ i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> i(Item, _) -> throw({bad_argument, Item}). +emit_stats(State) -> + rabbit_event:notify(queue_stats, infos(?STATISTICS_KEYS, State)). + %--------------------------------------------------------------------------- +prioritise_call(Msg, _From, _State) -> + case Msg of + info -> 9; + {info, _Items} -> 9; + consumers -> 9; + {maybe_run_queue_via_backing_queue, _Fun} -> 6; + _ -> 0 + end. 
+ +prioritise_cast(Msg, _State) -> + case Msg of + update_ram_duration -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + maybe_expire -> 8; + emit_stats -> 7; + {ack, _Txn, _MsgIds, _ChPid} -> 7; + {reject, _MsgIds, _Requeue, _ChPid} -> 7; + {notify_sent, _ChPid} -> 7; + {unblock, _ChPid} -> 7; + _ -> 0 + end. + handle_call({init, Recover}, From, State = #q{q = #amqqueue{exclusive_owner = none}}) -> declare(Recover, From, State); @@ -541,6 +624,7 @@ handle_call({init, Recover}, From, declare(Recover, From, State); _ -> #q{q = #amqqueue{name = QName, durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined} = State, + gen_server2:reply(From, not_found), case Recover of true -> ok; _ -> rabbit_log:warning( @@ -548,7 +632,7 @@ handle_call({init, Recover}, From, end, BQS = BQ:init(QName, IsDurable, Recover), %% Rely on terminate to delete the queue. - {stop, normal, not_found, State#q{backing_queue_state = BQS}} + {stop, normal, State#q{backing_queue_state = BQS}} end; handle_call(info, _From, State) -> @@ -610,8 +694,9 @@ handle_call({basic_get, ChPid, NoAck}, _From, State = #q{q = #amqqueue{name = QName}, backing_queue_state = BQS, backing_queue = BQ}) -> AckRequired = not NoAck, + State1 = ensure_expiry_timer(State), case BQ:fetch(AckRequired, BQS) of - {empty, BQS1} -> reply(empty, State#q{backing_queue_state = BQS1}); + {empty, BQS1} -> reply(empty, State1#q{backing_queue_state = BQS1}); {{Message, IsDelivered, AckTag, Remaining}, BQS1} -> case AckRequired of true -> C = #cr{acktags = ChAckTags} = ch_record(ChPid), @@ -620,7 +705,7 @@ handle_call({basic_get, ChPid, NoAck}, _From, false -> ok end, Msg = {QName, self(), AckTag, IsDelivered, Message}, - reply({ok, Remaining, Msg}, State#q{backing_queue_state = BQS1}) + reply({ok, Remaining, Msg}, State1#q{backing_queue_state = BQS1}) end; handle_call({basic_consume, NoAck, ChPid, LimiterPid, @@ -687,16 +772,15 @@ handle_call({basic_cancel, ChPid, ConsumerTag, 
OkMsg}, _From, ChPid, ConsumerTag, State#q.blocked_consumers)}, case should_auto_delete(NewState) of - false -> reply(ok, NewState); + false -> reply(ok, ensure_expiry_timer(NewState)); true -> {stop, normal, ok, NewState} end end; -handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, - backing_queue = BQ, +handle_call(stat, _From, State = #q{backing_queue = BQ, backing_queue_state = BQS, active_consumers = ActiveConsumers}) -> - reply({ok, Name, BQ:len(BQS), queue:len(ActiveConsumers)}, State); + reply({ok, BQ:len(BQS), queue:len(ActiveConsumers)}, State); handle_call({delete, IfUnused, IfEmpty}, _From, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> @@ -720,8 +804,6 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> gen_server2:reply(From, ok), case lookup_ch(ChPid) of not_found -> - rabbit_log:warning("Ignoring requeue from unknown ch: ~p~n", - [ChPid]), noreply(State); C = #cr{acktags = ChAckTags} -> ChAckTags1 = subtract_acks(ChAckTags, AckTags), @@ -750,7 +832,22 @@ handle_cast({ack, Txn, AckTags, ChPid}, _ -> {C#cr{txn = Txn}, BQ:tx_ack(Txn, AckTags, BQS)} end, store_ch_record(C1), - noreply(State #q { backing_queue_state = BQS1 }) + noreply(State#q{backing_queue_state = BQS1}) + end; + +handle_cast({reject, AckTags, Requeue, ChPid}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + case lookup_ch(ChPid) of + not_found -> + noreply(State); + C = #cr{acktags = ChAckTags} -> + ChAckTags1 = subtract_acks(ChAckTags, AckTags), + store_ch_record(C#cr{acktags = ChAckTags1}), + noreply(case Requeue of + true -> requeue_and_run(AckTags, State); + false -> BQS1 = BQ:ack(AckTags, BQS), + State #q { backing_queue_state = BQS1 } + end) end; handle_cast({rollback, Txn, ChPid}, State) -> @@ -804,6 +901,17 @@ handle_cast({set_ram_duration_target, Duration}, handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State); + +handle_cast(maybe_expire, State) -> + case 
is_unused(State) of + true -> ?LOGDEBUG("Queue lease expired for ~p~n", [State#q.q]), + {stop, normal, State}; + false -> noreply(ensure_expiry_timer(State)) + end; + +handle_cast(emit_stats, State) -> + emit_stats(State), noreply(State). handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, @@ -823,7 +931,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_info(timeout, State = #q{backing_queue = BQ}) -> noreply(maybe_run_queue_via_backing_queue( - fun (BQS) -> BQ:sync(BQS) end, State)); + fun (BQS) -> BQ:idle_timeout(BQS) end, State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -841,4 +949,5 @@ handle_pre_hibernate(State = #q{backing_queue = BQ, DesiredDuration = rabbit_memory_monitor:report_ram_duration(self(), infinity), BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - {hibernate, stop_rate_timer(State#q{backing_queue_state = BQS2})}. + {hibernate, stop_stats_timer( + stop_rate_timer(State#q{backing_queue_state = BQS2}))}. diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 432d6290..2230c507 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -42,6 +42,11 @@ behaviour_info(callbacks) -> %% shared resources. {start, 1}, + %% Called to tear down any state/resources. NB: Implementations + %% should not depend on this function being called on shutdown + %% and instead should hook into the rabbit supervision hierarchy. + {stop, 0}, + %% Initialise the backing queue and its state. {init, 3}, @@ -113,14 +118,15 @@ behaviour_info(callbacks) -> %% queue. {ram_duration, 1}, - %% Should 'sync' be called as soon as the queue process can - %% manage (either on an empty mailbox, or when a timer fires)? - {needs_sync, 1}, + %% Should 'idle_timeout' be called as soon as the queue process + %% can manage (either on an empty mailbox, or when a timer + %% fires)? + {needs_idle_timeout, 1}, - %% Called (eventually) after needs_sync returns 'true'. 
Note this - %% may be called more than once for each 'true' returned from - %% needs_sync. - {sync, 1}, + %% Called (eventually) after needs_idle_timeout returns + %% 'true'. Note this may be called more than once for each 'true' + %% returned from needs_idle_timeout. + {idle_timeout, 1}, %% Called immediately before the queue hibernates. {handle_pre_hibernate, 1}, diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 4ab7a2a0..d62fc07c 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -42,24 +42,41 @@ -ifdef(use_specs). --type(properties_input() :: (amqp_properties() | [{atom(), any()}])). --type(publish_result() :: ({ok, routing_result(), [pid()]} | not_found())). - --spec(publish/1 :: (delivery()) -> publish_result()). --spec(delivery/4 :: (boolean(), boolean(), maybe(txn()), message()) -> - delivery()). --spec(message/4 :: (exchange_name(), routing_key(), properties_input(), - binary()) -> (message() | {'error', any()})). --spec(properties/1 :: (properties_input()) -> amqp_properties()). --spec(publish/4 :: (exchange_name(), routing_key(), properties_input(), - binary()) -> publish_result()). --spec(publish/7 :: (exchange_name(), routing_key(), boolean(), boolean(), - maybe(txn()), properties_input(), binary()) -> - publish_result()). --spec(build_content/2 :: (amqp_properties(), binary()) -> content()). --spec(from_content/1 :: (content()) -> {amqp_properties(), binary()}). +-type(properties_input() :: + (rabbit_framing:amqp_property_record() | [{atom(), any()}])). +-type(publish_result() :: + ({ok, rabbit_router:routing_result(), [pid()]} + | rabbit_types:error('not_found'))). + +-spec(publish/1 :: + (rabbit_types:delivery()) -> publish_result()). +-spec(delivery/4 :: + (boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), + rabbit_types:message()) -> rabbit_types:delivery()). 
+-spec(message/4 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + properties_input(), binary()) + -> (rabbit_types:message() | rabbit_types:error(any()))). +-spec(properties/1 :: + (properties_input()) -> rabbit_framing:amqp_property_record()). +-spec(publish/4 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + properties_input(), binary()) + -> publish_result()). +-spec(publish/7 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), + properties_input(), binary()) + -> publish_result()). +-spec(build_content/2 :: + (rabbit_framing:amqp_property_record(), binary()) + -> rabbit_types:content()). +-spec(from_content/1 :: + (rabbit_types:content()) + -> {rabbit_framing:amqp_property_record(), binary()}). -spec(is_message_persistent/1 :: - (decoded_content()) -> (boolean() | {'invalid', non_neg_integer()})). + (rabbit_types:decoded_content()) + -> (boolean() | {'invalid', non_neg_integer()})). -endif. @@ -80,10 +97,13 @@ delivery(Mandatory, Immediate, Txn, Message) -> sender = self(), message = Message}. build_content(Properties, BodyBin) -> - {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), #content{class_id = ClassId, properties = Properties, properties_bin = none, + protocol = none, payload_fragments_rev = [BodyBin]}. from_content(Content) -> @@ -91,7 +111,9 @@ from_content(Content) -> properties = Props, payload_fragments_rev = FragmentsRev} = rabbit_binary_parser:ensure_content_decoded(Content), - {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), {Props, list_to_binary(lists:reverse(FragmentsRev))}. 
message(ExchangeName, RoutingKeyBin, RawProperties, BodyBin) -> diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl index 81cf3cee..056ab1b5 100644 --- a/src/rabbit_binary_generator.erl +++ b/src/rabbit_binary_generator.erl @@ -41,12 +41,12 @@ % See definition of check_empty_content_body_frame_size/0, an assertion called at startup. -define(EMPTY_CONTENT_BODY_FRAME_SIZE, 8). --export([build_simple_method_frame/2, - build_simple_content_frames/3, +-export([build_simple_method_frame/3, + build_simple_content_frames/4, build_heartbeat_frame/0]). -export([generate_table/1, encode_properties/2]). -export([check_empty_content_body_frame_size/0]). --export([ensure_content_encoded/1, clear_encoded_content/1]). +-export([ensure_content_encoded/2, clear_encoded_content/1]). -import(lists). @@ -56,45 +56,47 @@ -type(frame() :: [binary()]). --spec(build_simple_method_frame/2 :: - (channel_number(), amqp_method_record()) -> frame()). --spec(build_simple_content_frames/3 :: - (channel_number(), content(), non_neg_integer()) -> [frame()]). +-spec(build_simple_method_frame/3 :: + (rabbit_channel:channel_number(), rabbit_framing:amqp_method_record(), + rabbit_types:protocol()) + -> frame()). +-spec(build_simple_content_frames/4 :: + (rabbit_channel:channel_number(), rabbit_types:content(), + non_neg_integer(), rabbit_types:protocol()) + -> [frame()]). -spec(build_heartbeat_frame/0 :: () -> frame()). --spec(generate_table/1 :: (amqp_table()) -> binary()). --spec(encode_properties/2 :: ([amqp_property_type()], [any()]) -> binary()). +-spec(generate_table/1 :: (rabbit_framing:amqp_table()) -> binary()). +-spec(encode_properties/2 :: + ([rabbit_framing:amqp_property_type()], [any()]) -> binary()). -spec(check_empty_content_body_frame_size/0 :: () -> 'ok'). --spec(ensure_content_encoded/1 :: (content()) -> encoded_content()). --spec(clear_encoded_content/1 :: (content()) -> unencoded_content()). 
+-spec(ensure_content_encoded/2 :: + (rabbit_types:content(), rabbit_types:protocol()) -> + rabbit_types:encoded_content()). +-spec(clear_encoded_content/1 :: + (rabbit_types:content()) -> rabbit_types:unencoded_content()). -endif. %%---------------------------------------------------------------------------- -build_simple_method_frame(ChannelInt, MethodRecord) -> - MethodFields = rabbit_framing:encode_method_fields(MethodRecord), +build_simple_method_frame(ChannelInt, MethodRecord, Protocol) -> + MethodFields = Protocol:encode_method_fields(MethodRecord), MethodName = rabbit_misc:method_record_type(MethodRecord), - {ClassId, MethodId} = rabbit_framing:method_id(MethodName), + {ClassId, MethodId} = Protocol:method_id(MethodName), create_frame(1, ChannelInt, [<<ClassId:16, MethodId:16>>, MethodFields]). -build_simple_content_frames(ChannelInt, - #content{class_id = ClassId, - properties = ContentProperties, - properties_bin = ContentPropertiesBin, - payload_fragments_rev = PayloadFragmentsRev}, - FrameMax) -> - {BodySize, ContentFrames} = build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt), +build_simple_content_frames(ChannelInt, Content, FrameMax, Protocol) -> + #content{class_id = ClassId, + properties_bin = ContentPropertiesBin, + payload_fragments_rev = PayloadFragmentsRev} = + ensure_content_encoded(Content, Protocol), + {BodySize, ContentFrames} = + build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt), HeaderFrame = create_frame(2, ChannelInt, [<<ClassId:16, 0:16, BodySize:64>>, - maybe_encode_properties(ContentProperties, ContentPropertiesBin)]), + ContentPropertiesBin]), [HeaderFrame | ContentFrames]. -maybe_encode_properties(_ContentProperties, ContentPropertiesBin) - when is_binary(ContentPropertiesBin) -> - ContentPropertiesBin; -maybe_encode_properties(ContentProperties, none) -> - rabbit_framing:encode_properties(ContentProperties). 
- build_content_frames(FragsRev, FrameMax, ChannelInt) -> BodyPayloadMax = if FrameMax == 0 -> iolist_size(FragsRev); @@ -277,13 +279,25 @@ check_empty_content_body_frame_size() -> ComputedSize, ?EMPTY_CONTENT_BODY_FRAME_SIZE}) end. -ensure_content_encoded(Content = #content{properties_bin = PropsBin}) - when PropsBin =/= 'none' -> +ensure_content_encoded(Content = #content{properties_bin = PropBin, + protocol = Protocol}, Protocol) + when PropBin =/= none -> Content; -ensure_content_encoded(Content = #content{properties = Props}) -> - Content #content{properties_bin = rabbit_framing:encode_properties(Props)}. - -clear_encoded_content(Content = #content{properties_bin = none}) -> +ensure_content_encoded(Content = #content{properties = none, + properties_bin = PropBin, + protocol = Protocol}, Protocol1) + when PropBin =/= none -> + Props = Protocol:decode_properties(Content#content.class_id, PropBin), + Content#content{properties = Props, + properties_bin = Protocol1:encode_properties(Props), + protocol = Protocol1}; +ensure_content_encoded(Content = #content{properties = Props}, Protocol) + when Props =/= none -> + Content#content{properties_bin = Protocol:encode_properties(Props), + protocol = Protocol}. + +clear_encoded_content(Content = #content{properties_bin = none, + protocol = none}) -> Content; clear_encoded_content(Content = #content{properties = none}) -> %% Only clear when we can rebuild the properties_bin later in @@ -291,4 +305,4 @@ clear_encoded_content(Content = #content{properties = none}) -> %% one of properties and properties_bin can be 'none' Content; clear_encoded_content(Content = #content{}) -> - Content#content{properties_bin = none}. + Content#content{properties_bin = none, protocol = none}. diff --git a/src/rabbit_binary_parser.erl b/src/rabbit_binary_parser.erl index e022a1fa..ebf063f0 100644 --- a/src/rabbit_binary_parser.erl +++ b/src/rabbit_binary_parser.erl @@ -42,10 +42,13 @@ -ifdef(use_specs). 
--spec(parse_table/1 :: (binary()) -> amqp_table()). --spec(parse_properties/2 :: ([amqp_property_type()], binary()) -> [any()]). --spec(ensure_content_decoded/1 :: (content()) -> decoded_content()). --spec(clear_decoded_content/1 :: (content()) -> undecoded_content()). +-spec(parse_table/1 :: (binary()) -> rabbit_framing:amqp_table()). +-spec(parse_properties/2 :: + ([rabbit_framing:amqp_property_type()], binary()) -> [any()]). +-spec(ensure_content_decoded/1 :: + (rabbit_types:content()) -> rabbit_types:decoded_content()). +-spec(clear_decoded_content/1 :: + (rabbit_types:content()) -> rabbit_types:undecoded_content()). -endif. @@ -160,11 +163,12 @@ parse_property(table, <<Len:32/unsigned, Table:Len/binary, Rest/binary>>) -> {parse_table(Table), Rest}. ensure_content_decoded(Content = #content{properties = Props}) - when Props =/= 'none' -> + when Props =/= none -> Content; -ensure_content_decoded(Content = #content{properties_bin = PropBin}) - when is_binary(PropBin) -> - Content#content{properties = rabbit_framing:decode_properties( +ensure_content_decoded(Content = #content{properties_bin = PropBin, + protocol = Protocol}) + when PropBin =/= none -> + Content#content{properties = Protocol:decode_properties( Content#content.class_id, PropBin)}. clear_decoded_content(Content = #content{properties = none}) -> diff --git a/src/rabbit_binding.erl b/src/rabbit_binding.erl new file mode 100644 index 00000000..19150fa9 --- /dev/null +++ b/src/rabbit_binding.erl @@ -0,0 +1,377 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. 
+%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_binding). +-include("rabbit.hrl"). + +-export([recover/0, exists/1, add/1, remove/1, add/2, remove/2, list/1]). +-export([list_for_exchange/1, list_for_queue/1, list_for_exchange_and_queue/2]). +-export([info_keys/0, info/1, info/2, info_all/1, info_all/2]). +%% these must all be run inside a mnesia tx +-export([has_for_exchange/1, remove_for_exchange/1, + remove_for_queue/1, remove_transient_for_queue/1]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([key/0]). + +-type(key() :: binary()). + +-type(bind_errors() :: rabbit_types:error('queue_not_found' | + 'exchange_not_found' | + 'exchange_and_queue_not_found')). +-type(bind_res() :: 'ok' | bind_errors()). +-type(inner_fun() :: + fun((rabbit_types:exchange(), queue()) -> + rabbit_types:ok_or_error(rabbit_types:amqp_error()))). +-type(bindings() :: [rabbit_types:binding()]). + +-spec(recover/0 :: () -> [rabbit_types:binding()]). +-spec(exists/1 :: (rabbit_types:binding()) -> boolean() | bind_errors()). +-spec(add/1 :: (rabbit_types:binding()) -> bind_res()). 
+-spec(remove/1 :: (rabbit_types:binding()) -> + bind_res() | rabbit_types:error('binding_not_found')). +-spec(add/2 :: (rabbit_types:binding(), inner_fun()) -> bind_res()). +-spec(remove/2 :: (rabbit_types:binding(), inner_fun()) -> + bind_res() | rabbit_types:error('binding_not_found')). +-spec(list/1 :: (rabbit_types:vhost()) -> bindings()). +-spec(list_for_exchange/1 :: (rabbit_exchange:name()) -> bindings()). +-spec(list_for_queue/1 :: (rabbit_amqqueue:name()) -> bindings()). +-spec(list_for_exchange_and_queue/2 :: + (rabbit_exchange:name(), rabbit_amqqueue:name()) -> bindings()). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (rabbit_types:binding()) -> [rabbit_types:info()]). +-spec(info/2 :: (rabbit_types:binding(), [rabbit_types:info_key()]) -> + [rabbit_types:info()]). +-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]). +-spec(info_all/2 ::(rabbit_types:vhost(), [rabbit_types:info_key()]) + -> [[rabbit_types:info()]]). +-spec(has_for_exchange/1 :: (rabbit_exchange:name()) -> boolean()). +-spec(remove_for_exchange/1 :: (rabbit_exchange:name()) -> bindings()). +-spec(remove_for_queue/1 :: + (rabbit_amqqueue:name()) -> fun (() -> any())). +-spec(remove_transient_for_queue/1 :: + (rabbit_amqqueue:name()) -> fun (() -> any())). + +-endif. + +%%---------------------------------------------------------------------------- + +-define(INFO_KEYS, [exchange_name, queue_name, routing_key, arguments]). + +recover() -> + rabbit_misc:table_fold( + fun (Route = #route{binding = B}, Acc) -> + {_, ReverseRoute} = route_with_reverse(Route), + ok = mnesia:write(rabbit_route, Route, write), + ok = mnesia:write(rabbit_reverse_route, ReverseRoute, write), + [B | Acc] + end, [], rabbit_durable_route). + +exists(Binding) -> + binding_action( + Binding, + fun (_X, _Q, B) -> mnesia:read({rabbit_route, B}) /= [] end). + +add(Binding) -> add(Binding, fun (_X, _Q) -> ok end). + +remove(Binding) -> remove(Binding, fun (_X, _Q) -> ok end). 
+ +add(Binding, InnerFun) -> + case binding_action( + Binding, + fun (X, Q, B) -> + %% this argument is used to check queue exclusivity; + %% in general, we want to fail on that in preference to + %% anything else + case InnerFun(X, Q) of + ok -> + case mnesia:read({rabbit_route, B}) of + [] -> Durable = (X#exchange.durable andalso + Q#amqqueue.durable), + ok = sync_binding( + B, Durable, + fun mnesia:write/3), + {new, X, B}; + [_] -> {existing, X, B} + end; + {error, _} = E -> + E + end + end) of + {new, X = #exchange{ type = Type }, B} -> + ok = (type_to_module(Type)):add_binding(X, B), + rabbit_event:notify(binding_created, info(B)); + {existing, _, _} -> + ok; + {error, _} = Err -> + Err + end. + +remove(Binding, InnerFun) -> + case binding_action( + Binding, + fun (X, Q, B) -> + case mnesia:match_object(rabbit_route, #route{binding = B}, + write) of + [] -> {error, binding_not_found}; + [_] -> case InnerFun(X, Q) of + ok -> + Durable = (X#exchange.durable andalso + Q#amqqueue.durable), + ok = sync_binding( + B, Durable, + fun mnesia:delete_object/3), + Deleted = + rabbit_exchange:maybe_auto_delete(X), + {{Deleted, X}, B}; + {error, _} = E -> + E + end + end + end) of + {error, _} = Err -> + Err; + {{IsDeleted, X = #exchange{ type = Type }}, B} -> + Module = type_to_module(Type), + case IsDeleted of + auto_deleted -> ok = Module:delete(X, [B]); + not_deleted -> ok = Module:remove_bindings(X, [B]) + end, + rabbit_event:notify(binding_deleted, info(B)), + ok + end. + +list(VHostPath) -> + Route = #route{binding = #binding{ + exchange_name = rabbit_misc:r(VHostPath, exchange), + queue_name = rabbit_misc:r(VHostPath, queue), + _ = '_'}, + _ = '_'}, + [B || #route{binding = B} <- mnesia:dirty_match_object(rabbit_route, + Route)]. + +list_for_exchange(XName) -> + Route = #route{binding = #binding{exchange_name = XName, _ = '_'}}, + [B || #route{binding = B} <- mnesia:dirty_match_object(rabbit_route, + Route)]. 
+ +list_for_queue(QueueName) -> + Route = #route{binding = #binding{queue_name = QueueName, _ = '_'}}, + [reverse_binding(B) || #reverse_route{reverse_binding = B} <- + mnesia:dirty_match_object(rabbit_reverse_route, + reverse_route(Route))]. + +list_for_exchange_and_queue(XName, QueueName) -> + Route = #route{binding = #binding{exchange_name = XName, + queue_name = QueueName, + _ = '_'}}, + [B || #route{binding = B} <- mnesia:dirty_match_object(rabbit_route, + Route)]. + +info_keys() -> ?INFO_KEYS. + +map(VHostPath, F) -> + %% TODO: there is scope for optimisation here, e.g. using a + %% cursor, parallelising the function invocation + lists:map(F, list(VHostPath)). + +infos(Items, B) -> [{Item, i(Item, B)} || Item <- Items]. + +i(exchange_name, #binding{exchange_name = XName}) -> XName; +i(queue_name, #binding{queue_name = QName}) -> QName; +i(routing_key, #binding{key = RoutingKey}) -> RoutingKey; +i(arguments, #binding{args = Arguments}) -> Arguments; +i(Item, _) -> throw({bad_argument, Item}). + +info(B = #binding{}) -> infos(?INFO_KEYS, B). + +info(B = #binding{}, Items) -> infos(Items, B). + +info_all(VHostPath) -> map(VHostPath, fun (B) -> info(B) end). + +info_all(VHostPath, Items) -> map(VHostPath, fun (B) -> info(B, Items) end). + +has_for_exchange(XName) -> + Match = #route{binding = #binding{exchange_name = XName, _ = '_'}}, + %% we need to check for durable routes here too in case a bunch of + %% routes to durable queues have been removed temporarily as a + %% result of a node failure + contains(rabbit_route, Match) orelse contains(rabbit_durable_route, Match). + +remove_for_exchange(XName) -> + [begin + ok = mnesia:delete_object(rabbit_reverse_route, + reverse_route(Route), write), + ok = delete_forward_routes(Route), + Route#route.binding + end || Route <- mnesia:match_object( + rabbit_route, + #route{binding = #binding{exchange_name = XName, + _ = '_'}}, + write)]. 
+ +remove_for_queue(QueueName) -> + remove_for_queue(QueueName, fun delete_forward_routes/1). + +remove_transient_for_queue(QueueName) -> + remove_for_queue(QueueName, fun delete_transient_forward_routes/1). + +%%---------------------------------------------------------------------------- + +binding_action(Binding = #binding{exchange_name = XName, + queue_name = QueueName, + args = Arguments}, Fun) -> + call_with_exchange_and_queue( + XName, QueueName, + fun (X, Q) -> + SortedArgs = rabbit_misc:sort_field_table(Arguments), + Fun(X, Q, Binding#binding{args = SortedArgs}) + end). + +sync_binding(Binding, Durable, Fun) -> + ok = case Durable of + true -> Fun(rabbit_durable_route, + #route{binding = Binding}, write); + false -> ok + end, + {Route, ReverseRoute} = route_with_reverse(Binding), + ok = Fun(rabbit_route, Route, write), + ok = Fun(rabbit_reverse_route, ReverseRoute, write), + ok. + +call_with_exchange_and_queue(XName, QueueName, Fun) -> + rabbit_misc:execute_mnesia_transaction( + fun () -> case {mnesia:read({rabbit_exchange, XName}), + mnesia:read({rabbit_queue, QueueName})} of + {[X], [Q]} -> Fun(X, Q); + {[ ], [_]} -> {error, exchange_not_found}; + {[_], [ ]} -> {error, queue_not_found}; + {[ ], [ ]} -> {error, exchange_and_queue_not_found} + end + end). + +%% Used with atoms from records; e.g., the type is expected to exist. +type_to_module(T) -> + {ok, Module} = rabbit_exchange_type_registry:lookup_module(T), + Module. + +contains(Table, MatchHead) -> + continue(mnesia:select(Table, [{MatchHead, [], ['$_']}], 1, read)). + +continue('$end_of_table') -> false; +continue({[_|_], _}) -> true; +continue({[], Continuation}) -> continue(mnesia:select(Continuation)). 
+ +remove_for_queue(QueueName, FwdDeleteFun) -> + DeletedBindings = + [begin + Route = reverse_route(ReverseRoute), + ok = FwdDeleteFun(Route), + ok = mnesia:delete_object(rabbit_reverse_route, + ReverseRoute, write), + Route#route.binding + end || ReverseRoute + <- mnesia:match_object( + rabbit_reverse_route, + reverse_route(#route{binding = #binding{ + queue_name = QueueName, + _ = '_'}}), + write)], + Grouped = group_bindings_and_auto_delete( + lists:keysort(#binding.exchange_name, DeletedBindings), []), + fun () -> + lists:foreach( + fun ({{IsDeleted, X = #exchange{ type = Type }}, Bs}) -> + Module = type_to_module(Type), + case IsDeleted of + auto_deleted -> Module:delete(X, Bs); + not_deleted -> Module:remove_bindings(X, Bs) + end + end, Grouped) + end. + +%% Requires that its input binding list is sorted in exchange-name +%% order, so that the grouping of bindings (for passing to +%% group_bindings_and_auto_delete1) works properly. +group_bindings_and_auto_delete([], Acc) -> + Acc; +group_bindings_and_auto_delete( + [B = #binding{exchange_name = XName} | Bs], Acc) -> + group_bindings_and_auto_delete(XName, Bs, [B], Acc). + +group_bindings_and_auto_delete( + XName, [B = #binding{exchange_name = XName} | Bs], Bindings, Acc) -> + group_bindings_and_auto_delete(XName, Bs, [B | Bindings], Acc); +group_bindings_and_auto_delete(XName, Removed, Bindings, Acc) -> + %% either Removed is [], or its head has a non-matching XName + [X] = mnesia:read({rabbit_exchange, XName}), + NewAcc = [{{rabbit_exchange:maybe_auto_delete(X), X}, Bindings} | Acc], + group_bindings_and_auto_delete(Removed, NewAcc). + +delete_forward_routes(Route) -> + ok = mnesia:delete_object(rabbit_route, Route, write), + ok = mnesia:delete_object(rabbit_durable_route, Route, write). + +delete_transient_forward_routes(Route) -> + ok = mnesia:delete_object(rabbit_route, Route, write). 
+ +route_with_reverse(#route{binding = Binding}) -> + route_with_reverse(Binding); +route_with_reverse(Binding = #binding{}) -> + Route = #route{binding = Binding}, + {Route, reverse_route(Route)}. + +reverse_route(#route{binding = Binding}) -> + #reverse_route{reverse_binding = reverse_binding(Binding)}; + +reverse_route(#reverse_route{reverse_binding = Binding}) -> + #route{binding = reverse_binding(Binding)}. + +reverse_binding(#reverse_binding{exchange_name = XName, + queue_name = QueueName, + key = Key, + args = Args}) -> + #binding{exchange_name = XName, + queue_name = QueueName, + key = Key, + args = Args}; + +reverse_binding(#binding{exchange_name = XName, + queue_name = QueueName, + key = Key, + args = Args}) -> + #reverse_binding{exchange_name = XName, + queue_name = QueueName, + key = Key, + args = Args}. diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 3b2af5cb..f19f98d2 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -35,70 +35,78 @@ -behaviour(gen_server2). --export([start_link/6, do/2, do/3, shutdown/1]). --export([send_command/2, deliver/4, conserve_memory/2, flushed/2]). +-export([start_link/7, do/2, do/3, shutdown/1]). +-export([send_command/2, deliver/4, flushed/2]). -export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1]). +-export([emit_stats/1, flush/1]). --export([flow_timeout/2]). - --export([init/1, terminate/2, code_change/3, - handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1]). +-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, + handle_info/2, handle_pre_hibernate/1, prioritise_call/3, + prioritise_cast/2]). -record(ch, {state, channel, reader_pid, writer_pid, limiter_pid, - transaction_id, tx_participants, next_tag, + start_limiter_fun, transaction_id, tx_participants, next_tag, uncommitted_ack_q, unacked_message_q, username, virtual_host, most_recently_declared_queue, - consumer_mapping, blocking, queue_collector_pid, flow}). 
- --record(flow, {server, client, pending}). + consumer_mapping, blocking, queue_collector_pid, stats_timer}). -define(MAX_PERMISSION_CACHE_SIZE, 12). --define(FLOW_OK_TIMEOUT, 10000). %% 10 seconds --define(INFO_KEYS, +-define(STATISTICS_KEYS, [pid, - connection, - number, - user, - vhost, transactional, consumer_count, messages_unacknowledged, acks_uncommitted, prefetch_count]). +-define(CREATION_EVENT_KEYS, + [pid, + connection, + number, + user, + vhost]). + +-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(ref() :: any()). +-export_type([channel_number/0]). + +-type(channel_number() :: non_neg_integer()). --spec(start_link/6 :: - (channel_number(), pid(), pid(), username(), vhost(), pid()) -> pid()). --spec(do/2 :: (pid(), amqp_method_record()) -> 'ok'). --spec(do/3 :: (pid(), amqp_method_record(), maybe(content())) -> 'ok'). +-spec(start_link/7 :: + (channel_number(), pid(), pid(), rabbit_access_control:username(), + rabbit_types:vhost(), pid(), + fun ((non_neg_integer()) -> rabbit_types:ok(pid()))) -> + rabbit_types:ok_pid_or_error()). +-spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). +-spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(), + rabbit_types:maybe(rabbit_types:content())) -> 'ok'). -spec(shutdown/1 :: (pid()) -> 'ok'). --spec(send_command/2 :: (pid(), amqp_method()) -> 'ok'). --spec(deliver/4 :: (pid(), ctag(), boolean(), qmsg()) -> 'ok'). --spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). +-spec(send_command/2 :: (pid(), rabbit_framing:amqp_method()) -> 'ok'). +-spec(deliver/4 :: + (pid(), rabbit_types:ctag(), boolean(), rabbit_amqqueue:qmsg()) + -> 'ok'). -spec(flushed/2 :: (pid(), pid()) -> 'ok'). --spec(flow_timeout/2 :: (pid(), ref()) -> 'ok'). -spec(list/0 :: () -> [pid()]). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (pid()) -> [info()]). 
--spec(info/2 :: (pid(), [info_key()]) -> [info()]). --spec(info_all/0 :: () -> [[info()]]). --spec(info_all/1 :: ([info_key()]) -> [[info()]]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (pid()) -> [rabbit_types:info()]). +-spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]). +-spec(info_all/0 :: () -> [[rabbit_types:info()]]). +-spec(info_all/1 :: ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]). +-spec(emit_stats/1 :: (pid()) -> 'ok'). -endif. %%---------------------------------------------------------------------------- -start_link(Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid) -> - {ok, Pid} = gen_server2:start_link( - ?MODULE, [Channel, ReaderPid, WriterPid, - Username, VHost, CollectorPid], []), - Pid. +start_link(Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid, + StartLimiterFun) -> + gen_server2:start_link(?MODULE, [Channel, ReaderPid, WriterPid, Username, + VHost, CollectorPid, StartLimiterFun], []). do(Pid, Method) -> do(Pid, Method, none). @@ -115,25 +123,19 @@ send_command(Pid, Msg) -> deliver(Pid, ConsumerTag, AckRequired, Msg) -> gen_server2:cast(Pid, {deliver, ConsumerTag, AckRequired, Msg}). -conserve_memory(Pid, Conserve) -> - gen_server2:pcast(Pid, 8, {conserve_memory, Conserve}). - flushed(Pid, QPid) -> gen_server2:cast(Pid, {flushed, QPid}). -flow_timeout(Pid, Ref) -> - gen_server2:pcast(Pid, 7, {flow_timeout, Ref}). - list() -> pg_local:get_members(rabbit_channels). info_keys() -> ?INFO_KEYS. info(Pid) -> - gen_server2:pcall(Pid, 9, info, infinity). + gen_server2:call(Pid, info, infinity). info(Pid, Items) -> - case gen_server2:pcall(Pid, 9, {info, Items}, infinity) of + case gen_server2:call(Pid, {info, Items}, infinity) of {ok, Res} -> Res; {error, Error} -> throw(Error) end. @@ -144,33 +146,53 @@ info_all() -> info_all(Items) -> rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list()). +emit_stats(Pid) -> + gen_server2:cast(Pid, emit_stats). 
+ +flush(Pid) -> + gen_server2:call(Pid, flush). + %%--------------------------------------------------------------------------- -init([Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid]) -> +init([Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid, + StartLimiterFun]) -> process_flag(trap_exit, true), - link(WriterPid), ok = pg_local:join(rabbit_channels, self()), - {ok, #ch{state = starting, - channel = Channel, - reader_pid = ReaderPid, - writer_pid = WriterPid, - limiter_pid = undefined, - transaction_id = none, - tx_participants = sets:new(), - next_tag = 1, - uncommitted_ack_q = queue:new(), - unacked_message_q = queue:new(), - username = Username, - virtual_host = VHost, - most_recently_declared_queue = <<>>, - consumer_mapping = dict:new(), - blocking = dict:new(), - queue_collector_pid = CollectorPid, - flow = #flow{server = true, client = true, - pending = none}}, - hibernate, + State = #ch{state = starting, + channel = Channel, + reader_pid = ReaderPid, + writer_pid = WriterPid, + limiter_pid = undefined, + start_limiter_fun = StartLimiterFun, + transaction_id = none, + tx_participants = sets:new(), + next_tag = 1, + uncommitted_ack_q = queue:new(), + unacked_message_q = queue:new(), + username = Username, + virtual_host = VHost, + most_recently_declared_queue = <<>>, + consumer_mapping = dict:new(), + blocking = dict:new(), + queue_collector_pid = CollectorPid, + stats_timer = rabbit_event:init_stats_timer()}, + rabbit_event:notify(channel_created, infos(?CREATION_EVENT_KEYS, State)), + {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. +prioritise_call(Msg, _From, _State) -> + case Msg of + info -> 9; + {info, _Items} -> 9; + _ -> 0 + end. + +prioritise_cast(Msg, _State) -> + case Msg of + emit_stats -> 7; + _ -> 0 + end. 
+ handle_call(info, _From, State) -> reply(infos(?INFO_KEYS, State), State); @@ -180,6 +202,9 @@ handle_call({info, Items}, _From, State) -> catch Error -> reply({error, Error}, State) end; +handle_call(flush, _From, State) -> + reply(ok, State); + handle_call(_Request, _From, State) -> noreply(State). @@ -218,40 +243,25 @@ handle_cast({deliver, ConsumerTag, AckRequired, Msg}, next_tag = DeliveryTag}) -> State1 = lock_message(AckRequired, {DeliveryTag, ConsumerTag, Msg}, State), ok = internal_deliver(WriterPid, true, ConsumerTag, DeliveryTag, Msg), + {_QName, QPid, _MsgId, _Redelivered, _Msg} = Msg, + maybe_incr_stats([{QPid, 1}], + case AckRequired of + true -> deliver; + false -> deliver_no_ack + end, State), noreply(State1#ch{next_tag = DeliveryTag + 1}); -handle_cast({conserve_memory, true}, State = #ch{state = starting}) -> - noreply(State); -handle_cast({conserve_memory, false}, State = #ch{state = starting}) -> - ok = rabbit_writer:send_command(State#ch.writer_pid, #'channel.open_ok'{}), - noreply(State#ch{state = running}); -handle_cast({conserve_memory, Conserve}, State = #ch{state = running}) -> - flow_control(not Conserve, State); -handle_cast({conserve_memory, _Conserve}, State) -> - noreply(State); - -handle_cast({flow_timeout, Ref}, - State = #ch{flow = #flow{client = Flow, pending = {Ref, _TRef}}}) -> - {stop, normal, terminating( - rabbit_misc:amqp_error( - precondition_failed, - "timeout waiting for channel.flow_ok{active=~w}", - [not Flow], none), State)}; -handle_cast({flow_timeout, _Ref}, State) -> +handle_cast(emit_stats, State) -> + internal_emit_stats(State), {noreply, State}. -handle_info({'EXIT', WriterPid, Reason = {writer, send_failed, _Error}}, - State = #ch{writer_pid = WriterPid}) -> - State#ch.reader_pid ! 
{channel_exit, State#ch.channel, Reason}, - {stop, normal, State}; -handle_info({'EXIT', _Pid, Reason}, State) -> - {stop, Reason, State}; handle_info({'DOWN', _MRef, process, QPid, _Reason}, State) -> + erase_queue_stats(QPid), {noreply, queue_blocked(QPid, State)}. handle_pre_hibernate(State) -> ok = clear_permission_cache(), - {hibernate, State}. + {hibernate, stop_stats_timer(State)}. terminate(_Reason, State = #ch{state = terminating}) -> terminate(State); @@ -259,8 +269,10 @@ terminate(_Reason, State = #ch{state = terminating}) -> terminate(Reason, State) -> Res = rollback_and_notify(State), case Reason of - normal -> ok = Res; - _ -> ok + normal -> ok = Res; + shutdown -> ok = Res; + {shutdown, _Term} -> ok = Res; + _ -> ok end, terminate(State). @@ -269,9 +281,23 @@ code_change(_OldVsn, State, _Extra) -> %%--------------------------------------------------------------------------- -reply(Reply, NewState) -> {reply, Reply, NewState, hibernate}. +reply(Reply, NewState) -> + {reply, Reply, ensure_stats_timer(NewState), hibernate}. + +noreply(NewState) -> + {noreply, ensure_stats_timer(NewState), hibernate}. -noreply(NewState) -> {noreply, NewState, hibernate}. +ensure_stats_timer(State = #ch{stats_timer = StatsTimer}) -> + ChPid = self(), + State#ch{stats_timer = rabbit_event:ensure_stats_timer( + StatsTimer, + fun() -> internal_emit_stats(State) end, + fun() -> emit_stats(ChPid) end)}. + +stop_stats_timer(State = #ch{stats_timer = StatsTimer}) -> + State#ch{stats_timer = rabbit_event:stop_stats_timer( + StatsTimer, + fun() -> internal_emit_stats(State) end)}. return_ok(State, true, _Msg) -> {noreply, State}; return_ok(State, false, Msg) -> {reply, Msg, State}. @@ -378,10 +404,7 @@ queue_blocked(QPid, State = #ch{blocking = Blocking}) -> end. 
handle_method(#'channel.open'{}, _, State = #ch{state = starting}) -> - case rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}) of - true -> {noreply, State}; - false -> {reply, #'channel.open_ok'{}, State#ch{state = running}} - end; + {reply, #'channel.open_ok'{}, State#ch{state = running}}; handle_method(#'channel.open'{}, _, _State) -> rabbit_misc:protocol_error( @@ -392,16 +415,12 @@ handle_method(_Method, _, #ch{state = starting}) -> handle_method(#'channel.close'{}, _, State = #ch{writer_pid = WriterPid}) -> ok = rollback_and_notify(State), - ok = rabbit_writer:send_command(WriterPid, #'channel.close_ok'{}), + ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}), stop; handle_method(#'access.request'{},_, State) -> {reply, #'access.request_ok'{ticket = 1}, State}; -handle_method(#'basic.publish'{}, _, #ch{flow = #flow{client = false}}) -> - rabbit_misc:protocol_error( - command_invalid, - "basic.publish received after channel.flow_ok{active=false}", []); handle_method(#'basic.publish'{exchange = ExchangeNameBin, routing_key = RoutingKey, mandatory = Mandatory, @@ -426,13 +445,13 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, Exchange, rabbit_basic:delivery(Mandatory, Immediate, TxnKey, Message)), case RoutingRes of - routed -> - ok; - unroutable -> - ok = basic_return(Message, WriterPid, no_route); - not_delivered -> - ok = basic_return(Message, WriterPid, no_consumers) + routed -> ok; + unroutable -> ok = basic_return(Message, WriterPid, no_route); + not_delivered -> ok = basic_return(Message, WriterPid, no_consumers) end, + maybe_incr_stats([{ExchangeName, 1} | + [{{QPid, ExchangeName}, 1} || + QPid <- DeliveredQPids]], publish, State), {noreply, case TxnKey of none -> State; _ -> add_tx_participants(DeliveredQPids, State) @@ -443,7 +462,9 @@ handle_method(#'basic.ack'{delivery_tag = DeliveryTag, _, State = #ch{transaction_id = TxnKey, unacked_message_q = UAMQ}) -> {Acked, Remaining} = collect_acks(UAMQ, 
DeliveryTag, Multiple), - Participants = ack(TxnKey, Acked), + QIncs = ack(TxnKey, Acked), + Participants = [QPid || {QPid, _} <- QIncs], + maybe_incr_stats(QIncs, ack, State), {noreply, case TxnKey of none -> ok = notify_limiter(State#ch.limiter_pid, Acked), State#ch{unacked_message_q = Remaining}; @@ -466,11 +487,16 @@ handle_method(#'basic.get'{queue = QueueNameBin, QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:basic_get(Q, self(), NoAck) end) of {ok, MessageCount, - Msg = {_QName, _QPid, _MsgId, Redelivered, + Msg = {_QName, QPid, _MsgId, Redelivered, #basic_message{exchange_name = ExchangeName, routing_key = RoutingKey, content = Content}}} -> State1 = lock_message(not(NoAck), {DeliveryTag, none, Msg}, State), + maybe_incr_stats([{QPid, 1}], + case NoAck of + true -> get_no_ack; + false -> get + end, State), ok = rabbit_writer:send_command( WriterPid, #'basic.get_ok'{delivery_tag = DeliveryTag, @@ -481,7 +507,7 @@ handle_method(#'basic.get'{queue = QueueNameBin, Content), {noreply, State1#ch{next_tag = DeliveryTag + 1}}; empty -> - {reply, #'basic.get_empty'{cluster_id = <<>>}, State} + {reply, #'basic.get_empty'{}, State} end; handle_method(#'basic.consume'{queue = QueueNameBin, @@ -634,6 +660,17 @@ handle_method(#'basic.recover'{requeue = Requeue}, Content, State) -> ok = rabbit_writer:send_command(WriterPid, #'basic.recover_ok'{}), {noreply, State2}; +handle_method(#'basic.reject'{delivery_tag = DeliveryTag, + requeue = Requeue}, + _, State = #ch{ unacked_message_q = UAMQ}) -> + {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, false), + ok = fold_per_queue( + fun (QPid, MsgIds, ok) -> + rabbit_amqqueue:reject(QPid, MsgIds, Requeue, self()) + end, ok, Acked), + ok = notify_limiter(State#ch.limiter_pid, Acked), + {noreply, State#ch{unacked_message_q = Remaining}}; + handle_method(#'exchange.declare'{exchange = ExchangeNameBin, type = TypeNameBin, passive = false, @@ -718,7 +755,7 @@ handle_method(#'queue.declare'{queue = QueueNameBin, Q, Durable, 
AutoDelete, Args, Owner), rabbit_amqqueue:stat(Q) end) of - {ok, QueueName, MessageCount, ConsumerCount} -> + {ok, MessageCount, ConsumerCount} -> return_queue_declare_ok(QueueName, NoWait, MessageCount, ConsumerCount, State); {error, not_found} -> @@ -731,7 +768,8 @@ handle_method(#'queue.declare'{queue = QueueNameBin, %% the connection shuts down. ok = case Owner of none -> ok; - _ -> rabbit_reader_queue_collector:register_exclusive_queue(CollectorPid, Q) + _ -> rabbit_queue_collector:register( + CollectorPid, Q) end, return_queue_declare_ok(QueueName, NoWait, 0, 0, State); {existing, _Q} -> @@ -748,7 +786,7 @@ handle_method(#'queue.declare'{queue = QueueNameBin, reader_pid = ReaderPid}) -> QueueName = rabbit_misc:r(VHostPath, queue, QueueNameBin), check_configure_permitted(QueueName, State), - {{ok, QueueName, MessageCount, ConsumerCount}, #amqqueue{} = Q} = + {{ok, MessageCount, ConsumerCount}, #amqqueue{} = Q} = rabbit_amqqueue:with_or_die( QueueName, fun (Q) -> {rabbit_amqqueue:stat(Q), Q} end), ok = rabbit_amqqueue:check_exclusive_access(Q, ReaderPid), @@ -781,17 +819,17 @@ handle_method(#'queue.bind'{queue = QueueNameBin, routing_key = RoutingKey, nowait = NoWait, arguments = Arguments}, _, State) -> - binding_action(fun rabbit_exchange:add_binding/5, ExchangeNameBin, - QueueNameBin, RoutingKey, Arguments, #'queue.bind_ok'{}, - NoWait, State); + binding_action(fun rabbit_binding:add/2, + ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, + #'queue.bind_ok'{}, NoWait, State); handle_method(#'queue.unbind'{queue = QueueNameBin, exchange = ExchangeNameBin, routing_key = RoutingKey, arguments = Arguments}, _, State) -> - binding_action(fun rabbit_exchange:delete_binding/5, ExchangeNameBin, - QueueNameBin, RoutingKey, Arguments, #'queue.unbind_ok'{}, - false, State); + binding_action(fun rabbit_binding:remove/2, + ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, + #'queue.unbind_ok'{}, false, State); handle_method(#'queue.purge'{queue = QueueNameBin, 
nowait = NoWait}, @@ -849,48 +887,12 @@ handle_method(#'channel.flow'{active = false}, _, blocking = dict:from_list(Queues)}} end; -handle_method(#'channel.flow_ok'{active = Active}, _, - State = #ch{flow = #flow{server = Active, client = Flow, - pending = {_Ref, TRef}} = F}) - when Flow =:= not Active -> - {ok, cancel} = timer:cancel(TRef), - {noreply, State#ch{flow = F#flow{client = Active, pending = none}}}; -handle_method(#'channel.flow_ok'{active = Active}, _, - State = #ch{flow = #flow{server = Flow, client = Flow, - pending = {_Ref, TRef}}}) - when Flow =:= not Active -> - {ok, cancel} = timer:cancel(TRef), - {noreply, issue_flow(Flow, State)}; -handle_method(#'channel.flow_ok'{}, _, #ch{flow = #flow{pending = none}}) -> - rabbit_misc:protocol_error( - command_invalid, "unsolicited channel.flow_ok", []); -handle_method(#'channel.flow_ok'{active = Active}, _, _State) -> - rabbit_misc:protocol_error( - command_invalid, - "received channel.flow_ok{active=~w} has incorrect polarity", [Active]); - handle_method(_MethodRecord, _Content, _State) -> rabbit_misc:protocol_error( command_invalid, "unimplemented method", []). %%---------------------------------------------------------------------------- -flow_control(Active, State = #ch{flow = #flow{server = Flow, pending = none}}) - when Flow =:= not Active -> - ok = clear_permission_cache(), - noreply(issue_flow(Active, State)); -flow_control(Active, State = #ch{flow = F}) -> - noreply(State#ch{flow = F#flow{server = Active}}). - -issue_flow(Active, State) -> - ok = rabbit_writer:send_command( - State#ch.writer_pid, #'channel.flow'{active = Active}), - Ref = make_ref(), - {ok, TRef} = timer:apply_after(?FLOW_OK_TIMEOUT, ?MODULE, flow_timeout, - [self(), Ref]), - State#ch{flow = #flow{server = Active, client = not Active, - pending = {Ref, TRef}}}. 
- binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, ReturnMethod, NoWait, State = #ch{virtual_host = VHostPath, @@ -905,9 +907,14 @@ binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, State), ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), check_read_permitted(ExchangeName, State), - case Fun(ExchangeName, QueueName, ActualRoutingKey, Arguments, + case Fun(#binding{exchange_name = ExchangeName, + queue_name = QueueName, + key = ActualRoutingKey, + args = Arguments}, fun (_X, Q) -> - rabbit_amqqueue:check_exclusive_access(Q, ReaderPid) + try rabbit_amqqueue:check_exclusive_access(Q, ReaderPid) + catch exit:Reason -> {error, Reason} + end end) of {error, exchange_not_found} -> rabbit_misc:not_found(ExchangeName); @@ -922,6 +929,8 @@ binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, not_found, "no binding ~s between ~s and ~s", [RoutingKey, rabbit_misc:rs(ExchangeName), rabbit_misc:rs(QueueName)]); + {error, #amqp_error{} = Error} -> + rabbit_misc:protocol_error(Error); ok -> return_ok(State, NoWait, ReturnMethod) end. @@ -930,7 +939,7 @@ basic_return(#basic_message{exchange_name = ExchangeName, content = Content}, WriterPid, Reason) -> {_Close, ReplyCode, ReplyText} = - rabbit_framing:lookup_amqp_exception(Reason), + rabbit_framing_amqp_0_9_1:lookup_amqp_exception(Reason), ok = rabbit_writer:send_command( WriterPid, #'basic.return'{reply_code = ReplyCode, @@ -959,7 +968,7 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) -> end; {empty, _} -> rabbit_misc:protocol_error( - not_found, "unknown delivery tag ~w", [DeliveryTag]) + precondition_failed, "unknown delivery tag ~w", [DeliveryTag]) end. add_tx_participants(MoreP, State = #ch{tx_participants = Participants}) -> @@ -970,7 +979,7 @@ ack(TxnKey, UAQ) -> fold_per_queue( fun (QPid, MsgIds, L) -> ok = rabbit_amqqueue:ack(QPid, TxnKey, MsgIds, self()), - [QPid | L] + [{QPid, length(MsgIds)} | L] end, [], UAQ). 
make_tx_id() -> rabbit_guid:guid(). @@ -1022,8 +1031,8 @@ fold_per_queue(F, Acc0, UAQ) -> dict:fold(fun (QPid, MsgIds, Acc) -> F(QPid, MsgIds, Acc) end, Acc0, D). -start_limiter(State = #ch{unacked_message_q = UAMQ}) -> - LPid = rabbit_limiter:start_link(self(), queue:len(UAMQ)), +start_limiter(State = #ch{unacked_message_q = UAMQ, start_limiter_fun = SLF}) -> + {ok, LPid} = SLF(queue:len(UAMQ)), ok = limit_queues(LPid, State), LPid. @@ -1095,10 +1104,9 @@ internal_deliver(WriterPid, Notify, ConsumerTag, DeliveryTag, false -> rabbit_writer:send_command(WriterPid, M, Content) end. -terminate(#ch{writer_pid = WriterPid, limiter_pid = LimiterPid}) -> +terminate(_State) -> pg_local:leave(rabbit_channels, self()), - rabbit_writer:shutdown(WriterPid), - rabbit_limiter:shutdown(LimiterPid). + rabbit_event:notify(channel_closed, [{pid, self()}]). infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. @@ -1119,3 +1127,60 @@ i(prefetch_count, #ch{limiter_pid = LimiterPid}) -> rabbit_limiter:get_limit(LimiterPid); i(Item, _) -> throw({bad_argument, Item}). + +maybe_incr_stats(QXIncs, Measure, #ch{stats_timer = StatsTimer}) -> + case rabbit_event:stats_level(StatsTimer) of + fine -> [incr_stats(QX, Inc, Measure) || {QX, Inc} <- QXIncs]; + _ -> ok + end. + +incr_stats({QPid, _} = QX, Inc, Measure) -> + maybe_monitor(QPid), + update_measures(queue_exchange_stats, QX, Inc, Measure); +incr_stats(QPid, Inc, Measure) when is_pid(QPid) -> + maybe_monitor(QPid), + update_measures(queue_stats, QPid, Inc, Measure); +incr_stats(X, Inc, Measure) -> + update_measures(exchange_stats, X, Inc, Measure). + +maybe_monitor(QPid) -> + case get({monitoring, QPid}) of + undefined -> erlang:monitor(process, QPid), + put({monitoring, QPid}, true); + _ -> ok + end. 
+ +update_measures(Type, QX, Inc, Measure) -> + Measures = case get({Type, QX}) of + undefined -> []; + D -> D + end, + Cur = case orddict:find(Measure, Measures) of + error -> 0; + {ok, C} -> C + end, + put({Type, QX}, + orddict:store(Measure, Cur + Inc, Measures)). + +internal_emit_stats(State = #ch{stats_timer = StatsTimer}) -> + CoarseStats = infos(?STATISTICS_KEYS, State), + case rabbit_event:stats_level(StatsTimer) of + coarse -> + rabbit_event:notify(channel_stats, CoarseStats); + fine -> + FineStats = + [{channel_queue_stats, + [{QPid, Stats} || {{queue_stats, QPid}, Stats} <- get()]}, + {channel_exchange_stats, + [{X, Stats} || {{exchange_stats, X}, Stats} <- get()]}, + {channel_queue_exchange_stats, + [{QX, Stats} || + {{queue_exchange_stats, QX}, Stats} <- get()]}], + rabbit_event:notify(channel_stats, CoarseStats ++ FineStats) + end. + +erase_queue_stats(QPid) -> + erase({monitoring, QPid}), + erase({queue_stats, QPid}), + [erase({queue_exchange_stats, QX}) || + {{queue_exchange_stats, QX = {QPid0, _}}, _} <- get(), QPid =:= QPid0]. diff --git a/src/rabbit_channel_sup.erl b/src/rabbit_channel_sup.erl new file mode 100644 index 00000000..02199a65 --- /dev/null +++ b/src/rabbit_channel_sup.erl @@ -0,0 +1,96 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_channel_sup). + +-behaviour(supervisor2). + +-export([start_link/1]). + +-export([init/1]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([start_link_args/0]). + +-type(start_link_args() :: + {rabbit_types:protocol(), rabbit_net:socket(), + rabbit_channel:channel_number(), non_neg_integer(), pid(), + rabbit_access_control:username(), rabbit_types:vhost(), pid()}). + +-spec(start_link/1 :: (start_link_args()) -> {'ok', pid(), pid()}). + +-endif. 
+ +%%---------------------------------------------------------------------------- + +start_link({Protocol, Sock, Channel, FrameMax, ReaderPid, Username, VHost, + Collector}) -> + {ok, SupPid} = supervisor2:start_link(?MODULE, []), + {ok, WriterPid} = + supervisor2:start_child( + SupPid, + {writer, {rabbit_writer, start_link, + [Sock, Channel, FrameMax, Protocol, ReaderPid]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_writer]}), + {ok, ChannelPid} = + supervisor2:start_child( + SupPid, + {channel, {rabbit_channel, start_link, + [Channel, ReaderPid, WriterPid, Username, VHost, + Collector, start_limiter_fun(SupPid)]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), + {ok, FramingChannelPid} = + supervisor2:start_child( + SupPid, + {framing_channel, {rabbit_framing_channel, start_link, + [ReaderPid, ChannelPid, Protocol]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_framing_channel]}), + {ok, SupPid, FramingChannelPid}. + +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, {{one_for_all, 0, 1}, []}}. + +start_limiter_fun(SupPid) -> + fun (UnackedCount) -> + Me = self(), + {ok, _Pid} = + supervisor2:start_child( + SupPid, + {limiter, {rabbit_limiter, start_link, [Me, UnackedCount]}, + transient, ?MAX_WAIT, worker, [rabbit_limiter]}) + end. diff --git a/src/rabbit_hooks.erl b/src/rabbit_channel_sup_sup.erl index 3fc84c1e..21c39780 100644 --- a/src/rabbit_hooks.erl +++ b/src/rabbit_channel_sup_sup.erl @@ -29,45 +29,35 @@ %% Contributor(s): ______________________________________. %% --module(rabbit_hooks). +-module(rabbit_channel_sup_sup). --export([start/0]). --export([subscribe/3, unsubscribe/2, trigger/2, notify_remote/5]). +-behaviour(supervisor2). --define(TableName, rabbit_hooks). +-export([start_link/0, start_channel/2]). + +-export([init/1]). + +%%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(start/0 :: () -> 'ok'). 
--spec(subscribe/3 :: (atom(), atom(), {atom(), atom(), list()}) -> 'ok'). --spec(unsubscribe/2 :: (atom(), atom()) -> 'ok'). --spec(trigger/2 :: (atom(), list()) -> 'ok'). --spec(notify_remote/5 :: (atom(), atom(), list(), pid(), list()) -> 'ok'). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). +-spec(start_channel/2 :: (pid(), rabbit_channel_sup:start_link_args()) -> + {'ok', pid(), pid()}). -endif. -start() -> - ets:new(?TableName, [bag, public, named_table]), - ok. +%%---------------------------------------------------------------------------- -subscribe(Hook, HandlerName, Handler) -> - ets:insert(?TableName, {Hook, HandlerName, Handler}), - ok. +start_link() -> + supervisor2:start_link(?MODULE, []). -unsubscribe(Hook, HandlerName) -> - ets:match_delete(?TableName, {Hook, HandlerName, '_'}), - ok. +start_channel(Pid, Args) -> + supervisor2:start_child(Pid, [Args]). -trigger(Hook, Args) -> - Hooks = ets:lookup(?TableName, Hook), - [case catch apply(M, F, [Hook, Name, Args | A]) of - {'EXIT', Reason} -> - rabbit_log:warning("Failed to execute handler ~p for hook ~p: ~p", - [Name, Hook, Reason]); - _ -> ok - end || {_, Name, {M, F, A}} <- Hooks], - ok. +%%---------------------------------------------------------------------------- -notify_remote(Hook, HandlerName, Args, Pid, PidArgs) -> - Pid ! {rabbitmq_hook, [Hook, HandlerName, Args | PidArgs]}, - ok. +init([]) -> + {ok, {{simple_one_for_one_terminate, 0, 1}, + [{channel_sup, {rabbit_channel_sup, start_link, []}, + temporary, infinity, supervisor, [rabbit_channel_sup]}]}}. diff --git a/src/rabbit_connection_sup.erl b/src/rabbit_connection_sup.erl new file mode 100644 index 00000000..b3821d3b --- /dev/null +++ b/src/rabbit_connection_sup.erl @@ -0,0 +1,99 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. 
You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_connection_sup). + +-behaviour(supervisor2). + +-export([start_link/0, reader/1]). + +-export([init/1]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid(), pid()}). +-spec(reader/1 :: (pid()) -> pid()). + +-endif. 
+ +%%-------------------------------------------------------------------------- + +start_link() -> + {ok, SupPid} = supervisor2:start_link(?MODULE, []), + {ok, ChannelSupSupPid} = + supervisor2:start_child( + SupPid, + {channel_sup_sup, {rabbit_channel_sup_sup, start_link, []}, + intrinsic, infinity, supervisor, [rabbit_channel_sup_sup]}), + {ok, Collector} = + supervisor2:start_child( + SupPid, + {collector, {rabbit_queue_collector, start_link, []}, + intrinsic, ?MAX_WAIT, worker, [rabbit_queue_collector]}), + {ok, ReaderPid} = + supervisor2:start_child( + SupPid, + {reader, {rabbit_reader, start_link, + [ChannelSupSupPid, Collector, start_heartbeat_fun(SupPid)]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_reader]}), + {ok, SupPid, ReaderPid}. + +reader(Pid) -> + hd(supervisor2:find_child(Pid, reader)). + +%%-------------------------------------------------------------------------- + +init([]) -> + {ok, {{one_for_all, 0, 1}, []}}. + +start_heartbeat_fun(SupPid) -> + fun (_Sock, 0) -> + none; + (Sock, TimeoutSec) -> + Parent = self(), + {ok, Sender} = + supervisor2:start_child( + SupPid, {heartbeat_sender, + {rabbit_heartbeat, start_heartbeat_sender, + [Parent, Sock, TimeoutSec]}, + transient, ?MAX_WAIT, worker, [rabbit_heartbeat]}), + {ok, Receiver} = + supervisor2:start_child( + SupPid, {heartbeat_receiver, + {rabbit_heartbeat, start_heartbeat_receiver, + [Parent, Sock, TimeoutSec]}, + transient, ?MAX_WAIT, worker, [rabbit_heartbeat]}), + {Sender, Receiver} + end. diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 323d4d2f..a3b6f369 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -32,20 +32,25 @@ -module(rabbit_control). -include("rabbit.hrl"). --export([start/0, stop/0, action/4]). - --record(params, {quiet, node, command, args}). +-export([start/0, stop/0, action/5]). -define(RPC_TIMEOUT, infinity). +-define(QUIET_OPT, "-q"). +-define(NODE_OPT, "-n"). +-define(VHOST_OPT, "-p"). +-define(SCOPE_OPT, "-s"). 
+ %%---------------------------------------------------------------------------- -ifdef(use_specs). -spec(start/0 :: () -> no_return()). -spec(stop/0 :: () -> 'ok'). --spec(action/4 :: (atom(), erlang_node(), [string()], - fun ((string(), [any()]) -> 'ok')) -> 'ok'). +-spec(action/5 :: + (atom(), node(), [string()], [{string(), any()}], + fun ((string(), [any()]) -> 'ok')) + -> 'ok'). -spec(usage/0 :: () -> no_return()). -endif. @@ -55,18 +60,33 @@ start() -> {ok, [[NodeStr|_]|_]} = init:get_argument(nodename), FullCommand = init:get_plain_arguments(), - #params{quiet = Quiet, node = Node, command = Command, args = Args} = - parse_args(FullCommand, #params{quiet = false, - node = rabbit_misc:makenode(NodeStr)}), + case FullCommand of + [] -> usage(); + _ -> ok + end, + {[Command0 | Args], Opts} = + rabbit_misc:get_options( + [{flag, ?QUIET_OPT}, {option, ?NODE_OPT, NodeStr}, + {option, ?VHOST_OPT, "/"}, {option, ?SCOPE_OPT, "client"}], + FullCommand), + Opts1 = lists:map(fun({K, V}) -> + case K of + ?NODE_OPT -> {?NODE_OPT, rabbit_misc:makenode(V)}; + _ -> {K, V} + end + end, Opts), + Command = list_to_atom(Command0), + Quiet = proplists:get_bool(?QUIET_OPT, Opts1), + Node = proplists:get_value(?NODE_OPT, Opts1), Inform = case Quiet of true -> fun (_Format, _Args1) -> ok end; false -> fun (Format, Args1) -> io:format(Format ++ " ...~n", Args1) - end + end end, %% The reason we don't use a try/catch here is that rpc:call turns %% thrown errors into normal return values - case catch action(Command, Node, Args, Inform) of + case catch action(Command, Node, Args, Opts, Inform) of ok -> case Quiet of true -> ok; @@ -118,15 +138,6 @@ print_badrpc_diagnostics(Node) -> fmt_stderr("- current node cookie hash: ~s", [rabbit_misc:cookie_hash()]), ok. 
-parse_args(["-n", NodeS | Args], Params) -> - parse_args(Args, Params#params{node = rabbit_misc:makenode(NodeS)}); -parse_args(["-q" | Args], Params) -> - parse_args(Args, Params#params{quiet = true}); -parse_args([Command | Args], Params) -> - Params#params{command = list_to_atom(Command), args = Args}; -parse_args([], _) -> - usage(). - stop() -> ok. @@ -134,33 +145,39 @@ usage() -> io:format("~s", [rabbit_ctl_usage:usage()]), halt(1). -action(stop, Node, [], Inform) -> +action(stop, Node, [], _Opts, Inform) -> Inform("Stopping and halting node ~p", [Node]), call(Node, {rabbit, stop_and_halt, []}); -action(stop_app, Node, [], Inform) -> +action(stop_app, Node, [], _Opts, Inform) -> Inform("Stopping node ~p", [Node]), call(Node, {rabbit, stop, []}); -action(start_app, Node, [], Inform) -> +action(start_app, Node, [], _Opts, Inform) -> Inform("Starting node ~p", [Node]), call(Node, {rabbit, start, []}); -action(reset, Node, [], Inform) -> +action(reset, Node, [], _Opts, Inform) -> Inform("Resetting node ~p", [Node]), call(Node, {rabbit_mnesia, reset, []}); -action(force_reset, Node, [], Inform) -> +action(force_reset, Node, [], _Opts, Inform) -> Inform("Forcefully resetting node ~p", [Node]), call(Node, {rabbit_mnesia, force_reset, []}); -action(cluster, Node, ClusterNodeSs, Inform) -> +action(cluster, Node, ClusterNodeSs, _Opts, Inform) -> ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), Inform("Clustering node ~p with ~p", [Node, ClusterNodes]), rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); -action(status, Node, [], Inform) -> +action(force_cluster, Node, ClusterNodeSs, _Opts, Inform) -> + ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), + Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", + [Node, ClusterNodes]), + rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); + +action(status, Node, [], _Opts, Inform) -> Inform("Status of node ~p", [Node]), case call(Node, {rabbit, status, []}) of {badrpc, 
_} = Res -> Res; @@ -168,129 +185,125 @@ action(status, Node, [], Inform) -> ok end; -action(rotate_logs, Node, [], Inform) -> +action(rotate_logs, Node, [], _Opts, Inform) -> Inform("Reopening logs for node ~p", [Node]), call(Node, {rabbit, rotate_logs, [""]}); -action(rotate_logs, Node, Args = [Suffix], Inform) -> +action(rotate_logs, Node, Args = [Suffix], _Opts, Inform) -> Inform("Rotating logs to files with suffix ~p", [Suffix]), call(Node, {rabbit, rotate_logs, Args}); -action(close_connection, Node, [PidStr, Explanation], Inform) -> +action(close_connection, Node, [PidStr, Explanation], _Opts, Inform) -> Inform("Closing connection ~s", [PidStr]), rpc_call(Node, rabbit_networking, close_connection, [rabbit_misc:string_to_pid(PidStr), Explanation]); -action(add_user, Node, Args = [Username, _Password], Inform) -> +action(add_user, Node, Args = [Username, _Password], _Opts, Inform) -> Inform("Creating user ~p", [Username]), call(Node, {rabbit_access_control, add_user, Args}); -action(delete_user, Node, Args = [_Username], Inform) -> +action(delete_user, Node, Args = [_Username], _Opts, Inform) -> Inform("Deleting user ~p", Args), call(Node, {rabbit_access_control, delete_user, Args}); -action(change_password, Node, Args = [Username, _Newpassword], Inform) -> +action(change_password, Node, Args = [Username, _Newpassword], _Opts, Inform) -> Inform("Changing password for user ~p", [Username]), call(Node, {rabbit_access_control, change_password, Args}); -action(list_users, Node, [], Inform) -> +action(set_admin, Node, [Username], _Opts, Inform) -> + Inform("Setting administrative status for user ~p", [Username]), + call(Node, {rabbit_access_control, set_admin, [Username]}); + +action(clear_admin, Node, [Username], _Opts, Inform) -> + Inform("Clearing administrative status for user ~p", [Username]), + call(Node, {rabbit_access_control, clear_admin, [Username]}); + +action(list_users, Node, [], _Opts, Inform) -> Inform("Listing users", []), display_list(call(Node, 
{rabbit_access_control, list_users, []})); -action(add_vhost, Node, Args = [_VHostPath], Inform) -> +action(add_vhost, Node, Args = [_VHostPath], _Opts, Inform) -> Inform("Creating vhost ~p", Args), call(Node, {rabbit_access_control, add_vhost, Args}); -action(delete_vhost, Node, Args = [_VHostPath], Inform) -> +action(delete_vhost, Node, Args = [_VHostPath], _Opts, Inform) -> Inform("Deleting vhost ~p", Args), call(Node, {rabbit_access_control, delete_vhost, Args}); -action(list_vhosts, Node, [], Inform) -> +action(list_vhosts, Node, [], _Opts, Inform) -> Inform("Listing vhosts", []), display_list(call(Node, {rabbit_access_control, list_vhosts, []})); -action(list_user_permissions, Node, Args = [_Username], Inform) -> +action(list_user_permissions, Node, Args = [_Username], _Opts, Inform) -> Inform("Listing permissions for user ~p", Args), display_list(call(Node, {rabbit_access_control, list_user_permissions, Args})); -action(list_queues, Node, Args, Inform) -> +action(list_queues, Node, Args, Opts, Inform) -> Inform("Listing queues", []), - {VHostArg, RemainingArgs} = parse_vhost_flag_bin(Args), - ArgAtoms = default_if_empty(RemainingArgs, [name, messages]), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + ArgAtoms = default_if_empty(Args, [name, messages]), display_info_list(rpc_call(Node, rabbit_amqqueue, info_all, [VHostArg, ArgAtoms]), ArgAtoms); -action(list_exchanges, Node, Args, Inform) -> +action(list_exchanges, Node, Args, Opts, Inform) -> Inform("Listing exchanges", []), - {VHostArg, RemainingArgs} = parse_vhost_flag_bin(Args), - ArgAtoms = default_if_empty(RemainingArgs, [name, type]), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + ArgAtoms = default_if_empty(Args, [name, type]), display_info_list(rpc_call(Node, rabbit_exchange, info_all, [VHostArg, ArgAtoms]), ArgAtoms); -action(list_bindings, Node, Args, Inform) -> +action(list_bindings, Node, Args, Opts, Inform) -> Inform("Listing bindings", []), - 
{VHostArg, _} = parse_vhost_flag_bin(Args), - InfoKeys = [exchange_name, queue_name, routing_key, args], - display_info_list( - [lists:zip(InfoKeys, tuple_to_list(X)) || - X <- rpc_call(Node, rabbit_exchange, list_bindings, [VHostArg])], - InfoKeys); + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + ArgAtoms = default_if_empty(Args, [exchange_name, queue_name, + routing_key, arguments]), + display_info_list(rpc_call(Node, rabbit_binding, info_all, + [VHostArg, ArgAtoms]), + ArgAtoms); -action(list_connections, Node, Args, Inform) -> +action(list_connections, Node, Args, _Opts, Inform) -> Inform("Listing connections", []), ArgAtoms = default_if_empty(Args, [user, peer_address, peer_port, state]), display_info_list(rpc_call(Node, rabbit_networking, connection_info_all, [ArgAtoms]), ArgAtoms); -action(list_channels, Node, Args, Inform) -> +action(list_channels, Node, Args, _Opts, Inform) -> Inform("Listing channels", []), ArgAtoms = default_if_empty(Args, [pid, user, transactional, consumer_count, messages_unacknowledged]), display_info_list(rpc_call(Node, rabbit_channel, info_all, [ArgAtoms]), ArgAtoms); -action(list_consumers, Node, Args, Inform) -> +action(list_consumers, Node, _Args, Opts, Inform) -> Inform("Listing consumers", []), - {VHostArg, _} = parse_vhost_flag_bin(Args), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), InfoKeys = [queue_name, channel_pid, consumer_tag, ack_required], display_info_list( [lists:zip(InfoKeys, tuple_to_list(X)) || X <- rpc_call(Node, rabbit_amqqueue, consumers_all, [VHostArg])], InfoKeys); -action(Command, Node, Args, Inform) -> - {VHost, RemainingArgs} = parse_vhost_flag(Args), - action(Command, Node, VHost, RemainingArgs, Inform). 
- -action(set_permissions, Node, VHost, [Username, CPerm, WPerm, RPerm], Inform) -> +action(set_permissions, Node, [Username, CPerm, WPerm, RPerm], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), + Scope = proplists:get_value(?SCOPE_OPT, Opts), Inform("Setting permissions for user ~p in vhost ~p", [Username, VHost]), call(Node, {rabbit_access_control, set_permissions, - [Username, VHost, CPerm, WPerm, RPerm]}); + [Scope, Username, VHost, CPerm, WPerm, RPerm]}); -action(clear_permissions, Node, VHost, [Username], Inform) -> +action(clear_permissions, Node, [Username], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), Inform("Clearing permissions for user ~p in vhost ~p", [Username, VHost]), call(Node, {rabbit_access_control, clear_permissions, [Username, VHost]}); -action(list_permissions, Node, VHost, [], Inform) -> +action(list_permissions, Node, [], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), Inform("Listing permissions in vhost ~p", [VHost]), display_list(call(Node, {rabbit_access_control, list_vhost_permissions, [VHost]})). -parse_vhost_flag(Args) when is_list(Args) -> - case Args of - ["-p", VHost | RemainingArgs] -> - {VHost, RemainingArgs}; - RemainingArgs -> - {"/", RemainingArgs} - end. - -parse_vhost_flag_bin(Args) -> - {VHost, RemainingArgs} = parse_vhost_flag(Args), - {list_to_binary(VHost), RemainingArgs}. - default_if_empty(List, Default) when is_list(List) -> if List == [] -> Default; @@ -299,9 +312,11 @@ default_if_empty(List, Default) when is_list(List) -> end. display_info_list(Results, InfoItemKeys) when is_list(Results) -> - lists:foreach(fun (Result) -> display_row([format_info_item(X, Result) || - X <- InfoItemKeys]) - end, Results), + lists:foreach( + fun (Result) -> display_row( + [format_info_item(proplists:get_value(X, Result)) || + X <- InfoItemKeys]) + end, Results), ok; display_info_list(Other, _) -> Other. 
@@ -310,25 +325,30 @@ display_row(Row) -> io:fwrite(lists:flatten(rabbit_misc:intersperse("\t", Row))), io:nl(). -format_info_item(Key, Items) -> - case proplists:get_value(Key, Items) of - #resource{name = Name} -> - escape(Name); - Value when Key =:= address; Key =:= peer_address andalso - is_tuple(Value) -> - inet_parse:ntoa(Value); - Value when is_pid(Value) -> - rabbit_misc:pid_to_string(Value); - Value when is_binary(Value) -> - escape(Value); - Value when is_atom(Value) -> - escape(atom_to_list(Value)); - Value = [{TableEntryKey, TableEntryType, _TableEntryValue} | _] - when is_binary(TableEntryKey) andalso is_atom(TableEntryType) -> - io_lib:format("~1000000000000p", [prettify_amqp_table(Value)]); - Value -> - io_lib:format("~w", [Value]) - end. +-define(IS_U8(X), (X >= 0 andalso X =< 255)). +-define(IS_U16(X), (X >= 0 andalso X =< 65535)). + +format_info_item(#resource{name = Name}) -> + escape(Name); +format_info_item({N1, N2, N3, N4} = Value) when + ?IS_U8(N1), ?IS_U8(N2), ?IS_U8(N3), ?IS_U8(N4) -> + inet_parse:ntoa(Value); +format_info_item({K1, K2, K3, K4, K5, K6, K7, K8} = Value) when + ?IS_U16(K1), ?IS_U16(K2), ?IS_U16(K3), ?IS_U16(K4), + ?IS_U16(K5), ?IS_U16(K6), ?IS_U16(K7), ?IS_U16(K8) -> + inet_parse:ntoa(Value); +format_info_item(Value) when is_pid(Value) -> + rabbit_misc:pid_to_string(Value); +format_info_item(Value) when is_binary(Value) -> + escape(Value); +format_info_item(Value) when is_atom(Value) -> + escape(atom_to_list(Value)); +format_info_item([{TableEntryKey, TableEntryType, _TableEntryValue} | _] = + Value) when is_binary(TableEntryKey) andalso + is_atom(TableEntryType) -> + io_lib:format("~1000000000000p", [prettify_amqp_table(Value)]); +format_info_item(Value) -> + io_lib:format("~w", [Value]). display_list(L) when is_list(L) -> lists:foreach(fun (I) when is_binary(I) -> @@ -351,6 +371,8 @@ rpc_call(Node, Mod, Fun, Args) -> %% characters. We don't escape characters above 127, since they may %% form part of UTF-8 strings. 
+escape(Atom) when is_atom(Atom) -> + escape(atom_to_list(Atom)); escape(Bin) when is_binary(Bin) -> escape(binary_to_list(Bin)); escape(L) when is_list(L) -> diff --git a/src/rabbit_dialyzer.erl b/src/rabbit_dialyzer.erl index f19e8d02..51bd6b1f 100644 --- a/src/rabbit_dialyzer.erl +++ b/src/rabbit_dialyzer.erl @@ -30,17 +30,17 @@ %% -module(rabbit_dialyzer). --include("rabbit.hrl"). --export([create_basic_plt/1, add_to_plt/2, dialyze_files/2, halt_with_code/1]). +-export([create_basic_plt/1, add_to_plt/2, dialyze_files/2, + halt_with_code/1]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(create_basic_plt/1 :: (file_path()) -> 'ok'). --spec(add_to_plt/2 :: (file_path(), string()) -> 'ok'). --spec(dialyze_files/2 :: (file_path(), string()) -> 'ok'). +-spec(create_basic_plt/1 :: (file:filename()) -> 'ok'). +-spec(add_to_plt/2 :: (file:filename(), string()) -> 'ok'). +-spec(dialyze_files/2 :: (file:filename(), string()) -> 'ok'). -spec(halt_with_code/1 :: (atom()) -> no_return()). -endif. @@ -56,7 +56,7 @@ create_basic_plt(BasicPltPath) -> ok. add_to_plt(PltPath, FilesString) -> - {ok, Files} = regexp:split(FilesString, " "), + Files = string:tokens(FilesString, " "), DialyzerWarnings = dialyzer:run([{analysis_type, plt_add}, {init_plt, PltPath}, {output_plt, PltPath}, @@ -65,7 +65,7 @@ add_to_plt(PltPath, FilesString) -> ok. dialyze_files(PltPath, ModifiedFiles) -> - {ok, Files} = regexp:split(ModifiedFiles, " "), + Files = string:tokens(ModifiedFiles, " "), DialyzerWarnings = dialyzer:run([{init_plt, PltPath}, {files, Files}]), case DialyzerWarnings of diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl index e9baf2c4..42861f86 100644 --- a/src/rabbit_error_logger.erl +++ b/src/rabbit_error_logger.erl @@ -39,7 +39,8 @@ -export([boot/0]). --export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2, handle_info/2]). 
+-export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2, + handle_info/2]). boot() -> {ok, DefaultVHost} = application:get_env(default_vhost), diff --git a/src/rabbit_error_logger_file_h.erl b/src/rabbit_error_logger_file_h.erl index 45b66712..875d680f 100644 --- a/src/rabbit_error_logger_file_h.erl +++ b/src/rabbit_error_logger_file_h.erl @@ -33,7 +33,8 @@ -behaviour(gen_event). --export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]). +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). %% rabbit_error_logger_file_h is a wrapper around the error_logger_file_h %% module because the original's init/1 does not match properly diff --git a/src/rabbit_event.erl b/src/rabbit_event.erl new file mode 100644 index 00000000..0f00537a --- /dev/null +++ b/src/rabbit_event.erl @@ -0,0 +1,138 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. 
Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_event). + +-include("rabbit.hrl"). + +-export([start_link/0]). +-export([init_stats_timer/0, ensure_stats_timer/3, stop_stats_timer/2]). +-export([ensure_stats_timer_after/2, reset_stats_timer_after/1]). +-export([stats_level/1]). +-export([notify/2]). + +%%---------------------------------------------------------------------------- + +-record(state, {level, timer}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([event_type/0, event_props/0, event_timestamp/0, event/0]). + +-type(event_type() :: atom()). +-type(event_props() :: term()). +-type(event_timestamp() :: + {non_neg_integer(), non_neg_integer(), non_neg_integer()}). + +-type(event() :: #event { + type :: event_type(), + props :: event_props(), + timestamp :: event_timestamp() + }). + +-type(level() :: 'none' | 'coarse' | 'fine'). + +-opaque(state() :: #state { + level :: level(), + timer :: atom() + }). + +-type(timer_fun() :: fun (() -> 'ok')). + +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). +-spec(init_stats_timer/0 :: () -> state()). +-spec(ensure_stats_timer/3 :: (state(), timer_fun(), timer_fun()) -> state()). +-spec(stop_stats_timer/2 :: (state(), timer_fun()) -> state()). +-spec(ensure_stats_timer_after/2 :: (state(), timer_fun()) -> state()). +-spec(reset_stats_timer_after/1 :: (state()) -> state()). +-spec(stats_level/1 :: (state()) -> level()). +-spec(notify/2 :: (event_type(), event_props()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_event:start_link({local, ?MODULE}). 
+ +init_stats_timer() -> + {ok, StatsLevel} = application:get_env(rabbit, collect_statistics), + #state{level = StatsLevel, timer = undefined}. + +ensure_stats_timer(State = #state{level = none}, _NowFun, _TimerFun) -> + State; +ensure_stats_timer(State = #state{timer = undefined}, NowFun, TimerFun) -> + NowFun(), + {ok, TRef} = timer:apply_interval(?STATS_INTERVAL, + erlang, apply, [TimerFun, []]), + State#state{timer = TRef}; +ensure_stats_timer(State, _NowFun, _TimerFun) -> + State. + +stop_stats_timer(State = #state{level = none}, _NowFun) -> + State; +stop_stats_timer(State = #state{timer = undefined}, _NowFun) -> + State; +stop_stats_timer(State = #state{timer = TRef}, NowFun) -> + {ok, cancel} = timer:cancel(TRef), + NowFun(), + State#state{timer = undefined}. + +ensure_stats_timer_after(State = #state{level = none}, _TimerFun) -> + State; +ensure_stats_timer_after(State = #state{timer = undefined}, TimerFun) -> + {ok, TRef} = timer:apply_after(?STATS_INTERVAL, + erlang, apply, [TimerFun, []]), + State#state{timer = TRef}; +ensure_stats_timer_after(State, _TimerFun) -> + State. + +reset_stats_timer_after(State) -> + State#state{timer = undefined}. + +stats_level(#state{level = Level}) -> + Level. + +notify(Type, Props) -> + try + %% TODO: switch to os:timestamp() when we drop support for + %% Erlang/OTP < R13B01 + gen_event:notify(rabbit_event, #event{type = Type, + props = Props, + timestamp = now()}) + catch error:badarg -> + %% badarg means rabbit_event is no longer registered. We never + %% unregister it so the great likelihood is that we're shutting + %% down the broker but some events were backed up. Ignore it. + ok + end. diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl index d77bf833..2a19d5b1 100644 --- a/src/rabbit_exchange.erl +++ b/src/rabbit_exchange.erl @@ -33,68 +33,57 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). 
--export([recover/0, declare/5, lookup/1, lookup_or_die/1, - list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2, - publish/2]). --export([add_binding/5, delete_binding/5, list_bindings/1]). --export([delete/2]). --export([delete_queue_bindings/1, delete_transient_queue_bindings/1]). --export([assert_equivalence/5]). --export([assert_args_equivalence/2]). --export([check_type/1]). - -%% EXTENDED API --export([list_exchange_bindings/1]). --export([list_queue_bindings/1]). - --import(mnesia). --import(sets). --import(lists). --import(regexp). +-export([recover/0, declare/5, lookup/1, lookup_or_die/1, list/1, info_keys/0, + info/1, info/2, info_all/1, info_all/2, publish/2, delete/2]). +%% this must be run inside a mnesia tx +-export([maybe_auto_delete/1]). +-export([assert_equivalence/5, assert_args_equivalence/2, check_type/1]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(bind_res() :: 'ok' | {'error', - 'queue_not_found' | - 'exchange_not_found' | - 'exchange_and_queue_not_found'}). --type(inner_fun() :: fun((exchange(), queue()) -> any())). +-export_type([name/0, type/0]). + +-type(name() :: rabbit_types:r('exchange')). +-type(type() :: atom()). -spec(recover/0 :: () -> 'ok'). --spec(declare/5 :: (exchange_name(), exchange_type(), boolean(), boolean(), - amqp_table()) -> exchange()). --spec(check_type/1 :: (binary()) -> atom()). --spec(assert_equivalence/5 :: (exchange(), atom(), boolean(), boolean(), - amqp_table()) -> 'ok'). --spec(assert_args_equivalence/2 :: (exchange(), amqp_table()) -> 'ok'). --spec(lookup/1 :: (exchange_name()) -> {'ok', exchange()} | not_found()). --spec(lookup_or_die/1 :: (exchange_name()) -> exchange()). --spec(list/1 :: (vhost()) -> [exchange()]). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (exchange()) -> [info()]). --spec(info/2 :: (exchange(), [info_key()]) -> [info()]). --spec(info_all/1 :: (vhost()) -> [[info()]]). 
--spec(info_all/2 :: (vhost(), [info_key()]) -> [[info()]]). --spec(publish/2 :: (exchange(), delivery()) -> {routing_result(), [pid()]}). --spec(add_binding/5 :: - (exchange_name(), queue_name(), routing_key(), amqp_table(), inner_fun()) -> - bind_res()). --spec(delete_binding/5 :: - (exchange_name(), queue_name(), routing_key(), amqp_table(), inner_fun()) -> - bind_res() | {'error', 'binding_not_found'}). --spec(list_bindings/1 :: (vhost()) -> - [{exchange_name(), queue_name(), routing_key(), amqp_table()}]). --spec(delete_queue_bindings/1 :: (queue_name()) -> fun (() -> none())). --spec(delete_transient_queue_bindings/1 :: (queue_name()) -> - fun (() -> none())). --spec(delete/2 :: (exchange_name(), boolean()) -> - 'ok' | not_found() | {'error', 'in_use'}). --spec(list_queue_bindings/1 :: (queue_name()) -> - [{exchange_name(), routing_key(), amqp_table()}]). --spec(list_exchange_bindings/1 :: (exchange_name()) -> - [{queue_name(), routing_key(), amqp_table()}]). +-spec(declare/5 :: + (name(), type(), boolean(), boolean(), rabbit_framing:amqp_table()) + -> rabbit_types:exchange()). +-spec(check_type/1 :: + (binary()) -> atom() | rabbit_types:connection_exit()). +-spec(assert_equivalence/5 :: + (rabbit_types:exchange(), atom(), boolean(), boolean(), + rabbit_framing:amqp_table()) + -> 'ok' | rabbit_types:connection_exit()). +-spec(assert_args_equivalence/2 :: + (rabbit_types:exchange(), rabbit_framing:amqp_table()) + -> 'ok' | rabbit_types:connection_exit()). +-spec(lookup/1 :: + (name()) -> rabbit_types:ok(rabbit_types:exchange()) | + rabbit_types:error('not_found')). +-spec(lookup_or_die/1 :: + (name()) -> rabbit_types:exchange() | + rabbit_types:channel_exit()). +-spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:exchange()]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (rabbit_types:exchange()) -> [rabbit_types:info()]). +-spec(info/2 :: + (rabbit_types:exchange(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). 
+-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]). +-spec(info_all/2 ::(rabbit_types:vhost(), [rabbit_types:info_key()]) + -> [[rabbit_types:info()]]). +-spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) + -> {rabbit_router:routing_result(), [pid()]}). +-spec(delete/2 :: + (name(), boolean())-> 'ok' | + rabbit_types:error('not_found') | + rabbit_types:error('in_use')). +-spec(maybe_auto_delete/1:: (rabbit_types:exchange()) -> + 'not_deleted' | 'auto_deleted'). -endif. @@ -103,27 +92,15 @@ -define(INFO_KEYS, [name, type, durable, auto_delete, arguments]). recover() -> - Exs = rabbit_misc:table_fold( - fun (Exchange, Acc) -> - ok = mnesia:write(rabbit_exchange, Exchange, write), - [Exchange | Acc] - end, [], rabbit_durable_exchange), - Bs = rabbit_misc:table_fold( - fun (Route = #route{binding = B}, Acc) -> - {_, ReverseRoute} = route_with_reverse(Route), - ok = mnesia:write(rabbit_route, - Route, write), - ok = mnesia:write(rabbit_reverse_route, - ReverseRoute, write), - [B | Acc] - end, [], rabbit_durable_route), - recover_with_bindings(Bs, Exs), - ok. - -recover_with_bindings(Bs, Exs) -> + Xs = rabbit_misc:table_fold( + fun (X, Acc) -> + ok = mnesia:write(rabbit_exchange, X, write), + [X | Acc] + end, [], rabbit_durable_exchange), + Bs = rabbit_binding:recover(), recover_with_bindings( lists:keysort(#binding.exchange_name, Bs), - lists:keysort(#exchange.name, Exs), []). + lists:keysort(#exchange.name, Xs), []). recover_with_bindings([B = #binding{exchange_name = Name} | Rest], Xs = [#exchange{name = Name} | _], @@ -135,35 +112,36 @@ recover_with_bindings(Bs, [X = #exchange{type = Type} | Xs], Bindings) -> recover_with_bindings([], [], []) -> ok. 
-declare(ExchangeName, Type, Durable, AutoDelete, Args) -> - Exchange = #exchange{name = ExchangeName, - type = Type, - durable = Durable, - auto_delete = AutoDelete, - arguments = Args}, +declare(XName, Type, Durable, AutoDelete, Args) -> + X = #exchange{name = XName, + type = Type, + durable = Durable, + auto_delete = AutoDelete, + arguments = Args}, %% We want to upset things if it isn't ok; this is different from %% the other hooks invocations, where we tend to ignore the return %% value. TypeModule = type_to_module(Type), - ok = TypeModule:validate(Exchange), + ok = TypeModule:validate(X), case rabbit_misc:execute_mnesia_transaction( fun () -> - case mnesia:wread({rabbit_exchange, ExchangeName}) of + case mnesia:wread({rabbit_exchange, XName}) of [] -> - ok = mnesia:write(rabbit_exchange, Exchange, write), + ok = mnesia:write(rabbit_exchange, X, write), ok = case Durable of true -> mnesia:write(rabbit_durable_exchange, - Exchange, write); + X, write); false -> ok end, - {new, Exchange}; + {new, X}; [ExistingX] -> {existing, ExistingX} end end) of {new, X} -> TypeModule:create(X), + rabbit_event:notify(exchange_created, info(X)), X; {existing, X} -> X; Err -> Err @@ -171,12 +149,8 @@ declare(ExchangeName, Type, Durable, AutoDelete, Args) -> %% Used with atoms from records; e.g., the type is expected to exist. type_to_module(T) -> - case rabbit_exchange_type_registry:lookup_module(T) of - {ok, Module} -> Module; - {error, not_found} -> rabbit_misc:protocol_error( - command_invalid, - "invalid exchange type '~s'", [T]) - end. + {ok, Module} = rabbit_exchange_type_registry:lookup_module(T), + Module. %% Used with binaries sent over the wire; the type may not exist. 
check_type(TypeBin) -> @@ -185,40 +159,33 @@ check_type(TypeBin) -> rabbit_misc:protocol_error( command_invalid, "unknown exchange type '~s'", [TypeBin]); T -> - _Module = type_to_module(T), - T + case rabbit_exchange_type_registry:lookup_module(T) of + {error, not_found} -> rabbit_misc:protocol_error( + command_invalid, + "invalid exchange type '~s'", [T]); + {ok, _Module} -> T + end end. -assert_equivalence(X = #exchange{ durable = Durable, +assert_equivalence(X = #exchange{ durable = Durable, auto_delete = AutoDelete, - type = Type}, - Type, Durable, AutoDelete, - RequiredArgs) -> - ok = (type_to_module(Type)):assert_args_equivalence(X, RequiredArgs); + type = Type}, + Type, Durable, AutoDelete, RequiredArgs) -> + (type_to_module(Type)):assert_args_equivalence(X, RequiredArgs); assert_equivalence(#exchange{ name = Name }, _Type, _Durable, _AutoDelete, _Args) -> rabbit_misc:protocol_error( - precondition_failed, + not_allowed, "cannot redeclare ~s with different type, durable or autodelete value", [rabbit_misc:rs(Name)]). -alternate_exchange_value(Args) -> - lists:keysearch(<<"alternate-exchange">>, 1, Args). - -assert_args_equivalence(#exchange{ name = Name, - arguments = Args }, +assert_args_equivalence(#exchange{ name = Name, arguments = Args }, RequiredArgs) -> %% The spec says "Arguments are compared for semantic %% equivalence". The only arg we care about is %% "alternate-exchange". - Ae1 = alternate_exchange_value(RequiredArgs), - Ae2 = alternate_exchange_value(Args), - if Ae1==Ae2 -> ok; - true -> rabbit_misc:protocol_error( - precondition_failed, - "cannot redeclare ~s with inequivalent args", - [rabbit_misc:rs(Name)]) - end. + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, Name, + [<<"alternate-exchange">>]). lookup(Name) -> rabbit_misc:dirty_read({rabbit_exchange, Name}). @@ -290,234 +257,20 @@ publish(X = #exchange{type = Type}, Seen, Delivery) -> R end. 
-%% TODO: Should all of the route and binding management not be -%% refactored to its own module, especially seeing as unbind will have -%% to be implemented for 0.91 ? - -delete_exchange_bindings(ExchangeName) -> - [begin - ok = mnesia:delete_object(rabbit_reverse_route, - reverse_route(Route), write), - ok = delete_forward_routes(Route), - Route#route.binding - end || Route <- mnesia:match_object( - rabbit_route, - #route{binding = #binding{exchange_name = ExchangeName, - _ = '_'}}, - write)]. - -delete_queue_bindings(QueueName) -> - delete_queue_bindings(QueueName, fun delete_forward_routes/1). - -delete_transient_queue_bindings(QueueName) -> - delete_queue_bindings(QueueName, fun delete_transient_forward_routes/1). - -delete_queue_bindings(QueueName, FwdDeleteFun) -> - DeletedBindings = - [begin - Route = reverse_route(ReverseRoute), - ok = FwdDeleteFun(Route), - ok = mnesia:delete_object(rabbit_reverse_route, - ReverseRoute, write), - Route#route.binding - end || ReverseRoute - <- mnesia:match_object( - rabbit_reverse_route, - reverse_route(#route{binding = #binding{ - queue_name = QueueName, - _ = '_'}}), - write)], - Cleanup = cleanup_deleted_queue_bindings( - lists:keysort(#binding.exchange_name, DeletedBindings), []), - fun () -> - lists:foreach( - fun ({{IsDeleted, X = #exchange{ type = Type }}, Bs}) -> - Module = type_to_module(Type), - case IsDeleted of - auto_deleted -> Module:delete(X, Bs); - not_deleted -> Module:remove_bindings(X, Bs) - end - end, Cleanup) - end. - -%% Requires that its input binding list is sorted in exchange-name -%% order, so that the grouping of bindings (for passing to -%% cleanup_deleted_queue_bindings1) works properly. -cleanup_deleted_queue_bindings([], Acc) -> - Acc; -cleanup_deleted_queue_bindings( - [B = #binding{exchange_name = ExchangeName} | Bs], Acc) -> - cleanup_deleted_queue_bindings(ExchangeName, Bs, [B], Acc). 
- -cleanup_deleted_queue_bindings( - ExchangeName, [B = #binding{exchange_name = ExchangeName} | Bs], - Bindings, Acc) -> - cleanup_deleted_queue_bindings(ExchangeName, Bs, [B | Bindings], Acc); -cleanup_deleted_queue_bindings(ExchangeName, Deleted, Bindings, Acc) -> - %% either Deleted is [], or its head has a non-matching ExchangeName - NewAcc = [cleanup_deleted_queue_bindings1(ExchangeName, Bindings) | Acc], - cleanup_deleted_queue_bindings(Deleted, NewAcc). - -cleanup_deleted_queue_bindings1(ExchangeName, Bindings) -> - [X] = mnesia:read({rabbit_exchange, ExchangeName}), - {maybe_auto_delete(X), Bindings}. - - -delete_forward_routes(Route) -> - ok = mnesia:delete_object(rabbit_route, Route, write), - ok = mnesia:delete_object(rabbit_durable_route, Route, write). - -delete_transient_forward_routes(Route) -> - ok = mnesia:delete_object(rabbit_route, Route, write). - -contains(Table, MatchHead) -> - continue(mnesia:select(Table, [{MatchHead, [], ['$_']}], 1, read)). - -continue('$end_of_table') -> false; -continue({[_|_], _}) -> true; -continue({[], Continuation}) -> continue(mnesia:select(Continuation)). - -call_with_exchange(Exchange, Fun) -> +call_with_exchange(XName, Fun) -> rabbit_misc:execute_mnesia_transaction( - fun () -> case mnesia:read({rabbit_exchange, Exchange}) of + fun () -> case mnesia:read({rabbit_exchange, XName}) of [] -> {error, not_found}; [X] -> Fun(X) end end). -call_with_exchange_and_queue(Exchange, Queue, Fun) -> - rabbit_misc:execute_mnesia_transaction( - fun () -> case {mnesia:read({rabbit_exchange, Exchange}), - mnesia:read({rabbit_queue, Queue})} of - {[X], [Q]} -> Fun(X, Q); - {[ ], [_]} -> {error, exchange_not_found}; - {[_], [ ]} -> {error, queue_not_found}; - {[ ], [ ]} -> {error, exchange_and_queue_not_found} - end - end). 
- -add_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) -> - case binding_action( - ExchangeName, QueueName, RoutingKey, Arguments, - fun (X, Q, B) -> - %% this argument is used to check queue exclusivity; - %% in general, we want to fail on that in preference to - %% anything else - InnerFun(X, Q), - case mnesia:read({rabbit_route, B}) of - [] -> - sync_binding(B, - X#exchange.durable andalso - Q#amqqueue.durable, - fun mnesia:write/3), - {new, X, B}; - [_R] -> - {existing, X, B} - end - end) of - {new, Exchange = #exchange{ type = Type }, Binding} -> - (type_to_module(Type)):add_binding(Exchange, Binding); - {existing, _, _} -> - ok; - Err = {error, _} -> - Err - end. - -delete_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) -> - case binding_action( - ExchangeName, QueueName, RoutingKey, Arguments, - fun (X, Q, B) -> - case mnesia:match_object(rabbit_route, #route{binding = B}, - write) of - [] -> {error, binding_not_found}; - _ -> InnerFun(X, Q), - ok = sync_binding(B, Q#amqqueue.durable, - fun mnesia:delete_object/3), - {maybe_auto_delete(X), B} - end - end) of - Err = {error, _} -> - Err; - {{IsDeleted, X = #exchange{ type = Type }}, B} -> - Module = type_to_module(Type), - case IsDeleted of - auto_deleted -> Module:delete(X, [B]); - not_deleted -> Module:remove_bindings(X, [B]) - end - end. - -binding_action(ExchangeName, QueueName, RoutingKey, Arguments, Fun) -> - call_with_exchange_and_queue( - ExchangeName, QueueName, - fun (X, Q) -> - Fun(X, Q, #binding{ - exchange_name = ExchangeName, - queue_name = QueueName, - key = RoutingKey, - args = rabbit_misc:sort_field_table(Arguments)}) - end). - -sync_binding(Binding, Durable, Fun) -> - ok = case Durable of - true -> Fun(rabbit_durable_route, - #route{binding = Binding}, write); - false -> ok - end, - {Route, ReverseRoute} = route_with_reverse(Binding), - ok = Fun(rabbit_route, Route, write), - ok = Fun(rabbit_reverse_route, ReverseRoute, write), - ok. 
- -list_bindings(VHostPath) -> - [{ExchangeName, QueueName, RoutingKey, Arguments} || - #route{binding = #binding{ - exchange_name = ExchangeName, - key = RoutingKey, - queue_name = QueueName, - args = Arguments}} - <- mnesia:dirty_match_object( - rabbit_route, - #route{binding = #binding{ - exchange_name = rabbit_misc:r(VHostPath, exchange), - _ = '_'}, - _ = '_'})]. - -route_with_reverse(#route{binding = Binding}) -> - route_with_reverse(Binding); -route_with_reverse(Binding = #binding{}) -> - Route = #route{binding = Binding}, - {Route, reverse_route(Route)}. - -reverse_route(#route{binding = Binding}) -> - #reverse_route{reverse_binding = reverse_binding(Binding)}; - -reverse_route(#reverse_route{reverse_binding = Binding}) -> - #route{binding = reverse_binding(Binding)}. - -reverse_binding(#reverse_binding{exchange_name = Exchange, - queue_name = Queue, - key = Key, - args = Args}) -> - #binding{exchange_name = Exchange, - queue_name = Queue, - key = Key, - args = Args}; - -reverse_binding(#binding{exchange_name = Exchange, - queue_name = Queue, - key = Key, - args = Args}) -> - #reverse_binding{exchange_name = Exchange, - queue_name = Queue, - key = Key, - args = Args}. - -delete(ExchangeName, IfUnused) -> +delete(XName, IfUnused) -> Fun = case IfUnused of true -> fun conditional_delete/1; false -> fun unconditional_delete/1 end, - case call_with_exchange(ExchangeName, Fun) of + case call_with_exchange(XName, Fun) of {deleted, X = #exchange{type = Type}, Bs} -> (type_to_module(Type)):delete(X, Bs), ok; @@ -525,53 +278,23 @@ delete(ExchangeName, IfUnused) -> Error end. 
-maybe_auto_delete(Exchange = #exchange{auto_delete = false}) -> - {not_deleted, Exchange}; -maybe_auto_delete(Exchange = #exchange{auto_delete = true}) -> - case conditional_delete(Exchange) of - {error, in_use} -> {not_deleted, Exchange}; - {deleted, Exchange, []} -> {auto_deleted, Exchange} +maybe_auto_delete(#exchange{auto_delete = false}) -> + not_deleted; +maybe_auto_delete(#exchange{auto_delete = true} = X) -> + case conditional_delete(X) of + {error, in_use} -> not_deleted; + {deleted, X, []} -> auto_deleted end. -conditional_delete(Exchange = #exchange{name = ExchangeName}) -> - Match = #route{binding = #binding{exchange_name = ExchangeName, _ = '_'}}, - %% we need to check for durable routes here too in case a bunch of - %% routes to durable queues have been removed temporarily as a - %% result of a node failure - case contains(rabbit_route, Match) orelse - contains(rabbit_durable_route, Match) of - false -> unconditional_delete(Exchange); +conditional_delete(X = #exchange{name = XName}) -> + case rabbit_binding:has_for_exchange(XName) of + false -> unconditional_delete(X); true -> {error, in_use} end. -unconditional_delete(Exchange = #exchange{name = ExchangeName}) -> - Bindings = delete_exchange_bindings(ExchangeName), - ok = mnesia:delete({rabbit_durable_exchange, ExchangeName}), - ok = mnesia:delete({rabbit_exchange, ExchangeName}), - {deleted, Exchange, Bindings}. 
- -%%---------------------------------------------------------------------------- -%% EXTENDED API -%% These are API calls that are not used by the server internally, -%% they are exported for embedded clients to use - -%% This is currently used in mod_rabbit.erl (XMPP) and expects this to -%% return {QueueName, RoutingKey, Arguments} tuples -list_exchange_bindings(ExchangeName) -> - Route = #route{binding = #binding{exchange_name = ExchangeName, - _ = '_'}}, - [{QueueName, RoutingKey, Arguments} || - #route{binding = #binding{queue_name = QueueName, - key = RoutingKey, - args = Arguments}} - <- mnesia:dirty_match_object(rabbit_route, Route)]. - -% Refactoring is left as an exercise for the reader -list_queue_bindings(QueueName) -> - Route = #route{binding = #binding{queue_name = QueueName, - _ = '_'}}, - [{ExchangeName, RoutingKey, Arguments} || - #route{binding = #binding{exchange_name = ExchangeName, - key = RoutingKey, - args = Arguments}} - <- mnesia:dirty_match_object(rabbit_route, Route)]. +unconditional_delete(X = #exchange{name = XName}) -> + Bindings = rabbit_binding:remove_for_exchange(XName), + ok = mnesia:delete({rabbit_durable_exchange, XName}), + ok = mnesia:delete({rabbit_exchange, XName}), + rabbit_event:notify(exchange_deleted, [{name, XName}]), + {deleted, X, Bindings}. diff --git a/src/rabbit_exchange_type_fanout.erl b/src/rabbit_exchange_type_fanout.erl index 4f9712b1..94798c78 100644 --- a/src/rabbit_exchange_type_fanout.erl +++ b/src/rabbit_exchange_type_fanout.erl @@ -35,8 +35,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). --export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2, assert_args_equivalence/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). 
-rabbit_boot_step({?MODULE, diff --git a/src/rabbit_exchange_type_headers.erl b/src/rabbit_exchange_type_headers.erl index 315e8000..0a59a175 100644 --- a/src/rabbit_exchange_type_headers.erl +++ b/src/rabbit_exchange_type_headers.erl @@ -36,8 +36,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). --export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2, assert_args_equivalence/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, @@ -48,7 +48,8 @@ {enables, kernel_ready}]}). -ifdef(use_specs). --spec(headers_match/2 :: (amqp_table(), amqp_table()) -> boolean()). +-spec(headers_match/2 :: (rabbit_framing:amqp_table(), + rabbit_framing:amqp_table()) -> boolean()). -endif. description() -> @@ -78,8 +79,8 @@ parse_x_match(Other) -> %% Horrendous matching algorithm. Depends for its merge-like %% (linear-time) behaviour on the lists:keysort -%% (rabbit_misc:sort_field_table) that route/3 and -%% rabbit_exchange:{add,delete}_binding/4 do. +%% (rabbit_misc:sort_field_table) that publish/1 and +%% rabbit_binding:{add,remove}/2 do. %% %% !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! %% In other words: REQUIRES BOTH PATTERN AND DATA TO BE SORTED ASCENDING BY KEY. diff --git a/src/rabbit_exchange_type_registry.erl b/src/rabbit_exchange_type_registry.erl index 33ea0e92..f15275b5 100644 --- a/src/rabbit_exchange_type_registry.erl +++ b/src/rabbit_exchange_type_registry.erl @@ -35,8 +35,8 @@ -export([start_link/0]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). -export([register/2, binary_to_type/1, lookup_module/1]). @@ -45,10 +45,12 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | {'error', term()} | {'ok', pid()}). 
+-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(register/2 :: (binary(), atom()) -> 'ok'). --spec(binary_to_type/1 :: (binary()) -> atom() | {'error', 'not_found'}). --spec(lookup_module/1 :: (atom()) -> {'ok', atom()} | {'error', 'not_found'}). +-spec(binary_to_type/1 :: + (binary()) -> atom() | rabbit_types:error('not_found')). +-spec(lookup_module/1 :: + (atom()) -> rabbit_types:ok_or_error2(atom(), 'not_found')). -endif. diff --git a/src/rabbit_exchange_type_topic.erl b/src/rabbit_exchange_type_topic.erl index 0e22d545..e796acf3 100644 --- a/src/rabbit_exchange_type_topic.erl +++ b/src/rabbit_exchange_type_topic.erl @@ -35,8 +35,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). --export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2, assert_args_equivalence/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, @@ -49,7 +49,9 @@ -export([topic_matches/2]). -ifdef(use_specs). + -spec(topic_matches/2 :: (binary(), binary()) -> boolean()). + -endif. description() -> @@ -65,8 +67,7 @@ publish(#exchange{name = Name}, Delivery = Delivery). split_topic_key(Key) -> - {ok, KeySplit} = regexp:split(binary_to_list(Key), "\\."), - KeySplit. + string:tokens(binary_to_list(Key), "."). topic_matches(PatternKey, RoutingKey) -> P = split_topic_key(PatternKey), diff --git a/src/rabbit_framing_channel.erl b/src/rabbit_framing_channel.erl index b7c6aa96..cb53185f 100644 --- a/src/rabbit_framing_channel.erl +++ b/src/rabbit_framing_channel.erl @@ -32,21 +32,16 @@ -module(rabbit_framing_channel). -include("rabbit.hrl"). --export([start_link/2, process/2, shutdown/1]). +-export([start_link/3, process/2, shutdown/1]). %% internal --export([mainloop/1]). +-export([mainloop/3]). 
%%-------------------------------------------------------------------- -start_link(StartFun, StartArgs) -> - spawn_link( - fun () -> - %% we trap exits so that a normal termination of the - %% channel or reader process terminates us too. - process_flag(trap_exit, true), - mainloop(apply(StartFun, StartArgs)) - end). +start_link(Parent, ChannelPid, Protocol) -> + {ok, proc_lib:spawn_link( + fun () -> mainloop(Parent, ChannelPid, Protocol) end)}. process(Pid, Frame) -> Pid ! {frame, Frame}, @@ -60,55 +55,61 @@ shutdown(Pid) -> read_frame(ChannelPid) -> receive - %% converting the exit signal into one of our own ensures that - %% the reader sees the right pid (i.e. ours) when a channel - %% exits. Similarly in the other direction, though it is not - %% really relevant there since the channel is not specifically - %% watching out for reader exit signals. - {'EXIT', _Pid, Reason} -> exit(Reason); {frame, Frame} -> Frame; terminate -> rabbit_channel:shutdown(ChannelPid), read_frame(ChannelPid); Msg -> exit({unexpected_message, Msg}) end. -mainloop(ChannelPid) -> - {method, MethodName, FieldsBin} = read_frame(ChannelPid), - Method = rabbit_framing:decode_method_fields(MethodName, FieldsBin), - case rabbit_framing:method_has_content(MethodName) of - true -> rabbit_channel:do(ChannelPid, Method, - collect_content(ChannelPid, MethodName)); - false -> rabbit_channel:do(ChannelPid, Method) - end, - ?MODULE:mainloop(ChannelPid). 
+mainloop(Parent, ChannelPid, Protocol) -> + case read_frame(ChannelPid) of + {method, MethodName, FieldsBin} -> + Method = Protocol:decode_method_fields(MethodName, FieldsBin), + case Protocol:method_has_content(MethodName) of + true -> {ClassId, _MethodId} = Protocol:method_id(MethodName), + case collect_content(ChannelPid, ClassId, Protocol) of + {ok, Content} -> + rabbit_channel:do(ChannelPid, Method, Content), + ?MODULE:mainloop(Parent, ChannelPid, Protocol); + {error, Reason} -> + channel_exit(Parent, Reason, MethodName) + end; + false -> rabbit_channel:do(ChannelPid, Method), + ?MODULE:mainloop(Parent, ChannelPid, Protocol) + end; + _ -> + channel_exit(Parent, {unexpected_frame, + "expected method frame, " + "got non method frame instead", + []}, none) + end. -collect_content(ChannelPid, MethodName) -> - {ClassId, _MethodId} = rabbit_framing:method_id(MethodName), +collect_content(ChannelPid, ClassId, Protocol) -> case read_frame(ChannelPid) of - {content_header, HeaderClassId, 0, BodySize, PropertiesBin} -> - if HeaderClassId == ClassId -> - Payload = collect_content_payload(ChannelPid, BodySize, []), - #content{class_id = ClassId, - properties = none, - properties_bin = PropertiesBin, - payload_fragments_rev = Payload}; - true -> - rabbit_misc:protocol_error( - command_invalid, - "expected content header for class ~w, " - "got one for class ~w instead", - [ClassId, HeaderClassId]) + {content_header, ClassId, 0, BodySize, PropertiesBin} -> + case collect_content_payload(ChannelPid, BodySize, []) of + {ok, Payload} -> {ok, #content{ + class_id = ClassId, + properties = none, + properties_bin = PropertiesBin, + protocol = Protocol, + payload_fragments_rev = Payload}}; + Error -> Error end; + {content_header, HeaderClassId, 0, _BodySize, _PropertiesBin} -> + {error, {unexpected_frame, + "expected content header for class ~w, " + "got one for class ~w instead", + [ClassId, HeaderClassId]}}; _ -> - rabbit_misc:protocol_error( - command_invalid, - "expected 
content header for class ~w, " - "got non content header frame instead", - [ClassId]) + {error, {unexpected_frame, + "expected content header for class ~w, " + "got non content header frame instead", + [ClassId]}} end. collect_content_payload(_ChannelPid, 0, Acc) -> - Acc; + {ok, Acc}; collect_content_payload(ChannelPid, RemainingByteCount, Acc) -> case read_frame(ChannelPid) of {content_body, FragmentBin} -> @@ -116,8 +117,13 @@ collect_content_payload(ChannelPid, RemainingByteCount, Acc) -> RemainingByteCount - size(FragmentBin), [FragmentBin | Acc]); _ -> - rabbit_misc:protocol_error( - command_invalid, - "expected content body, got non content body frame instead", - []) + {error, {unexpected_frame, + "expected content body, " + "got non content body frame instead", + []}} end. + +channel_exit(Parent, {ErrorName, ExplanationFormat, Params}, MethodName) -> + Reason = rabbit_misc:amqp_error(ErrorName, ExplanationFormat, Params, + MethodName), + Parent ! {channel_exit, self(), Reason}. diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index 1ae8f7da..e7d0c101 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -31,15 +31,13 @@ -module(rabbit_guid). --include("rabbit.hrl"). - -behaviour(gen_server). -export([start_link/0]). -export([guid/0, string_guid/1, binstring_guid/1]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). -define(SERVER, ?MODULE). -define(SERIAL_FILENAME, "rabbit_serial"). @@ -50,7 +48,11 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-export_type([guid/0]). + +-type(guid() :: binary()). + +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(guid/0 :: () -> guid()). -spec(string_guid/1 :: (any()) -> string()). -spec(binstring_guid/1 :: (any()) -> binary()). 
diff --git a/src/rabbit_heartbeat.erl b/src/rabbit_heartbeat.erl index 45565705..a9945af1 100644 --- a/src/rabbit_heartbeat.erl +++ b/src/rabbit_heartbeat.erl @@ -31,70 +31,102 @@ -module(rabbit_heartbeat). --export([start_heartbeat/2]). +-export([start_heartbeat_sender/3, start_heartbeat_receiver/3, + pause_monitor/1, resume_monitor/1]). -start_heartbeat(_Sock, 0) -> - none; -start_heartbeat(Sock, TimeoutSec) -> - Parent = self(), - %% we check for incoming data every interval, and time out after - %% two checks with no change. As a result we will time out between - %% 2 and 3 intervals after the last data has been received. - spawn_link(fun () -> heartbeater(Sock, TimeoutSec * 1000, - recv_oct, 1, - fun () -> - Parent ! timeout, - stop - end, - erlang:monitor(process, Parent)) end), +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([heartbeaters/0]). + +-type(heartbeaters() :: rabbit_types:maybe({pid(), pid()})). + +-spec(start_heartbeat_sender/3 :: + (pid(), rabbit_net:socket(), non_neg_integer()) -> + rabbit_types:ok(pid())). +-spec(start_heartbeat_receiver/3 :: + (pid(), rabbit_net:socket(), non_neg_integer()) -> + rabbit_types:ok(pid())). + +-spec(pause_monitor/1 :: (heartbeaters()) -> 'ok'). +-spec(resume_monitor/1 :: (heartbeaters()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_heartbeat_sender(_Parent, Sock, TimeoutSec) -> %% the 'div 2' is there so that we don't end up waiting for nearly %% 2 * TimeoutSec before sending a heartbeat in the boundary case %% where the last message was sent just after a heartbeat. 
- spawn_link(fun () -> heartbeater(Sock, TimeoutSec * 1000 div 2, - send_oct, 0, - fun () -> - catch rabbit_net:send(Sock, rabbit_binary_generator:build_heartbeat_frame()), - continue - end, - erlang:monitor(process, Parent)) end), + heartbeater( + {Sock, TimeoutSec * 1000 div 2, send_oct, 0, + fun () -> + catch rabbit_net:send( + Sock, rabbit_binary_generator:build_heartbeat_frame()), + continue + end}). + +start_heartbeat_receiver(Parent, Sock, TimeoutSec) -> + %% we check for incoming data every interval, and time out after + %% two checks with no change. As a result we will time out between + %% 2 and 3 intervals after the last data has been received. + heartbeater({Sock, TimeoutSec * 1000, recv_oct, 1, fun () -> + Parent ! timeout, + stop + end}). + +pause_monitor(none) -> + ok; +pause_monitor({_Sender, Receiver}) -> + Receiver ! pause, + ok. + +resume_monitor(none) -> + ok; +resume_monitor({_Sender, Receiver}) -> + Receiver ! resume, ok. -%% Y-combinator, posted by Vladimir Sekissov to the Erlang mailing list -%% http://www.erlang.org/ml-archive/erlang-questions/200301/msg00053.html -y(X) -> - F = fun (P) -> X(fun (A) -> (P(P))(A) end) end, - F(F). - -heartbeater(Sock, TimeoutMillisec, StatName, Threshold, Handler, MonitorRef) -> - Heartbeat = - fun (F) -> - fun ({StatVal, SameCount}) -> - receive - {'DOWN', MonitorRef, process, _Object, _Info} -> ok; - Other -> exit({unexpected_message, Other}) - after TimeoutMillisec -> - case rabbit_net:getstat(Sock, [StatName]) of - {ok, [{StatName, NewStatVal}]} -> - if NewStatVal =/= StatVal -> - F({NewStatVal, 0}); - SameCount < Threshold -> - F({NewStatVal, SameCount + 1}); - true -> - case Handler() of - stop -> ok; - continue -> F({NewStatVal, 0}) - end - end; - {error, einval} -> - %% the socket is dead, most - %% likely because the - %% connection is being shut - %% down -> terminate - ok; - {error, Reason} -> - exit({cannot_get_socket_stats, Reason}) - end - end - end - end, - (y(Heartbeat))({0, 0}). 
+%%---------------------------------------------------------------------------- + +heartbeater(Params) -> + {ok, proc_lib:spawn_link(fun () -> heartbeater(Params, {0, 0}) end)}. + +heartbeater({Sock, TimeoutMillisec, StatName, Threshold, Handler} = Params, + {StatVal, SameCount}) -> + Recurse = fun (V) -> heartbeater(Params, V) end, + receive + pause -> + receive + resume -> + Recurse({0, 0}); + Other -> + exit({unexpected_message, Other}) + end; + Other -> + exit({unexpected_message, Other}) + after TimeoutMillisec -> + case rabbit_net:getstat(Sock, [StatName]) of + {ok, [{StatName, NewStatVal}]} -> + if NewStatVal =/= StatVal -> + Recurse({NewStatVal, 0}); + SameCount < Threshold -> + Recurse({NewStatVal, SameCount + 1}); + true -> + case Handler() of + stop -> ok; + continue -> Recurse({NewStatVal, 0}) + end + end; + {error, einval} -> + %% the socket is dead, most likely because the + %% connection is being shut down -> terminate + ok; + {error, Reason} -> + exit({cannot_get_socket_stats, Reason}) + end + end. diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index a7ca20c8..4e0dad84 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -34,10 +34,10 @@ -export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, needs_sync/1, sync/1, - handle_pre_hibernate/1, status/1]). + set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, + idle_timeout/1, handle_pre_hibernate/1, status/1]). --export([start/1]). +-export([start/1, stop/0]). -behaviour(rabbit_backing_queue). @@ -48,11 +48,11 @@ -ifdef(use_specs). --type(ack() :: guid() | 'blank_ack'). +-type(ack() :: rabbit_guid:guid() | 'blank_ack'). 
-type(state() :: #iv_state { queue :: queue(), - qname :: queue_name(), + qname :: rabbit_amqqueue:name(), len :: non_neg_integer(), - pending_ack :: dict() + pending_ack :: dict:dictionary() }). -include("rabbit_backing_queue_spec.hrl"). @@ -61,6 +61,9 @@ start(DurableQueues) -> ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]). +stop() -> + ok = rabbit_sup:stop_child(rabbit_persister). + init(QName, IsDurable, Recover) -> Q = queue:from_list(case IsDurable andalso Recover of true -> rabbit_persister:queue_content(QName); @@ -197,9 +200,9 @@ set_ram_duration_target(_DurationTarget, State) -> State. ram_duration(State) -> {0, State}. -needs_sync(_State) -> false. +needs_idle_timeout(_State) -> false. -sync(State) -> State. +idle_timeout(State) -> State. handle_pre_hibernate(State) -> State. @@ -242,8 +245,7 @@ do_if_persistent(F, Txn, QName) -> persist_message(QName, true, Txn, Msg = #basic_message { is_persistent = true }) -> Msg1 = Msg #basic_message { - %% don't persist any recoverable decoded properties, - %% rebuild from properties_bin on restore + %% don't persist any recoverable decoded properties content = rabbit_binary_parser:clear_decoded_content( Msg #basic_message.content)}, persist_work(Txn, QName, diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 878af029..c323d7ce 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -34,8 +34,8 @@ -behaviour(gen_server2). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, - handle_info/2]). --export([start_link/2, shutdown/1]). + handle_info/2, prioritise_call/3]). +-export([start_link/2]). -export([limit/2, can_send/3, ack/2, register/2, unregister/2]). -export([get_limit/1, block/1, unblock/1]). @@ -45,8 +45,8 @@ -type(maybe_pid() :: pid() | 'undefined'). --spec(start_link/2 :: (pid(), non_neg_integer()) -> pid()). --spec(shutdown/1 :: (maybe_pid()) -> 'ok'). +-spec(start_link/2 :: (pid(), non_neg_integer()) -> + rabbit_types:ok_pid_or_error()). 
-spec(limit/2 :: (maybe_pid(), non_neg_integer()) -> 'ok' | 'stopped'). -spec(can_send/3 :: (maybe_pid(), pid(), boolean()) -> boolean()). -spec(ack/2 :: (maybe_pid(), non_neg_integer()) -> 'ok'). @@ -74,20 +74,12 @@ %%---------------------------------------------------------------------------- start_link(ChPid, UnackedMsgCount) -> - {ok, Pid} = gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []), - Pid. - -shutdown(undefined) -> - ok; -shutdown(LimiterPid) -> - true = unlink(LimiterPid), - gen_server2:cast(LimiterPid, shutdown). + gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []). limit(undefined, 0) -> ok; limit(LimiterPid, PrefetchCount) -> - unlink_on_stopped(LimiterPid, - gen_server2:call(LimiterPid, {limit, PrefetchCount})). + gen_server2:call(LimiterPid, {limit, PrefetchCount}). %% Ask the limiter whether the queue can deliver a message without %% breaching a limit @@ -115,7 +107,7 @@ get_limit(undefined) -> get_limit(Pid) -> rabbit_misc:with_exit_handler( fun () -> 0 end, - fun () -> gen_server2:pcall(Pid, 9, get_limit, infinity) end). + fun () -> gen_server2:call(Pid, get_limit, infinity) end). block(undefined) -> ok; @@ -125,8 +117,7 @@ block(LimiterPid) -> unblock(undefined) -> ok; unblock(LimiterPid) -> - unlink_on_stopped(LimiterPid, - gen_server2:call(LimiterPid, unblock, infinity)). + gen_server2:call(LimiterPid, unblock, infinity). %%---------------------------------------------------------------------------- %% gen_server callbacks @@ -135,6 +126,9 @@ unblock(LimiterPid) -> init([ChPid, UnackedMsgCount]) -> {ok, #lim{ch_pid = ChPid, volume = UnackedMsgCount}}. +prioritise_call(get_limit, _From, _State) -> 9; +prioritise_call(_Msg, _From, _State) -> 0. + handle_call({can_send, _QPid, _AckRequired}, _From, State = #lim{blocked = true}) -> {reply, false, State}; @@ -165,9 +159,6 @@ handle_call(unblock, _From, State) -> {stop, State1} -> {stop, normal, stopped, State1} end. 
-handle_cast(shutdown, State) -> - {stop, normal, State}; - handle_cast({ack, Count}, State = #lim{volume = Volume}) -> NewVolume = if Volume == 0 -> 0; true -> Volume - Count @@ -247,9 +238,3 @@ notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) -> ok end, State#lim{queues = NewQueues}. - -unlink_on_stopped(LimiterPid, stopped) -> - ok = rabbit_misc:unlink_and_capture_exit(LimiterPid), - stopped; -unlink_on_stopped(_LimiterPid, Result) -> - Result. diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl index cc80e360..863f77e7 100644 --- a/src/rabbit_log.erl +++ b/src/rabbit_log.erl @@ -50,7 +50,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(debug/1 :: (string()) -> 'ok'). -spec(debug/2 :: (string(), [any()]) -> 'ok'). -spec(info/1 :: (string()) -> 'ok'). diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index e78b59f1..f87b6271 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -86,12 +86,12 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(update/0 :: () -> 'ok'). -spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok'). -spec(deregister/1 :: (pid()) -> 'ok'). --spec(report_ram_duration/2 :: (pid(), float() | 'infinity') -> - number() | 'infinity'). +-spec(report_ram_duration/2 :: + (pid(), float() | 'infinity') -> number() | 'infinity'). -spec(stop/0 :: () -> 'ok'). -endif. diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 35739dcb..086d260e 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -32,14 +32,15 @@ -module(rabbit_misc). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). + -include_lib("kernel/include/file.hrl"). -export([method_record_type/1, polite_pause/0, polite_pause/1]). 
-export([die/1, frame_error/2, amqp_error/4, - protocol_error/3, protocol_error/4]). --export([not_found/1]). --export([get_config/1, get_config/2, set_config/2]). + protocol_error/3, protocol_error/4, protocol_error/1]). +-export([not_found/1, assert_args_equivalence/4]). -export([dirty_read/1]). +-export([table_lookup/2]). -export([r/3, r/2, r_arg/4, rs/1]). -export([enable_cover/0, report_cover/0]). -export([enable_cover/1, report_cover/1]). @@ -60,7 +61,9 @@ -export([sort_field_table/1]). -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). --export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). +-export([recursive_delete/1, dict_cons/3, orddict_cons/3, + unlink_and_capture_exit/1]). +-export([get_options/2]). -import(mnesia). -import(lists). @@ -71,61 +74,91 @@ -ifdef(use_specs). --include_lib("kernel/include/inet.hrl"). +-export_type([resource_name/0]). --type(ok_or_error() :: 'ok' | {'error', any()}). +-type(ok_or_error() :: rabbit_types:ok_or_error(any())). +-type(thunk(T) :: fun(() -> T)). +-type(resource_name() :: binary()). +-type(optdef() :: {flag, string()} | {option, string(), any()}). +-type(channel_or_connection_exit() + :: rabbit_types:channel_exit() | rabbit_types:connection_exit()). --spec(method_record_type/1 :: (tuple()) -> atom()). +-spec(method_record_type/1 :: (rabbit_framing:amqp_method_record()) + -> rabbit_framing:amqp_method_name()). -spec(polite_pause/0 :: () -> 'done'). -spec(polite_pause/1 :: (non_neg_integer()) -> 'done'). --spec(die/1 :: (atom()) -> no_return()). --spec(frame_error/2 :: (atom(), binary()) -> no_return()). --spec(amqp_error/4 :: (atom(), string(), [any()], atom()) -> amqp_error()). --spec(protocol_error/3 :: (atom(), string(), [any()]) -> no_return()). --spec(protocol_error/4 :: (atom(), string(), [any()], atom()) -> no_return()). --spec(not_found/1 :: (r(atom())) -> no_return()). --spec(get_config/1 :: (atom()) -> {'ok', any()} | not_found()). 
--spec(get_config/2 :: (atom(), A) -> A). --spec(set_config/2 :: (atom(), any()) -> 'ok'). --spec(dirty_read/1 :: ({atom(), any()}) -> {'ok', any()} | not_found()). --spec(r/3 :: (vhost() | r(atom()), K, resource_name()) -> - r(K) when is_subtype(K, atom())). --spec(r/2 :: (vhost(), K) -> #resource{virtual_host :: vhost(), - kind :: K, - name :: '_'} - when is_subtype(K, atom())). --spec(r_arg/4 :: (vhost() | r(atom()), K, amqp_table(), binary()) -> - undefined | r(K) when is_subtype(K, atom())). --spec(rs/1 :: (r(atom())) -> string()). +-spec(die/1 :: + (rabbit_framing:amqp_exception()) -> channel_or_connection_exit()). +-spec(frame_error/2 :: (rabbit_framing:amqp_method_name(), binary()) + -> rabbit_types:connection_exit()). +-spec(amqp_error/4 :: + (rabbit_framing:amqp_exception(), string(), [any()], + rabbit_framing:amqp_method_name()) + -> rabbit_types:amqp_error()). +-spec(protocol_error/3 :: (rabbit_framing:amqp_exception(), string(), [any()]) + -> channel_or_connection_exit()). +-spec(protocol_error/4 :: + (rabbit_framing:amqp_exception(), string(), [any()], + rabbit_framing:amqp_method_name()) -> channel_or_connection_exit()). +-spec(protocol_error/1 :: + (rabbit_types:amqp_error()) -> channel_or_connection_exit()). +-spec(not_found/1 :: (rabbit_types:r(atom())) -> rabbit_types:channel_exit()). +-spec(assert_args_equivalence/4 :: (rabbit_framing:amqp_table(), + rabbit_framing:amqp_table(), + rabbit_types:r(any()), [binary()]) -> + 'ok' | rabbit_types:connection_exit()). +-spec(dirty_read/1 :: + ({atom(), any()}) -> rabbit_types:ok_or_error2(any(), 'not_found')). +-spec(table_lookup/2 :: + (rabbit_framing:amqp_table(), binary()) + -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}). +-spec(r/2 :: (rabbit_types:vhost(), K) + -> rabbit_types:r3(rabbit_types:vhost(), K, '_') + when is_subtype(K, atom())). 
+-spec(r/3 :: + (rabbit_types:vhost() | rabbit_types:r(atom()), K, resource_name()) + -> rabbit_types:r3(rabbit_types:vhost(), K, resource_name()) + when is_subtype(K, atom())). +-spec(r_arg/4 :: + (rabbit_types:vhost() | rabbit_types:r(atom()), K, + rabbit_framing:amqp_table(), binary()) + -> undefined | rabbit_types:r(K) + when is_subtype(K, atom())). +-spec(rs/1 :: (rabbit_types:r(atom())) -> string()). -spec(enable_cover/0 :: () -> ok_or_error()). -spec(start_cover/1 :: ([{string(), string()} | string()]) -> 'ok'). -spec(report_cover/0 :: () -> 'ok'). --spec(enable_cover/1 :: (file_path()) -> ok_or_error()). --spec(report_cover/1 :: (file_path()) -> 'ok'). +-spec(enable_cover/1 :: (file:filename()) -> ok_or_error()). +-spec(report_cover/1 :: (file:filename()) -> 'ok'). -spec(throw_on_error/2 :: - (atom(), thunk({error, any()} | {ok, A} | A)) -> A). + (atom(), thunk(rabbit_types:error(any()) | {ok, A} | A)) -> A). -spec(with_exit_handler/2 :: (thunk(A), thunk(A)) -> A). -spec(filter_exit_map/2 :: (fun ((A) -> B), [A]) -> [B]). --spec(with_user/2 :: (username(), thunk(A)) -> A). --spec(with_vhost/2 :: (vhost(), thunk(A)) -> A). --spec(with_user_and_vhost/3 :: (username(), vhost(), thunk(A)) -> A). +-spec(with_user/2 :: (rabbit_access_control:username(), thunk(A)) -> A). +-spec(with_vhost/2 :: (rabbit_types:vhost(), thunk(A)) -> A). +-spec(with_user_and_vhost/3 :: + (rabbit_access_control:username(), rabbit_types:vhost(), thunk(A)) + -> A). -spec(execute_mnesia_transaction/1 :: (thunk(A)) -> A). -spec(ensure_ok/2 :: (ok_or_error(), atom()) -> 'ok'). --spec(makenode/1 :: ({string(), string()} | string()) -> erlang_node()). --spec(nodeparts/1 :: (erlang_node() | string()) -> {string(), string()}). +-spec(makenode/1 :: ({string(), string()} | string()) -> node()). +-spec(nodeparts/1 :: (node() | string()) -> {string(), string()}). -spec(cookie_hash/0 :: () -> string()). --spec(tcp_name/3 :: (atom(), ip_address(), ip_port()) -> atom()). 
+-spec(tcp_name/3 :: + (atom(), inet:ip_address(), rabbit_networking:ip_port()) + -> atom()). -spec(intersperse/2 :: (A, [A]) -> [A]). -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(table_fold/3 :: (fun ((any(), A) -> A), A, atom()) -> A). -spec(dirty_read_all/1 :: (atom()) -> [any()]). --spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom()) -> - 'ok' | 'aborted'). --spec(dirty_dump_log/1 :: (file_path()) -> ok_or_error()). --spec(read_term_file/1 :: (file_path()) -> {'ok', [any()]} | {'error', any()}). --spec(write_term_file/2 :: (file_path(), [any()]) -> ok_or_error()). --spec(append_file/2 :: (file_path(), string()) -> ok_or_error()). +-spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom()) + -> 'ok' | 'aborted'). +-spec(dirty_dump_log/1 :: (file:filename()) -> ok_or_error()). +-spec(read_term_file/1 :: + (file:filename()) -> {'ok', [any()]} | rabbit_types:error(any())). +-spec(write_term_file/2 :: (file:filename(), [any()]) -> ok_or_error()). +-spec(append_file/2 :: (file:filename(), string()) -> ok_or_error()). -spec(ensure_parent_dirs_exist/1 :: (string()) -> 'ok'). -spec(format_stderr/2 :: (string(), [any()]) -> 'ok'). -spec(start_applications/1 :: ([atom()]) -> 'ok'). @@ -133,16 +166,24 @@ -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -spec(ceil/1 :: (number()) -> integer()). -spec(queue_fold/3 :: (fun ((any(), B) -> B), B, queue()) -> B). --spec(sort_field_table/1 :: (amqp_table()) -> amqp_table()). +-spec(sort_field_table/1 :: + (rabbit_framing:amqp_table()) -> rabbit_framing:amqp_table()). -spec(pid_to_string/1 :: (pid()) -> string()). -spec(string_to_pid/1 :: (string()) -> pid()). -spec(version_compare/2 :: (string(), string()) -> 'lt' | 'eq' | 'gt'). --spec(version_compare/3 :: (string(), string(), - ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) -> boolean()). --spec(recursive_delete/1 :: ([file_path()]) -> - 'ok' | {'error', {file_path(), any()}}). 
--spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). +-spec(version_compare/3 :: + (string(), string(), ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) + -> boolean()). +-spec(recursive_delete/1 :: + ([file:filename()]) + -> rabbit_types:ok_or_error({file:filename(), any()})). +-spec(dict_cons/3 :: (any(), any(), dict:dictionary()) -> + dict:dictionary()). +-spec(orddict_cons/3 :: (any(), any(), orddict:dictionary()) -> + orddict:dictionary()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). +-spec(get_options/2 :: ([optdef()], [string()]) + -> {[string()], [{string(), any()}]}). -endif. @@ -173,31 +214,39 @@ protocol_error(Name, ExplanationFormat, Params) -> protocol_error(Name, ExplanationFormat, Params, none). protocol_error(Name, ExplanationFormat, Params, Method) -> - exit(amqp_error(Name, ExplanationFormat, Params, Method)). + protocol_error(amqp_error(Name, ExplanationFormat, Params, Method)). + +protocol_error(#amqp_error{} = Error) -> + exit(Error). not_found(R) -> protocol_error(not_found, "no ~s", [rs(R)]). -get_config(Key) -> - case dirty_read({rabbit_config, Key}) of - {ok, {rabbit_config, Key, V}} -> {ok, V}; - Other -> Other - end. +assert_args_equivalence(Orig, New, Name, Keys) -> + [assert_args_equivalence1(Orig, New, Name, Key) || Key <- Keys], + ok. -get_config(Key, DefaultValue) -> - case get_config(Key) of - {ok, V} -> V; - {error, not_found} -> DefaultValue +assert_args_equivalence1(Orig, New, Name, Key) -> + case {table_lookup(Orig, Key), table_lookup(New, Key)} of + {Same, Same} -> ok; + {Orig1, New1} -> protocol_error( + not_allowed, + "inequivalent arg '~s' for ~s: " + "required ~w, received ~w", + [Key, rabbit_misc:rs(Name), New1, Orig1]) end. -set_config(Key, Value) -> - ok = mnesia:dirty_write({rabbit_config, Key, Value}). - dirty_read(ReadSpec) -> case mnesia:dirty_read(ReadSpec) of [Result] -> {ok, Result}; [] -> {error, not_found} end. 
+table_lookup(Table, Key) -> + case lists:keysearch(Key, 1, Table) of + {value, {_, TypeBin, ValueBin}} -> {TypeBin, ValueBin}; + false -> undefined + end. + r(#resource{virtual_host = VHostPath}, Kind, Name) when is_binary(Name) -> #resource{virtual_host = VHostPath, kind = Kind, name = Name}; @@ -210,9 +259,9 @@ r(VHostPath, Kind) when is_binary(VHostPath) -> r_arg(#resource{virtual_host = VHostPath}, Kind, Table, Key) -> r_arg(VHostPath, Kind, Table, Key); r_arg(VHostPath, Kind, Table, Key) -> - case lists:keysearch(Key, 1, Table) of - {value, {_, longstr, NameBin}} -> r(VHostPath, Kind, NameBin); - false -> undefined + case table_lookup(Table, Key) of + {longstr, NameBin} -> r(VHostPath, Kind, NameBin); + undefined -> undefined end. rs(#resource{virtual_host = VHostPath, kind = Kind, name = Name}) -> @@ -555,7 +604,7 @@ string_to_pid(Str) -> binary_to_term(<<131,103,NodeEnc/binary,Id:32,Ser:32,0:8>>); nomatch -> throw(Err) - end. + end. version_compare(A, B, lte) -> case version_compare(A, B) of @@ -631,8 +680,44 @@ recursive_delete1(Path) -> dict_cons(Key, Value, Dict) -> dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). +orddict_cons(Key, Value, Dict) -> + orddict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). + unlink_and_capture_exit(Pid) -> unlink(Pid), receive {'EXIT', Pid, _} -> ok after 0 -> ok end. + +% Separate flags and options from arguments. +% get_options([{flag, "-q"}, {option, "-p", "/"}], +% ["set_permissions","-p","/","guest", +% "-q",".*",".*",".*"]) +% == {["set_permissions","guest",".*",".*",".*"], +% [{"-q",true},{"-p","/"}]} +get_options(Defs, As) -> + lists:foldl(fun(Def, {AsIn, RsIn}) -> + {AsOut, Value} = case Def of + {flag, Key} -> + get_flag(Key, AsIn); + {option, Key, Default} -> + get_option(Key, Default, AsIn) + end, + {AsOut, [{Key, Value} | RsIn]} + end, {As, []}, Defs). 
+ +get_option(K, _Default, [K, V | As]) -> + {As, V}; +get_option(K, Default, [Nk | As]) -> + {As1, V} = get_option(K, Default, As), + {[Nk | As1], V}; +get_option(_, Default, As) -> + {As, Default}. + +get_flag(K, [K | As]) -> + {As, true}; +get_flag(K, [Nk | As]) -> + {As1, V} = get_flag(K, As), + {[Nk | As1], V}; +get_flag(_, []) -> + {[], false}. diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index d4b29943..a3214888 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -29,11 +29,12 @@ %% Contributor(s): ______________________________________. %% + -module(rabbit_mnesia). -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, - cluster/1, reset/0, force_reset/0, is_clustered/0, - empty_ram_only_tables/0]). + cluster/1, force_cluster/1, reset/0, force_reset/0, + is_clustered/0, empty_ram_only_tables/0]). -export([table_names/0]). @@ -47,13 +48,18 @@ -ifdef(use_specs). --spec(status/0 :: () -> [{'nodes', [{node_type(), [erlang_node()]}]} | - {'running_nodes', [erlang_node()]}]). --spec(dir/0 :: () -> file_path()). +-export_type([node_type/0]). + +-type(node_type() :: disc_only | disc | ram | unknown). +-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | + {'running_nodes', [node()]}]). +-spec(dir/0 :: () -> file:filename()). -spec(ensure_mnesia_dir/0 :: () -> 'ok'). -spec(init/0 :: () -> 'ok'). -spec(is_db_empty/0 :: () -> boolean()). --spec(cluster/1 :: ([erlang_node()]) -> 'ok'). +-spec(cluster/1 :: ([node()]) -> 'ok'). +-spec(force_cluster/1 :: ([node()]) -> 'ok'). +-spec(cluster/2 :: ([node()], boolean()) -> 'ok'). -spec(reset/0 :: () -> 'ok'). -spec(force_reset/0 :: () -> 'ok'). -spec(is_clustered/0 :: () -> boolean()). 
@@ -71,7 +77,7 @@ status() -> {disc, disc_copies}, {ram, ram_copies}], begin - Nodes = mnesia:table_info(schema, CopyType), + Nodes = nodes_of_type(CopyType), Nodes =/= [] end]; no -> case mnesia:system_info(db_nodes) of @@ -84,25 +90,29 @@ status() -> init() -> ok = ensure_mnesia_running(), ok = ensure_mnesia_dir(), - ok = init_db(read_cluster_nodes_config()), - ok = wait_for_tables(), + ok = init_db(read_cluster_nodes_config(), true), ok. is_db_empty() -> lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, table_names()). +cluster(ClusterNodes) -> + cluster(ClusterNodes, false). +force_cluster(ClusterNodes) -> + cluster(ClusterNodes, true). + %% Alter which disk nodes this node is clustered with. This can be a %% subset of all the disk nodes in the cluster but can (and should) %% include the node itself if it is to be a disk rather than a ram -%% node. -cluster(ClusterNodes) -> +%% node. If Force is false, only connections to online nodes are +%% allowed. +cluster(ClusterNodes, Force) -> ok = ensure_mnesia_not_running(), ok = ensure_mnesia_dir(), rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), try - ok = init_db(ClusterNodes), - ok = wait_for_tables(), + ok = init_db(ClusterNodes, Force), ok = create_cluster_nodes_config(ClusterNodes) after mnesia:stop() @@ -132,58 +142,96 @@ empty_ram_only_tables() -> %%-------------------------------------------------------------------- +nodes_of_type(Type) -> + %% This function should return the nodes of a certain type (ram, + %% disc or disc_only) in the current cluster. The type of nodes + %% is determined when the cluster is initially configured. + %% Specifically, we check whether a certain table, which we know + %% will be written to disk on a disc node, is stored on disk or in + %% RAM. + mnesia:table_info(rabbit_durable_exchange, Type). 
+ table_definitions() -> [{rabbit_user, [{record_name, user}, {attributes, record_info(fields, user)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #user{_='_'}}]}, {rabbit_user_permission, [{record_name, user_permission}, {attributes, record_info(fields, user_permission)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #user_permission{user_vhost = #user_vhost{_='_'}, + permission = #permission{_='_'}, + _='_'}}]}, {rabbit_vhost, [{record_name, vhost}, {attributes, record_info(fields, vhost)}, - {disc_copies, [node()]}]}, - {rabbit_config, - [{disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #vhost{_='_'}}]}, {rabbit_listener, [{record_name, listener}, {attributes, record_info(fields, listener)}, - {type, bag}]}, + {type, bag}, + {match, #listener{_='_'}}]}, {rabbit_durable_route, [{record_name, route}, {attributes, record_info(fields, route)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #route{binding = binding_match(), _='_'}}]}, {rabbit_route, [{record_name, route}, {attributes, record_info(fields, route)}, - {type, ordered_set}]}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, {rabbit_reverse_route, [{record_name, reverse_route}, {attributes, record_info(fields, reverse_route)}, - {type, ordered_set}]}, + {type, ordered_set}, + {match, #reverse_route{reverse_binding = reverse_binding_match(), + _='_'}}]}, + %% Consider the implications to nodes_of_type/1 before altering + %% the next entry. 
{rabbit_durable_exchange, [{record_name, exchange}, {attributes, record_info(fields, exchange)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, {rabbit_exchange, [{record_name, exchange}, - {attributes, record_info(fields, exchange)}]}, + {attributes, record_info(fields, exchange)}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, {rabbit_durable_queue, [{record_name, amqqueue}, {attributes, record_info(fields, amqqueue)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}, {rabbit_queue, [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}]}]. + {attributes, record_info(fields, amqqueue)}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}]. + +binding_match() -> + #binding{queue_name = queue_name_match(), + exchange_name = exchange_name_match(), + _='_'}. +reverse_binding_match() -> + #reverse_binding{queue_name = queue_name_match(), + exchange_name = exchange_name_match(), + _='_'}. +exchange_name_match() -> + resource_match(exchange). +queue_name_match() -> + resource_match(queue). +resource_match(Kind) -> + #resource{kind = Kind, _='_'}. table_names() -> [Tab || {Tab, _} <- table_definitions()]. replicated_table_names() -> - [Tab || {Tab, Attrs} <- table_definitions(), - not lists:member({local_content, true}, Attrs) + [Tab || {Tab, TabDef} <- table_definitions(), + not lists:member({local_content, true}, TabDef) ]. dir() -> mnesia:system_info(directory). @@ -208,11 +256,53 @@ ensure_mnesia_not_running() -> yes -> throw({error, mnesia_unexpectedly_running}) end. +ensure_schema_integrity() -> + case check_schema_integrity() of + ok -> + ok; + {error, Reason} -> + throw({error, {schema_integrity_check_failed, Reason}}) + end. 
+ check_schema_integrity() -> - %%TODO: more thorough checks - case catch [mnesia:table_info(Tab, version) || Tab <- table_names()] of - {'EXIT', Reason} -> {error, Reason}; - _ -> ok + Tables = mnesia:system_info(tables), + case [Error || {Tab, TabDef} <- table_definitions(), + case lists:member(Tab, Tables) of + false -> + Error = {table_missing, Tab}, + true; + true -> + {_, ExpAttrs} = proplists:lookup(attributes, TabDef), + Attrs = mnesia:table_info(Tab, attributes), + Error = {table_attributes_mismatch, Tab, + ExpAttrs, Attrs}, + Attrs /= ExpAttrs + end] of + [] -> check_table_integrity(); + Errors -> {error, Errors} + end. + +check_table_integrity() -> + ok = wait_for_tables(), + case lists:all(fun ({Tab, TabDef}) -> + {_, Match} = proplists:lookup(match, TabDef), + read_test_table(Tab, Match) + end, table_definitions()) of + true -> ok; + false -> {error, invalid_table_content} + end. + +read_test_table(Tab, Match) -> + case mnesia:dirty_first(Tab) of + '$end_of_table' -> + true; + Key -> + ObjList = mnesia:dirty_read(Tab, Key), + MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), + case ets:match_spec_run(ObjList, MatchComp) of + ObjList -> true; + _ -> false + end end. 
%% The cluster node config file contains some or all of the disk nodes @@ -241,20 +331,9 @@ read_cluster_nodes_config() -> case rabbit_misc:read_term_file(FileName) of {ok, [ClusterNodes]} -> ClusterNodes; {error, enoent} -> - case application:get_env(cluster_config) of + case application:get_env(cluster_nodes) of undefined -> []; - {ok, DefaultFileName} -> - case file:consult(DefaultFileName) of - {ok, [ClusterNodes]} -> ClusterNodes; - {error, enoent} -> - error_logger:warning_msg( - "default cluster config file ~p does not exist~n", - [DefaultFileName]), - []; - {error, Reason} -> - throw({error, {cannot_read_cluster_nodes_config, - DefaultFileName, Reason}}) - end + {ok, ClusterNodes} -> ClusterNodes end; {error, Reason} -> throw({error, {cannot_read_cluster_nodes_config, @@ -273,38 +352,57 @@ delete_cluster_nodes_config() -> %% Take a cluster node config and create the right kind of node - a %% standalone disk node, or disk or ram node connected to the -%% specified cluster nodes. -init_db(ClusterNodes) -> - case mnesia:change_config(extra_db_nodes, ClusterNodes -- [node()]) of - {ok, []} -> - case mnesia:system_info(use_dir) of - true -> - case check_schema_integrity() of - ok -> - ok; - {error, Reason} -> - %% NB: we cannot use rabbit_log here since - %% it may not have been started yet - error_logger:warning_msg( - "schema integrity check failed: ~p~n" - "moving database to backup location " - "and recreating schema from scratch~n", - [Reason]), - ok = move_db(), +%% specified cluster nodes. If Force is false, don't allow +%% connections to offline nodes. 
+init_db(ClusterNodes, Force) -> + UClusterNodes = lists:usort(ClusterNodes), + ProperClusterNodes = UClusterNodes -- [node()], + case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of + {ok, Nodes} -> + case Force of + false -> + FailedClusterNodes = ProperClusterNodes -- Nodes, + case FailedClusterNodes of + [] -> ok; + _ -> + throw({error, {failed_to_cluster_with, + FailedClusterNodes, + "Mnesia could not connect to some nodes."}}) + end; + _ -> ok + end, + case Nodes of + [] -> + case mnesia:system_info(use_dir) of + true -> + case check_schema_integrity() of + ok -> + ok; + {error, Reason} -> + %% NB: we cannot use rabbit_log here since + %% it may not have been started yet + error_logger:warning_msg( + "schema integrity check failed: ~p~n" + "moving database to backup location " + "and recreating schema from scratch~n", + [Reason]), + ok = move_db(), + ok = create_schema() + end; + false -> ok = create_schema() end; - false -> - ok = create_schema() + [_|_] -> + IsDiskNode = ClusterNodes == [] orelse + lists:member(node(), ClusterNodes), + ok = wait_for_replicated_tables(), + ok = create_local_table_copy(schema, disc_copies), + ok = create_local_table_copies(case IsDiskNode of + true -> disc; + false -> ram + end), + ok = ensure_schema_integrity() end; - {ok, [_|_]} -> - IsDiskNode = ClusterNodes == [] orelse - lists:member(node(), ClusterNodes), - ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, disc_copies), - ok = create_local_table_copies(case IsDiskNode of - true -> disc; - false -> ram - end); {error, Reason} -> %% one reason we may end up here is if we try to join %% nodes together that are currently running standalone or @@ -319,7 +417,9 @@ create_schema() -> cannot_create_schema), rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), - create_tables(). + ok = create_tables(), + ok = ensure_schema_integrity(), + ok = wait_for_tables(). move_db() -> mnesia:stop(), @@ -344,12 +444,13 @@ move_db() -> ok. 
create_tables() -> - lists:foreach(fun ({Tab, TabArgs}) -> - case mnesia:create_table(Tab, TabArgs) of + lists:foreach(fun ({Tab, TabDef}) -> + TabDef1 = proplists:delete(match, TabDef), + case mnesia:create_table(Tab, TabDef1) of {atomic, ok} -> ok; {aborted, Reason} -> throw({error, {table_creation_failed, - Tab, TabArgs, Reason}}) + Tab, TabDef1, Reason}}) end end, table_definitions()), @@ -404,17 +505,12 @@ wait_for_replicated_tables() -> wait_for_tables(replicated_table_names()). wait_for_tables() -> wait_for_tables(table_names()). wait_for_tables(TableNames) -> - case check_schema_integrity() of - ok -> - case mnesia:wait_for_tables(TableNames, 30000) of - ok -> ok; - {timeout, BadTabs} -> - throw({error, {timeout_waiting_for_tables, BadTabs}}); - {error, Reason} -> - throw({error, {failed_waiting_for_tables, Reason}}) - end; + case mnesia:wait_for_tables(TableNames, 30000) of + ok -> ok; + {timeout, BadTabs} -> + throw({error, {timeout_waiting_for_tables, BadTabs}}); {error, Reason} -> - throw({error, {schema_integrity_check_failed, Reason}}) + throw({error, {failed_waiting_for_tables, Reason}}) end. reset(Force) -> diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl new file mode 100644 index 00000000..4f178439 --- /dev/null +++ b/src/rabbit_msg_file.erl @@ -0,0 +1,136 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_file). + +-export([append/3, read/2, scan/2]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK_MARKER, 255). +-define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). +-define(GUID_SIZE_BYTES, 16). +-define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). +-define(SCAN_BLOCK_SIZE, 4194304). %% 4MB + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(io_device() :: any()). +-type(position() :: non_neg_integer()). +-type(msg_size() :: non_neg_integer()). +-type(file_size() :: non_neg_integer()). + +-spec(append/3 :: (io_device(), rabbit_guid:guid(), msg()) -> + rabbit_types:ok_or_error2(msg_size(), any())). +-spec(read/2 :: (io_device(), msg_size()) -> + rabbit_types:ok_or_error2({rabbit_guid:guid(), msg()}, + any())). +-spec(scan/2 :: (io_device(), file_size()) -> + {'ok', [{rabbit_guid:guid(), msg_size(), position()}], + position()}). + +-endif. 
+ +%%---------------------------------------------------------------------------- + +append(FileHdl, Guid, MsgBody) + when is_binary(Guid) andalso size(Guid) =:= ?GUID_SIZE_BYTES -> + MsgBodyBin = term_to_binary(MsgBody), + MsgBodyBinSize = size(MsgBodyBin), + Size = MsgBodyBinSize + ?GUID_SIZE_BYTES, + case file_handle_cache:append(FileHdl, + <<Size:?INTEGER_SIZE_BITS, + Guid:?GUID_SIZE_BYTES/binary, + MsgBodyBin:MsgBodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of + ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; + KO -> KO + end. + +read(FileHdl, TotalSize) -> + Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, + BodyBinSize = Size - ?GUID_SIZE_BYTES, + case file_handle_cache:read(FileHdl, TotalSize) of + {ok, <<Size:?INTEGER_SIZE_BITS, + Guid:?GUID_SIZE_BYTES/binary, + MsgBodyBin:BodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} -> + {ok, {Guid, binary_to_term(MsgBodyBin)}}; + KO -> KO + end. + +scan(FileHdl, FileSize) when FileSize >= 0 -> + scan(FileHdl, FileSize, <<>>, 0, [], 0). + +scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> + {ok, Acc, ScanOffset}; +scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> + Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), + case file_handle_cache:read(FileHdl, Read) of + {ok, Data1} -> + {Data2, Acc1, ScanOffset1} = + scan(<<Data/binary, Data1/binary>>, Acc, ScanOffset), + ReadOffset1 = ReadOffset + size(Data1), + scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); + _KO -> + {ok, Acc, ScanOffset} + end. + +scan(<<>>, Acc, Offset) -> + {<<>>, Acc, Offset}; +scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> + {<<>>, Acc, Offset}; %% Nothing to do other than stop. 
+scan(<<Size:?INTEGER_SIZE_BITS, GuidAndMsg:Size/binary, + WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Acc, Offset) -> + TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, + case WriteMarker of + ?WRITE_OK_MARKER -> + %% Here we take option 5 from + %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in + %% which we read the Guid as a number, and then convert it + %% back to a binary in order to work around bugs in + %% Erlang's GC. + <<GuidNum:?GUID_SIZE_BITS, _Msg/binary>> = + <<GuidAndMsg:Size/binary>>, + <<Guid:?GUID_SIZE_BYTES/binary>> = <<GuidNum:?GUID_SIZE_BITS>>, + scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); + _ -> + scan(Rest, Acc, Offset + TotalSize) + end; +scan(Data, Acc, Offset) -> + {Data, Acc, Offset}. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl new file mode 100644 index 00000000..bbecbfe2 --- /dev/null +++ b/src/rabbit_msg_store.erl @@ -0,0 +1,1654 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. 
Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store). + +-behaviour(gen_server2). + +-export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, + sync/3, client_init/2, client_terminate/2, + client_delete_and_terminate/3, successfully_recovered_state/1]). + +-export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3, prioritise_call/3, prioritise_cast/2]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(SYNC_INTERVAL, 5). %% milliseconds +-define(CLEAN_FILENAME, "clean.dot"). +-define(FILE_SUMMARY_FILENAME, "file_summary.ets"). + +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read]). +-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). +-define(WRITE_MODE, [write]). + +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). + +-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + +%%---------------------------------------------------------------------------- + +-record(msstate, + { dir, %% store directory + index_module, %% the module for index ops + index_state, %% where are messages? + current_file, %% current file name as number + current_file_handle, %% current file handle since the last fsync? + file_handle_cache, %% file handle cache + on_sync, %% pending sync requests + sync_timer_ref, %% TRef for our interval timer + sum_valid_data, %% sum of valid data in all files + sum_file_size, %% sum of file sizes + pending_gc_completion, %% things to do once GC completes + gc_active, %% is the GC currently working? 
+ gc_pid, %% pid of our GC + file_handles_ets, %% tid of the shared file handles table + file_summary_ets, %% tid of the file summary table + dedup_cache_ets, %% tid of dedup cache table + cur_file_cache_ets, %% tid of current file cache table + client_refs, %% set of references of all registered clients + successfully_recovered, %% boolean: did we recover state? + file_size_limit %% how big are our files allowed to get? + }). + +-record(client_msstate, + { file_handle_cache, + index_state, + index_module, + dir, + gc_pid, + file_handles_ets, + file_summary_ets, + dedup_cache_ets, + cur_file_cache_ets + }). + +-record(file_summary, + {file, valid_total_size, left, right, file_size, locked, readers}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(server() :: pid() | atom()). +-type(file_num() :: non_neg_integer()). +-type(client_msstate() :: #client_msstate { + file_handle_cache :: dict:dictionary(), + index_state :: any(), + index_module :: atom(), + dir :: file:filename(), + gc_pid :: pid(), + file_handles_ets :: ets:tid(), + file_summary_ets :: ets:tid(), + dedup_cache_ets :: ets:tid(), + cur_file_cache_ets :: ets:tid() }). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})), + A}). + +-spec(start_link/4 :: + (atom(), file:filename(), [binary()] | 'undefined', + startup_fun_state()) -> rabbit_types:ok_pid_or_error()). +-spec(write/4 :: (server(), rabbit_guid:guid(), msg(), client_msstate()) -> + rabbit_types:ok(client_msstate())). +-spec(read/3 :: (server(), rabbit_guid:guid(), client_msstate()) -> + {rabbit_types:ok(msg()) | 'not_found', client_msstate()}). +-spec(contains/2 :: (server(), rabbit_guid:guid()) -> boolean()). +-spec(remove/2 :: (server(), [rabbit_guid:guid()]) -> 'ok'). +-spec(release/2 :: (server(), [rabbit_guid:guid()]) -> 'ok'). +-spec(sync/3 :: (server(), [rabbit_guid:guid()], fun (() -> any())) -> 'ok'). 
+-spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> + 'ok'). +-spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). +-spec(client_init/2 :: (server(), binary()) -> client_msstate()). +-spec(client_terminate/2 :: (client_msstate(), server()) -> 'ok'). +-spec(client_delete_and_terminate/3 :: + (client_msstate(), server(), binary()) -> 'ok'). +-spec(successfully_recovered_state/1 :: (server()) -> boolean()). + +-spec(gc/3 :: (non_neg_integer(), non_neg_integer(), + {ets:tid(), file:filename(), atom(), any()}) -> + 'concurrent_readers' | non_neg_integer()). + +-endif. + +%%---------------------------------------------------------------------------- + +%% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION +%% It is not recommended to set this to < 0.5 +-define(GARBAGE_FRACTION, 0.5). + +%% The components: +%% +%% Index: this is a mapping from Guid to #msg_location{}: +%% {Guid, RefCount, File, Offset, TotalSize} +%% By default, it's in ets, but it's also pluggable. +%% FileSummary: this is an ets table which maps File to #file_summary{}: +%% {File, ValidTotalSize, Left, Right, FileSize, Locked, Readers} +%% +%% The basic idea is that messages are appended to the current file up +%% until that file becomes too big (> file_size_limit). At that point, +%% the file is closed and a new file is created on the _right_ of the +%% old file which is used for new messages. Files are named +%% numerically ascending, thus the file with the lowest name is the +%% eldest file. +%% +%% We need to keep track of which messages are in which files (this is +%% the Index); how much useful data is in each file and which files +%% are on the left and right of each other. This is the purpose of the +%% FileSummary ets table. +%% +%% As messages are removed from files, holes appear in these +%% files. The field ValidTotalSize contains the total amount of useful +%% data left in the file. This is needed for garbage collection. 
+%% +%% When we discover that a file is now empty, we delete it. When we +%% discover that it can be combined with the useful data in either its +%% left or right neighbour, and overall, across all the files, we have +%% ((the amount of garbage) / (the sum of all file sizes)) > +%% ?GARBAGE_FRACTION, we start a garbage collection run concurrently, +%% which will compact the two files together. This keeps disk +%% utilisation high and aids performance. We deliberately do this +%% lazily in order to prevent doing GC on files which are soon to be +%% emptied (and hence deleted) soon. +%% +%% Given the compaction between two files, the left file (i.e. elder +%% file) is considered the ultimate destination for the good data in +%% the right file. If necessary, the good data in the left file which +%% is fragmented throughout the file is written out to a temporary +%% file, then read back in to form a contiguous chunk of good data at +%% the start of the left file. Thus the left file is garbage collected +%% and compacted. Then the good data from the right file is copied +%% onto the end of the left file. Index and FileSummary tables are +%% updated. +%% +%% On non-clean startup, we scan the files we discover, dealing with +%% the possibilites of a crash having occured during a compaction +%% (this consists of tidyup - the compaction is deliberately designed +%% such that data is duplicated on disk rather than risking it being +%% lost), and rebuild the FileSummary ets table and Index. +%% +%% So, with this design, messages move to the left. Eventually, they +%% should end up in a contiguous block on the left and are then never +%% rewritten. But this isn't quite the case. 
If in a file there is one +%% message that is being ignored, for some reason, and messages in the +%% file to the right and in the current block are being read all the +%% time then it will repeatedly be the case that the good data from +%% both files can be combined and will be written out to a new +%% file. Whenever this happens, our shunned message will be rewritten. +%% +%% So, provided that we combine messages in the right order, +%% (i.e. left file, bottom to top, right file, bottom to top), +%% eventually our shunned message will end up at the bottom of the +%% left file. The compaction/combining algorithm is smart enough to +%% read in good data from the left file that is scattered throughout +%% (i.e. C and D in the below diagram), then truncate the file to just +%% above B (i.e. truncate to the limit of the good contiguous region +%% at the start of the file), then write C and D on top and then write +%% E, F and G from the right file on top. Thus contiguous blocks of +%% good data at the bottom of files are not rewritten. +%% +%% +-------+ +-------+ +-------+ +%% | X | | G | | G | +%% +-------+ +-------+ +-------+ +%% | D | | X | | F | +%% +-------+ +-------+ +-------+ +%% | X | | X | | E | +%% +-------+ +-------+ +-------+ +%% | C | | F | ===> | D | +%% +-------+ +-------+ +-------+ +%% | X | | X | | C | +%% +-------+ +-------+ +-------+ +%% | B | | X | | B | +%% +-------+ +-------+ +-------+ +%% | A | | E | | A | +%% +-------+ +-------+ +-------+ +%% left right left +%% +%% From this reasoning, we do have a bound on the number of times the +%% message is rewritten. From when it is inserted, there can be no +%% files inserted between it and the head of the queue, and the worst +%% case is that everytime it is rewritten, it moves one position lower +%% in the file (for it to stay at the same position requires that +%% there are no holes beneath it, which means truncate would be used +%% and so it would not be rewritten at all). 
Thus this seems to +%% suggest the limit is the number of messages ahead of it in the +%% queue, though it's likely that that's pessimistic, given the +%% requirements for compaction/combination of files. +%% +%% The other property is that we have is the bound on the lowest +%% utilisation, which should be 50% - worst case is that all files are +%% fractionally over half full and can't be combined (equivalent is +%% alternating full files and files with only one tiny message in +%% them). +%% +%% Messages are reference-counted. When a message with the same guid +%% is written several times we only store it once, and only remove it +%% from the store when it has been removed the same number of times. +%% +%% The reference counts do not persist. Therefore the initialisation +%% function must be provided with a generator that produces ref count +%% deltas for all recovered messages. This is only used on startup +%% when the shutdown was non-clean. +%% +%% Read messages with a reference count greater than one are entered +%% into a message cache. The purpose of the cache is not especially +%% performance, though it can help there too, but prevention of memory +%% explosion. It ensures that as messages with a high reference count +%% are read from several processes they are read back as the same +%% binary object rather than multiples of identical binary +%% objects. +%% +%% Reads can be performed directly by clients without calling to the +%% server. This is safe because multiple file handles can be used to +%% read files. However, locking is used by the concurrent GC to make +%% sure that reads are not attempted from files which are in the +%% process of being garbage collected. +%% +%% The server automatically defers reads, removes and contains calls +%% that occur which refer to files which are currently being +%% GC'd. Contains calls are only deferred in order to ensure they do +%% not overtake removes. 
+%% +%% The current file to which messages are being written has a +%% write-back cache. This is written to immediately by clients and can +%% be read from by clients too. This means that there are only ever +%% writes made to the current file, thus eliminating delays due to +%% flushing write buffers in order to be able to safely read from the +%% current file. The one exception to this is that on start up, the +%% cache is not populated with msgs found in the current file, and +%% thus in this case only, reads may have to come from the file +%% itself. The effect of this is that even if the msg_store process is +%% heavily overloaded, clients can still write and read messages with +%% very low latency and not block at all. +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. + +%%---------------------------------------------------------------------------- +%% public API +%%---------------------------------------------------------------------------- + +start_link(Server, Dir, ClientRefs, StartupFunState) -> + gen_server2:start_link({local, Server}, ?MODULE, + [Server, Dir, ClientRefs, StartupFunState], + [{timeout, infinity}]). + +write(Server, Guid, Msg, + CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) -> + ok = update_msg_cache(CurFileCacheEts, Guid, Msg), + {gen_server2:cast(Server, {write, Guid}), CState}. + +read(Server, Guid, + CState = #client_msstate { dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> + %% 1. Check the dedup cache + case fetch_and_increment_cache(DedupCacheEts, Guid) of + not_found -> + %% 2. 
Check the cur file cache + case ets:lookup(CurFileCacheEts, Guid) of + [] -> + Defer = fun() -> {gen_server2:call( + Server, {read, Guid}, infinity), + CState} end, + case index_lookup(Guid, CState) of + not_found -> Defer(); + MsgLocation -> client_read1(Server, MsgLocation, Defer, + CState) + end; + [{Guid, Msg, _CacheRefCount}] -> + %% Although we've found it, we don't know the + %% refcount, so can't insert into dedup cache + {{ok, Msg}, CState} + end; + Msg -> + {{ok, Msg}, CState} + end. + +contains(Server, Guid) -> gen_server2:call(Server, {contains, Guid}, infinity). +remove(_Server, []) -> ok; +remove(Server, Guids) -> gen_server2:cast(Server, {remove, Guids}). +release(_Server, []) -> ok; +release(Server, Guids) -> gen_server2:cast(Server, {release, Guids}). +sync(Server, Guids, K) -> gen_server2:cast(Server, {sync, Guids, K}). +sync(Server) -> gen_server2:cast(Server, sync). %% internal + +gc_done(Server, Reclaimed, Source, Destination) -> + gen_server2:cast(Server, {gc_done, Reclaimed, Source, Destination}). + +set_maximum_since_use(Server, Age) -> + gen_server2:cast(Server, {set_maximum_since_use, Age}). + +client_init(Server, Ref) -> + {IState, IModule, Dir, GCPid, + FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} = + gen_server2:call(Server, {new_client_state, Ref}, infinity), + #client_msstate { file_handle_cache = dict:new(), + index_state = IState, + index_module = IModule, + dir = Dir, + gc_pid = GCPid, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }. + +client_terminate(CState, Server) -> + close_all_handles(CState), + ok = gen_server2:call(Server, client_terminate, infinity). + +client_delete_and_terminate(CState, Server, Ref) -> + close_all_handles(CState), + ok = gen_server2:cast(Server, {client_delete, Ref}). + +successfully_recovered_state(Server) -> + gen_server2:call(Server, successfully_recovered_state, infinity). 

%%----------------------------------------------------------------------------
%% Client-side-only helpers
%%----------------------------------------------------------------------------

%% Stage 1 of a direct-from-file client read: find the summary entry
%% for the file the index says holds the msg. If that file has
%% vanished, restart the whole read.
client_read1(Server,
             #msg_location { guid = Guid, file = File } = MsgLocation,
             Defer,
             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
    case ets:lookup(FileSummaryEts, File) of
        [] -> %% File has been GC'd and no longer exists. Go around again.
            read(Server, Guid, CState);
        [#file_summary { locked = Locked, right = Right }] ->
            client_read2(Server, Locked, Right, MsgLocation, Defer, CState)
    end.

%% Stage 2: from the file's locked flag and its position in the file
%% chain, decide whether a direct read is safe or the read must be
%% deferred to the server.
client_read2(_Server, false, undefined, _MsgLocation, Defer, _CState) ->
    %% Although we've already checked both caches and not found the
    %% message there, the message is apparently in the
    %% current_file. We can only arrive here if we are trying to read
    %% a message which we have not written, which is very odd, so just
    %% defer.
    %%
    %% OR, on startup, the cur_file_cache is not populated with the
    %% contents of the current file, thus reads from the current file
    %% will end up here and will need to be deferred.
    Defer();
client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) ->
    %% Of course, in the mean time, the GC could have run and our msg
    %% is actually in a different file, unlocked. However, deferring is
    %% the safest and simplest thing to do.
    Defer();
client_read2(Server, false, _Right,
             MsgLocation = #msg_location { guid = Guid, file = File },
             Defer,
             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
    %% It's entirely possible that everything we're doing from here on
    %% is for the wrong file, or a non-existent file, as a GC may have
    %% finished.
    safe_ets_update_counter(
      FileSummaryEts, File, {#file_summary.readers, +1},
      fun (_) -> client_read3(Server, MsgLocation, Defer, CState) end,
      fun () -> read(Server, Guid, CState) end).

%% Stage 3: we hold a +1 on the file's reader count. Re-verify the
%% file is still unlocked and the msg is still where we think it is,
%% then read from disk; otherwise release the count and retry/defer.
client_read3(Server, #msg_location { guid = Guid, file = File }, Defer,
             CState = #client_msstate { file_handles_ets = FileHandlesEts,
                                        file_summary_ets = FileSummaryEts,
                                        dedup_cache_ets  = DedupCacheEts,
                                        gc_pid           = GCPid }) ->
    %% Give back our +1 on readers; if we were the last reader of a
    %% locked file, tell the GC it may now proceed.
    Release =
        fun() -> ok = case ets:update_counter(FileSummaryEts, File,
                                              {#file_summary.readers, -1}) of
                          0 -> case ets:lookup(FileSummaryEts, File) of
                                   [#file_summary { locked = true }] ->
                                       rabbit_msg_store_gc:no_readers(
                                         GCPid, File);
                                   _ -> ok
                               end;
                          _ -> ok
                      end
        end,
    %% If a GC involving the file hasn't already started, it won't
    %% start now. Need to check again to see if we've been locked in
    %% the meantime, between lookup and update_counter (thus GC
    %% started before our +1. In fact, it could have finished by now
    %% too).
    case ets:lookup(FileSummaryEts, File) of
        [] -> %% GC has deleted our file, just go round again.
            read(Server, Guid, CState);
        [#file_summary { locked = true }] ->
            %% If we get a badarg here, then the GC has finished and
            %% deleted our file. Try going around again. Otherwise,
            %% just defer.
            %%
            %% badarg scenario: we lookup, msg_store locks, GC starts,
            %% GC ends, we +1 readers, msg_store ets:deletes (and
            %% unlocks the dest)
            try Release(),
                Defer()
            catch error:badarg -> read(Server, Guid, CState)
            end;
        [#file_summary { locked = false }] ->
            %% Ok, we're definitely safe to continue - a GC involving
            %% the file cannot start up now, and isn't running, so
            %% nothing will tell us from now on to close the handle if
            %% it's already open.
            %%
            %% Finally, we need to recheck that the msg is still at
            %% the same place - it's possible an entire GC ran between
            %% us doing the lookup and the +1 on the readers. (Same as
            %% badarg scenario above, but we don't have a missing file
            %% - we just have the /wrong/ file).
            case index_lookup(Guid, CState) of
                #msg_location { file = File } = MsgLocation ->
                    %% Still the same file.
                    mark_handle_open(FileHandlesEts, File),

                    CState1 = close_all_indicated(CState),
                    {Msg, CState2} = %% This will never be the current file
                        read_from_disk(MsgLocation, CState1, DedupCacheEts),
                    Release(), %% this MUST NOT fail with badarg
                    {{ok, Msg}, CState2};
                MsgLocation -> %% different file!
                    Release(), %% this MUST NOT fail with badarg
                    client_read1(Server, MsgLocation, Defer, CState)
            end
    end.

%%----------------------------------------------------------------------------
%% gen_server callbacks
%%----------------------------------------------------------------------------

%% Recover (or rebuild) the index and file summary, create the shared
%% ets tables handed out to clients, open the current file and start
%% the GC process.
init([Server, BaseDir, ClientRefs, StartupFunState]) ->
    process_flag(trap_exit, true),

    ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use,
                                             [self()]),

    Dir = filename:join(BaseDir, atom_to_list(Server)),

    {ok, IndexModule} = application:get_env(msg_store_index_module),
    rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]),

    AttemptFileSummaryRecovery =
        case ClientRefs of
            undefined -> ok = rabbit_misc:recursive_delete([Dir]),
                         ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
                         false;
            _         -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
                         recover_crashed_compactions(Dir)
        end,

    %% if we found crashed compactions we trust neither the
    %% file_summary nor the location index. Note the file_summary is
    %% left empty here if it can't be recovered.
    {FileSummaryRecovered, FileSummaryEts} =
        recover_file_summary(AttemptFileSummaryRecovery, Dir),

    {CleanShutdown, IndexState, ClientRefs1} =
        recover_index_and_client_refs(IndexModule, FileSummaryRecovered,
                                      ClientRefs, Dir, Server),
    %% CleanShutdown => msg location index and file_summary both
    %% recovered correctly.
    true = case {FileSummaryRecovered, CleanShutdown} of
               {true, false} -> ets:delete_all_objects(FileSummaryEts);
               _             -> true
           end,
    %% CleanShutdown <=> msg location index and file_summary both
    %% recovered correctly.

    DedupCacheEts   = ets:new(rabbit_msg_store_dedup_cache, [set, public]),
    FileHandlesEts  = ets:new(rabbit_msg_store_shared_file_handles,
                              [ordered_set, public]),
    CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]),

    {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit),

    State = #msstate { dir                    = Dir,
                       index_module           = IndexModule,
                       index_state            = IndexState,
                       current_file           = 0,
                       current_file_handle    = undefined,
                       file_handle_cache      = dict:new(),
                       on_sync                = [],
                       sync_timer_ref         = undefined,
                       sum_valid_data         = 0,
                       sum_file_size          = 0,
                       pending_gc_completion  = [],
                       gc_active              = false,
                       gc_pid                 = undefined,
                       file_handles_ets       = FileHandlesEts,
                       file_summary_ets       = FileSummaryEts,
                       dedup_cache_ets        = DedupCacheEts,
                       cur_file_cache_ets     = CurFileCacheEts,
                       client_refs            = ClientRefs1,
                       successfully_recovered = CleanShutdown,
                       file_size_limit        = FileSizeLimit
                     },

    %% If we didn't recover the msg location index then we need to
    %% rebuild it now.
    {Offset, State1 = #msstate { current_file = CurFile }} =
        build_index(CleanShutdown, StartupFunState, State),

    %% read is only needed so that we can seek
    {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile),
                             [read | ?WRITE_MODE]),
    {ok, Offset} = file_handle_cache:position(CurHdl, Offset),
    ok = file_handle_cache:truncate(CurHdl),

    {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule,
                                                 FileSummaryEts),

    {ok, maybe_compact(
           State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }),
     hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.

%% Serve client setup ahead of everything, and reads ahead of the
%% default priority.
prioritise_call(Msg, _From, _State) ->
    case Msg of
        {new_client_state, _Ref}     -> 7;
        successfully_recovered_state -> 7;
        {read, _Guid}                -> 2;
        _                            -> 0
    end.

%% Syncs, GC completions and fd-age requests outrank ordinary casts.
prioritise_cast(Msg, _State) ->
    case Msg of
        sync                                         -> 8;
        {gc_done, _Reclaimed, _Source, _Destination} -> 8;
        {set_maximum_since_use, _Age}                -> 8;
        _                                            -> 0
    end.

handle_call({read, Guid}, From, State) ->
    State1 = read_message(Guid, From, State),
    noreply(State1);

handle_call({contains, Guid}, From, State) ->
    State1 = contains_message(Guid, From, State),
    noreply(State1);

%% Hand a new client everything it needs to read directly, and record
%% its ref so a clean shutdown can be verified on restart.
handle_call({new_client_state, CRef}, _From,
            State = #msstate { dir                = Dir,
                               index_state        = IndexState,
                               index_module       = IndexModule,
                               file_handles_ets   = FileHandlesEts,
                               file_summary_ets   = FileSummaryEts,
                               dedup_cache_ets    = DedupCacheEts,
                               cur_file_cache_ets = CurFileCacheEts,
                               client_refs        = ClientRefs,
                               gc_pid             = GCPid }) ->
    reply({IndexState, IndexModule, Dir, GCPid,
           FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts},
          State #msstate { client_refs = sets:add_element(CRef, ClientRefs) });

handle_call(successfully_recovered_state, _From, State) ->
    reply(State #msstate.successfully_recovered, State);

handle_call(client_terminate, _From, State) ->
    reply(ok, State).

%% Perform the write the client already cached: append to the current
%% file for a new msg, or just bump the refcount for a known one.
handle_cast({write, Guid},
            State = #msstate { current_file_handle = CurHdl,
                               current_file        = CurFile,
                               sum_valid_data      = SumValid,
                               sum_file_size       = SumFileSize,
                               file_summary_ets    = FileSummaryEts,
                               cur_file_cache_ets  = CurFileCacheEts }) ->
    true = 0 =< ets:update_counter(CurFileCacheEts, Guid, {3, -1}),
    [{Guid, Msg, _CacheRefCount}] = ets:lookup(CurFileCacheEts, Guid),
    case index_lookup(Guid, State) of
        not_found ->
            %% New message, lots to do
            {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl),
            {ok, TotalSize} = rabbit_msg_file:append(CurHdl, Guid, Msg),
            ok = index_insert(#msg_location {
                                guid = Guid, ref_count = 1, file = CurFile,
                                offset = CurOffset, total_size = TotalSize },
                              State),
            [#file_summary { valid_total_size = ValidTotalSize,
                             right            = undefined,
                             locked           = false,
                             file_size        = FileSize }] =
                ets:lookup(FileSummaryEts, CurFile),
            ValidTotalSize1 = ValidTotalSize + TotalSize,
            true = ets:update_element(
                     FileSummaryEts, CurFile,
                     [{#file_summary.valid_total_size, ValidTotalSize1},
                      {#file_summary.file_size, FileSize + TotalSize}]),
            NextOffset = CurOffset + TotalSize,
            noreply(
              maybe_roll_to_new_file(
                NextOffset, State #msstate {
                              sum_valid_data = SumValid + TotalSize,
                              sum_file_size  = SumFileSize + TotalSize }));
        #msg_location { ref_count = RefCount } ->
            %% We already know about it, just update counter. Only
            %% update field otherwise bad interaction with concurrent GC
            ok = index_update_fields(Guid,
                                     {#msg_location.ref_count, RefCount + 1},
                                     State),
            noreply(State)
    end;

handle_cast({remove, Guids}, State) ->
    State1 = lists:foldl(
               fun (Guid, State2) -> remove_message(Guid, State2) end,
               State, Guids),
    noreply(maybe_compact(State1));

handle_cast({release, Guids}, State =
                #msstate { dedup_cache_ets = DedupCacheEts }) ->
    lists:foreach(
      fun (Guid) -> decrement_cache(DedupCacheEts, Guid) end, Guids),
    noreply(State);

%% Run K now if every Guid is already on disk and synced; otherwise
%% queue it to run after the next fsync.
handle_cast({sync, Guids, K},
            State = #msstate { current_file        = CurFile,
                               current_file_handle = CurHdl,
                               on_sync             = Syncs }) ->
    {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl),
    case lists:any(fun (Guid) ->
                           #msg_location { file = File, offset = Offset } =
                               index_lookup(Guid, State),
                           File =:= CurFile andalso Offset >= SyncOffset
                   end, Guids) of
        false -> K(),
                 noreply(State);
        true  -> noreply(State #msstate { on_sync = [K | Syncs] })
    end;

handle_cast(sync, State) ->
    noreply(internal_sync(State));

handle_cast({gc_done, Reclaimed, Src, Dst},
            State = #msstate { sum_file_size    = SumFileSize,
                               gc_active        = {Src, Dst},
                               file_handles_ets = FileHandlesEts,
                               file_summary_ets = FileSummaryEts }) ->
    %% GC done, so now ensure that any clients that have open fhs to
    %% those files close them before using them again. This has to be
    %% done here (given it's done in the msg_store, and not the gc),
    %% and not when starting up the GC, because if done when starting
    %% up the GC, the client could find the close, and close and
    %% reopen the fh, whilst the GC is waiting for readers to
    %% disappear, before it's actually done the GC.
    true = mark_handle_to_close(FileHandlesEts, Src),
    true = mark_handle_to_close(FileHandlesEts, Dst),
    %% we always move data left, so Src has gone and was on the
    %% right, so need to make dest = source.right.left, and also
    %% dest.right = source.right
    [#file_summary { left    = Dst,
                     right   = SrcRight,
                     locked  = true,
                     readers = 0 }] = ets:lookup(FileSummaryEts, Src),
    %% this could fail if SrcRight =:= undefined
    ets:update_element(FileSummaryEts, SrcRight, {#file_summary.left, Dst}),
    true = ets:update_element(FileSummaryEts, Dst,
                              [{#file_summary.locked, false},
                               {#file_summary.right, SrcRight}]),
    true = ets:delete(FileSummaryEts, Src),
    noreply(
      maybe_compact(run_pending(
                      State #msstate { sum_file_size = SumFileSize - Reclaimed,
                                       gc_active     = false })));

handle_cast({set_maximum_since_use, Age}, State) ->
    ok = file_handle_cache:set_maximum_since_use(Age),
    noreply(State);

handle_cast({client_delete, CRef},
            State = #msstate { client_refs = ClientRefs }) ->
    noreply(
      State #msstate { client_refs = sets:del_element(CRef, ClientRefs) }).

%% A timeout of 0 from next_state means there are pending syncs.
handle_info(timeout, State) ->
    noreply(internal_sync(State));

handle_info({'EXIT', _Pid, Reason}, State) ->
    {stop, Reason, State}.

terminate(_Reason, State = #msstate { index_state         = IndexState,
                                      index_module        = IndexModule,
                                      current_file_handle = CurHdl,
                                      gc_pid              = GCPid,
                                      file_handles_ets    = FileHandlesEts,
                                      file_summary_ets    = FileSummaryEts,
                                      dedup_cache_ets     = DedupCacheEts,
                                      cur_file_cache_ets  = CurFileCacheEts,
                                      client_refs         = ClientRefs,
                                      dir                 = Dir }) ->
    %% stop the gc first, otherwise it could be working and we pull
    %% out the ets tables from under it.
    ok = rabbit_msg_store_gc:stop(GCPid),
    State1 = case CurHdl of
                 undefined -> State;
                 _         -> State2 = internal_sync(State),
                              file_handle_cache:close(CurHdl),
                              State2
             end,
    State3 = close_all_handles(State1),
    store_file_summary(FileSummaryEts, Dir),
    [ets:delete(T) ||
        T <- [FileSummaryEts, DedupCacheEts, FileHandlesEts, CurFileCacheEts]],
    IndexModule:terminate(IndexState),
    %% writing the recovery terms last marks this as a clean shutdown
    store_recovery_terms([{client_refs, sets:to_list(ClientRefs)},
                          {index_module, IndexModule}], Dir),
    State3 #msstate { index_state         = undefined,
                      current_file_handle = undefined }.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%----------------------------------------------------------------------------
%% general helper functions
%%----------------------------------------------------------------------------

noreply(State) ->
    {State1, Timeout} = next_state(State),
    {noreply, State1, Timeout}.

reply(Reply, State) ->
    {State1, Timeout} = next_state(State),
    {reply, Reply, State1, Timeout}.

%% Choose the gen_server timeout: hibernate when idle; timeout 0
%% (which triggers an immediate sync via handle_info) while there are
%% pending sync continuations.
next_state(State = #msstate { on_sync = [], sync_timer_ref = undefined }) ->
    {State, hibernate};
next_state(State = #msstate { sync_timer_ref = undefined }) ->
    {start_sync_timer(State), 0};
next_state(State = #msstate { on_sync = [] }) ->
    {stop_sync_timer(State), hibernate};
next_state(State) ->
    {State, 0}.

start_sync_timer(State = #msstate { sync_timer_ref = undefined }) ->
    {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, [self()]),
    State #msstate { sync_timer_ref = TRef }.

stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) ->
    State;
stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) ->
    {ok, cancel} = timer:cancel(TRef),
    State #msstate { sync_timer_ref = undefined }.

%% Fsync the current file (if anything is waiting) and run the queued
%% sync continuations in submission order.
internal_sync(State = #msstate { current_file_handle = CurHdl,
                                 on_sync             = Syncs }) ->
    State1 = stop_sync_timer(State),
    case Syncs of
        [] -> State1;
        _  -> ok = file_handle_cache:sync(CurHdl),
              lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)),
              State1 #msstate { on_sync = [] }
    end.

%% Server-side read: reply to From with {ok, Msg} or not_found.
read_message(Guid, From,
             State = #msstate { dedup_cache_ets = DedupCacheEts }) ->
    case index_lookup(Guid, State) of
        not_found ->
            gen_server2:reply(From, not_found),
            State;
        MsgLocation ->
            case fetch_and_increment_cache(DedupCacheEts, Guid) of
                not_found -> read_message1(From, MsgLocation, State);
                Msg       -> gen_server2:reply(From, {ok, Msg}),
                             State
            end
    end.

%% Read a msg not found in the dedup cache. Current-file reads may
%% need a flush first; reads from a locked (GC'd) file are deferred
%% until the GC completes.
read_message1(From, #msg_location { guid = Guid, ref_count = RefCount,
                                    file = File, offset = Offset } = MsgLoc,
              State = #msstate { current_file        = CurFile,
                                 current_file_handle = CurHdl,
                                 file_summary_ets    = FileSummaryEts,
                                 dedup_cache_ets     = DedupCacheEts,
                                 cur_file_cache_ets  = CurFileCacheEts }) ->
    case File =:= CurFile of
        true  -> {Msg, State1} =
                     %% can return [] if msg in file existed on startup
                     case ets:lookup(CurFileCacheEts, Guid) of
                         [] ->
                             %% flush if the msg may still be in the
                             %% write buffer
                             {ok, RawOffSet} =
                                 file_handle_cache:current_raw_offset(CurHdl),
                             ok = case Offset >= RawOffSet of
                                      true  -> file_handle_cache:flush(CurHdl);
                                      false -> ok
                                  end,
                             read_from_disk(MsgLoc, State, DedupCacheEts);
                         [{Guid, Msg1, _CacheRefCount}] ->
                             ok = maybe_insert_into_cache(
                                    DedupCacheEts, RefCount, Guid, Msg1),
                             {Msg1, State}
                     end,
                 gen_server2:reply(From, {ok, Msg}),
                 State1;
        false -> [#file_summary { locked = Locked }] =
                     ets:lookup(FileSummaryEts, File),
                 case Locked of
                     true  -> add_to_pending_gc_completion({read, Guid, From},
                                                           State);
                     false -> {Msg, State1} =
                                  read_from_disk(MsgLoc, State, DedupCacheEts),
                              gen_server2:reply(From, {ok, Msg}),
                              State1
                 end
    end.

%% Read and parse a msg straight from its file. The guid read back
%% must match; any mismatch produces a deliberate badmatch carrying
%% full diagnostics. Multi-ref msgs are fed into the dedup cache.
read_from_disk(#msg_location { guid = Guid, ref_count = RefCount,
                               file = File, offset = Offset,
                               total_size = TotalSize },
               State, DedupCacheEts) ->
    {Hdl, State1} = get_read_handle(File, State),
    {ok, Offset} = file_handle_cache:position(Hdl, Offset),
    {ok, {Guid, Msg}} =
        case rabbit_msg_file:read(Hdl, TotalSize) of
            {ok, {Guid, _}} = Obj ->
                Obj;
            Rest ->
                %% intentionally fails the match above -> crash report
                {error, {misread, [{old_state, State},
                                   {file_num,  File},
                                   {offset,    Offset},
                                   {guid,      Guid},
                                   {read,      Rest},
                                   {proc_dict, get()}
                                  ]}}
        end,
    ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg),
    {Msg, State1}.

%% Reply whether Guid is known. If its file is involved in the active
%% GC, the answer is deferred until the GC completes.
contains_message(Guid, From, State = #msstate { gc_active = GCActive }) ->
    case index_lookup(Guid, State) of
        not_found ->
            gen_server2:reply(From, false),
            State;
        #msg_location { file = File } ->
            case GCActive of
                {A, B} when File =:= A orelse File =:= B ->
                    add_to_pending_gc_completion(
                      {contains, Guid, From}, State);
                _ ->
                    gen_server2:reply(From, true),
                    State
            end
    end.

%% Drop one reference to Guid. On the last reference, delete the index
%% entry and shrink the file's valid size (deferred if the file is
%% locked by GC); otherwise just decrement the refcount.
remove_message(Guid, State = #msstate { sum_valid_data   = SumValid,
                                        file_summary_ets = FileSummaryEts,
                                        dedup_cache_ets  = DedupCacheEts }) ->
    #msg_location { ref_count = RefCount, file = File,
                    total_size = TotalSize } = index_lookup(Guid, State),
    case RefCount of
        1 ->
            %% don't remove from CUR_FILE_CACHE_ETS_NAME here because
            %% there may be further writes in the mailbox for the same
            %% msg.
            ok = remove_cache_entry(DedupCacheEts, Guid),
            [#file_summary { valid_total_size = ValidTotalSize,
                             locked           = Locked }] =
                ets:lookup(FileSummaryEts, File),
            case Locked of
                true ->
                    add_to_pending_gc_completion({remove, Guid}, State);
                false ->
                    ok = index_delete(Guid, State),
                    ValidTotalSize1 = ValidTotalSize - TotalSize,
                    true =
                        ets:update_element(
                          FileSummaryEts, File,
                          [{#file_summary.valid_total_size, ValidTotalSize1}]),
                    State1 = delete_file_if_empty(File, State),
                    State1 #msstate { sum_valid_data = SumValid - TotalSize }
            end;
        _ when 1 < RefCount ->
            ok = decrement_cache(DedupCacheEts, Guid),
            %% only update field, otherwise bad interaction with concurrent GC
            ok = index_update_fields(Guid,
                                     {#msg_location.ref_count, RefCount - 1},
                                     State),
            State
    end.

%% Queue an operation to be replayed once the active GC finishes.
add_to_pending_gc_completion(
  Op, State = #msstate { pending_gc_completion = Pending }) ->
    State #msstate { pending_gc_completion = [Op | Pending] }.

%% Replay all deferred operations, oldest first.
run_pending(State = #msstate { pending_gc_completion = [] }) ->
    State;
run_pending(State = #msstate { pending_gc_completion = Pending }) ->
    State1 = State #msstate { pending_gc_completion = [] },
    lists:foldl(fun run_pending/2, State1, lists:reverse(Pending)).

run_pending({read, Guid, From}, State) ->
    read_message(Guid, From, State);
run_pending({contains, Guid, From}, State) ->
    contains_message(Guid, From, State);
run_pending({remove, Guid}, State) ->
    remove_message(Guid, State).

%% ets:update_counter, with badarg (key concurrently deleted) routed
%% to FailThunk instead of crashing.
safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) ->
    try
        SuccessFun(ets:update_counter(Tab, Key, UpdateOp))
    catch error:badarg -> FailThunk()
    end.

safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) ->
    safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk).

%%----------------------------------------------------------------------------
%% file helper functions
%%----------------------------------------------------------------------------

%% Open Dir/FileName in binary mode with our standard write buffer.
open_file(Dir, FileName, Mode) ->
    file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode,
                           [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]).

%% Close (and forget) the cached handle for Key, if one is held.
%% Clauses accept client state, server state, or a bare handle dict.
close_handle(Key, CState = #client_msstate { file_handle_cache = FHC }) ->
    CState #client_msstate { file_handle_cache = close_handle(Key, FHC) };
close_handle(Key, State = #msstate { file_handle_cache = FHC }) ->
    State #msstate { file_handle_cache = close_handle(Key, FHC) };
close_handle(Key, FHC) ->
    case dict:find(Key, FHC) of
        {ok, Hdl} -> ok = file_handle_cache:close(Hdl),
                     dict:erase(Key, FHC);
        error     -> FHC
    end.

%% Record that this process has File open for reading. A false from
%% insert_new just means the entry already exists, which is fine.
mark_handle_open(FileHandlesEts, File) ->
    ets:insert_new(FileHandlesEts, {{self(), File}, open}),
    true.

%% Flag every open handle on File (whichever process owns it) as
%% needing closure before its next use.
mark_handle_to_close(FileHandlesEts, File) ->
    OpenEntries = ets:match_object(FileHandlesEts, {{'_', File}, open}),
    lists:foreach(
      fun ({Key, open}) ->
              ets:update_element(FileHandlesEts, Key, {2, close})
      end, OpenEntries),
    true.

%% Close each of our handles that the server has flagged for closure,
%% removing the flags as we go.
close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } =
                        CState) ->
    Flagged = ets:match_object(FileHandlesEts, {{self(), '_'}, close}),
    lists:foldl(fun ({Key = {_Self, File}, close}, CStateN) ->
                        true = ets:delete(FileHandlesEts, Key),
                        close_handle(File, CStateN)
                end, CState, Flagged).

%% Close every cached file handle; for client state, also drop our
%% rows from the shared file-handles table.
close_all_handles(CState = #client_msstate { file_handles_ets  = FileHandlesEts,
                                             file_handle_cache = FHC }) ->
    Self = self(),
    ok = dict:fold(fun (File, Hdl, ok) ->
                           true = ets:delete(FileHandlesEts, {Self, File}),
                           file_handle_cache:close(Hdl)
                   end, ok, FHC),
    CState #client_msstate { file_handle_cache = dict:new() };
close_all_handles(State = #msstate { file_handle_cache = FHC }) ->
    ok = dict:fold(fun (_Key, Hdl, ok) -> file_handle_cache:close(Hdl) end,
                   ok, FHC),
    State #msstate { file_handle_cache = dict:new() }.

%% Fetch a read handle for FileNum, opening and caching it on first
%% use. Clauses accept either client or server state.
get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = FHC,
                                                    dir = Dir }) ->
    {Hdl, FHC1} = get_read_handle(FileNum, FHC, Dir),
    {Hdl, CState #client_msstate { file_handle_cache = FHC1 }};
get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC,
                                            dir = Dir }) ->
    {Hdl, FHC1} = get_read_handle(FileNum, FHC, Dir),
    {Hdl, State #msstate { file_handle_cache = FHC1 }}.

get_read_handle(FileNum, FHC, Dir) ->
    case dict:find(FileNum, FHC) of
        {ok, Hdl} ->
            {Hdl, FHC};
        error ->
            {ok, Hdl} = open_file(Dir, filenum_to_name(FileNum), ?READ_MODE),
            {Hdl, dict:store(FileNum, Hdl, FHC)}
    end.

%% Grow the file out to FileSizeLimit (via truncate at the far
%% position), then seek back to FinalPos.
preallocate(Hdl, FileSizeLimit, FinalPos) ->
    {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit),
    ok = file_handle_cache:truncate(Hdl),
    {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos),
    ok.

%% Chop the file at Lowpoint, then preallocate up to Highpoint,
%% leaving the position at Lowpoint.
truncate_and_extend_file(Hdl, Lowpoint, Highpoint) ->
    {ok, Lowpoint} = file_handle_cache:position(Hdl, Lowpoint),
    ok = file_handle_cache:truncate(Hdl),
    ok = preallocate(Hdl, Highpoint, Lowpoint).

form_filename(Dir, Name) -> filename:join(Dir, Name).

filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION.

filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)).

%% All files in Dir with extension Ext, sorted numerically ascending.
list_sorted_file_names(Dir, Ext) ->
    lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end,
               filelib:wildcard("*" ++ Ext, Dir)).

%%----------------------------------------------------------------------------
%% message cache helper functions
%%----------------------------------------------------------------------------

%% Only msgs referenced more than once are worth deduplicating.
maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg)
  when RefCount > 1 ->
    update_msg_cache(DedupCacheEts, Guid, Msg);
maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) ->
    ok.

%% Insert {Guid, Msg, 1} into the cache, or bump the refcount if the
%% entry already exists; retries if the entry vanishes between the
%% insert_new and the counter update.
update_msg_cache(CacheEts, Guid, Msg) ->
    case ets:insert_new(CacheEts, {Guid, Msg, 1}) of
        true  -> ok;
        false -> safe_ets_update_counter_ok(
                   CacheEts, Guid, {3, +1},
                   fun () -> update_msg_cache(CacheEts, Guid, Msg) end)
    end.

remove_cache_entry(DedupCacheEts, Guid) ->
    true = ets:delete(DedupCacheEts, Guid),
    ok.

%% Return the cached msg, bumping its refcount, or not_found.
fetch_and_increment_cache(DedupCacheEts, Guid) ->
    case ets:lookup(DedupCacheEts, Guid) of
        [] ->
            not_found;
        [{_Guid, Msg, _RefCount}] ->
            safe_ets_update_counter_ok(
              DedupCacheEts, Guid, {3, +1},
              %% someone has deleted us in the meantime, insert us
              fun () -> ok = update_msg_cache(DedupCacheEts, Guid, Msg) end),
            Msg
    end.

%% Drop one cache reference, deleting the entry once the count hits
%% zero; a missing entry is fine (delivered but never read).
decrement_cache(DedupCacheEts, Guid) ->
    true = safe_ets_update_counter(
             DedupCacheEts, Guid, {3, -1},
             fun (N) when N =< 0 -> true = ets:delete(DedupCacheEts, Guid);
                 (_N)            -> true
             end,
             %% Guid is not in there because although it's been
             %% delivered, it's never actually been read (think:
             %% persistent message held in RAM)
             fun () -> true end),
    ok.

%%----------------------------------------------------------------------------
%% index
%%----------------------------------------------------------------------------

%% Thin wrappers dispatching to the configured index module.

index_lookup(Key, #client_msstate { index_module = Index,
                                    index_state = State }) ->
    Index:lookup(Key, State);

index_lookup(Key, #msstate { index_module = Index, index_state = State }) ->
    Index:lookup(Key, State).

index_insert(Obj, #msstate { index_module = Index, index_state = State }) ->
    Index:insert(Obj, State).

index_update(Obj, #msstate { index_module = Index, index_state = State }) ->
    Index:update(Obj, State).

index_update_fields(Key, Updates, #msstate { index_module = Index,
                                             index_state = State }) ->
    Index:update_fields(Key, Updates, State).

index_delete(Key, #msstate { index_module = Index, index_state = State }) ->
    Index:delete(Key, State).

index_delete_by_file(File, #msstate { index_module = Index,
                                      index_state = State }) ->
    Index:delete_by_file(File, State).

%%----------------------------------------------------------------------------
%% shutdown and recovery
%%----------------------------------------------------------------------------

%% Returns {CleanShutdown, IndexState, ClientRefsSet}. Falls back to a
%% fresh index (CleanShutdown = false) whenever recovery terms are
%% absent, unreadable, or disagree with the expected client refs or
%% index module.
recover_index_and_client_refs(IndexModule, _Recover, undefined, Dir, _Server) ->
    {false, IndexModule:new(Dir), sets:new()};
recover_index_and_client_refs(IndexModule, false, _ClientRefs, Dir, Server) ->
    rabbit_log:warning("~w: rebuilding indices from scratch~n", [Server]),
    {false, IndexModule:new(Dir), sets:new()};
recover_index_and_client_refs(IndexModule, true, ClientRefs, Dir, Server) ->
    Fresh = fun (ErrorMsg, ErrorArgs) ->
                    rabbit_log:warning("~w: " ++ ErrorMsg ++ "~n"
                                       "rebuilding indices from scratch~n",
                                       [Server | ErrorArgs]),
                    {false, IndexModule:new(Dir), sets:new()}
            end,
    case read_recovery_terms(Dir) of
        {false, Error} ->
            Fresh("failed to read recovery terms: ~p", [Error]);
        {true, Terms} ->
            RecClientRefs  = proplists:get_value(client_refs, Terms, []),
            RecIndexModule = proplists:get_value(index_module, Terms),
            case (lists:sort(ClientRefs) =:= lists:sort(RecClientRefs)
                  andalso IndexModule =:= RecIndexModule) of
                true  -> case IndexModule:recover(Dir) of
                             {ok, IndexState1} ->
                                 {true, IndexState1,
                                  sets:from_list(ClientRefs)};
                             {error, Error} ->
                                 Fresh("failed to recover index: ~p", [Error])
                         end;
                false -> Fresh("recovery terms differ from present", [])
            end
    end.

store_recovery_terms(Terms, Dir) ->
    rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms).

%% Read and then delete the recovery terms file - deleting ensures a
%% subsequent unclean shutdown cannot be mistaken for a clean one.
read_recovery_terms(Dir) ->
    Path = filename:join(Dir, ?CLEAN_FILENAME),
    case rabbit_misc:read_term_file(Path) of
        {ok, Terms}    -> case file:delete(Path) of
                              ok             -> {true, Terms};
                              {error, Error} -> {false, Error}
                          end;
        {error, Error} -> {false, Error}
    end.

store_file_summary(Tid, Dir) ->
    ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME),
                      [{extended_info, [object_count]}]).

recover_file_summary(false, _Dir) ->
    %% TODO: the only reason for this to be an *ordered*_set is so
    %% that a) maybe_compact can start a traversal from the eldest
    %% file, and b) build_index in fast recovery mode can easily
    %% identify the current file. It's awkward to have both that
    %% ordering and the left/right pointers in the entries - replacing
    %% the former with some additional bit of state would be easy, but
    %% ditching the latter would be neater.
    {false, ets:new(rabbit_msg_store_file_summary,
                    [ordered_set, public, {keypos, #file_summary.file}])};
recover_file_summary(true, Dir) ->
    Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME),
    case ets:file2tab(Path) of
        {ok, Tid}       -> file:delete(Path),
                           {true, Tid};
        {error, _Error} -> recover_file_summary(false, Dir)
    end.

%% Seed the index with reference-count deltas produced by the
%% generator fun Gen; entries whose count reaches zero are removed
%% again. Locations are left undefined, to be filled in by the scan.
count_msg_refs(Gen, Seed, State) ->
    case Gen(Seed) of
        finished ->
            ok;
        {_Guid, 0, Next} ->
            count_msg_refs(Gen, Next, State);
        {Guid, Delta, Next} ->
            ok = case index_lookup(Guid, State) of
                     not_found ->
                         index_insert(#msg_location { guid = Guid,
                                                      file = undefined,
                                                      ref_count = Delta },
                                      State);
                     #msg_location { ref_count = RefCount } = StoreEntry ->
                         NewRefCount = RefCount + Delta,
                         case NewRefCount of
                             0 -> index_delete(Guid, State);
                             _ -> index_update(StoreEntry #msg_location {
                                                 ref_count = NewRefCount },
                                               State)
                         end
                 end,
            count_msg_refs(Gen, Next, State)
    end.

%% Any tmp files left on disk mark compactions that crashed midway;
%% fold each tmp file's contents back into its main file. Returns true
%% iff no tmp files were found (i.e. the file summary can be trusted).
recover_crashed_compactions(Dir) ->
    FileNames =    list_sorted_file_names(Dir, ?FILE_EXTENSION),
    TmpFileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION_TMP),
    lists:foreach(
      fun (TmpFileName) ->
              NonTmpRelatedFileName =
                  filename:rootname(TmpFileName) ++ ?FILE_EXTENSION,
              true = lists:member(NonTmpRelatedFileName, FileNames),
              ok = recover_crashed_compaction(
                     Dir, TmpFileName, NonTmpRelatedFileName)
      end, TmpFileNames),
    TmpFileNames == [].

recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) ->
    %% Because a msg can legitimately appear multiple times in the
    %% same file, identifying the contents of the tmp file and where
    %% they came from is non-trivial. If we are recovering a crashed
    %% compaction then we will be rebuilding the index, which can cope
    %% with duplicates appearing. Thus the simplest and safest thing
    %% to do is to append the contents of the tmp file to its main
    %% file.
    {ok, TmpHdl}  = open_file(Dir, TmpFileName, ?READ_MODE),
    {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName,
                              ?READ_MODE ++ ?WRITE_MODE),
    {ok, _End} = file_handle_cache:position(MainHdl, eof),
    Size = filelib:file_size(form_filename(Dir, TmpFileName)),
    {ok, Size} = file_handle_cache:copy(TmpHdl, MainHdl, Size),
    ok = file_handle_cache:close(MainHdl),
    ok = file_handle_cache:delete(TmpHdl),
    ok.

%% Scan FileName for valid messages; a missing file counts as empty.
scan_file_for_valid_messages(Dir, FileName) ->
    case open_file(Dir, FileName, ?READ_MODE) of
        {ok, Hdl}       -> Valid = rabbit_msg_file:scan(
                                     Hdl, filelib:file_size(
                                            form_filename(Dir, FileName))),
                           %% if something really bad has happened,
                           %% the close could fail, but ignore
                           file_handle_cache:close(Hdl),
                           Valid;
        {error, enoent} -> {ok, [], 0};
        {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}}
    end.

%% Takes the list in *ascending* order (i.e. eldest message
%% first). This is the opposite of what scan_file_for_valid_messages
%% produces. The list of msgs that is produced is youngest first.
drop_contiguous_block_prefix(L) -> drop_contiguous_block_prefix(L, 0).

drop_contiguous_block_prefix([], ExpectedOffset) ->
    {ExpectedOffset, []};
drop_contiguous_block_prefix([#msg_location { offset = ExpectedOffset,
                                              total_size = TotalSize } | Tail],
                             ExpectedOffset) ->
    ExpectedOffset1 = ExpectedOffset + TotalSize,
    drop_contiguous_block_prefix(Tail, ExpectedOffset1);
drop_contiguous_block_prefix(MsgsAfterGap, ExpectedOffset) ->
    {ExpectedOffset, MsgsAfterGap}.

%% Fast path (clean shutdown): trust the recovered file summary, just
%% accumulating the size totals; the fold over the ordered_set leaves
%% current_file set to the last (highest-numbered) file.
build_index(true, _StartupFunState,
            State = #msstate { file_summary_ets = FileSummaryEts }) ->
    ets:foldl(
      fun (#file_summary { valid_total_size = ValidTotalSize,
                           file_size        = FileSize,
                           file             = File },
           {_Offset, State1 = #msstate { sum_valid_data = SumValid,
                                         sum_file_size  = SumFileSize }}) ->
              {FileSize, State1 #msstate {
                           sum_valid_data = SumValid + ValidTotalSize,
                           sum_file_size  = SumFileSize + FileSize,
                           current_file   = File }}
      end, {0, State}, FileSummaryEts);
%% Slow path: count refs first, then rebuild the index and file
%% summary by scanning every file, gathering results via a gatherer.
build_index(false, {MsgRefDeltaGen, MsgRefDeltaGenInit},
            State = #msstate { dir = Dir }) ->
    ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State),
    {ok, Pid} = gatherer:start_link(),
    case [filename_to_num(FileName) ||
             FileName <- list_sorted_file_names(Dir, ?FILE_EXTENSION)] of
        []    -> build_index(Pid, undefined, [State #msstate.current_file],
                             State);
        Files -> {Offset, State1} = build_index(Pid, undefined, Files, State),
                 {Offset, lists:foldl(fun delete_file_if_empty/2,
                                      State1, Files)}
    end.
+ +build_index(Gatherer, Left, [], + State = #msstate { file_summary_ets = FileSummaryEts, + sum_valid_data = SumValid, + sum_file_size = SumFileSize }) -> + case gatherer:out(Gatherer) of + empty -> + ok = gatherer:stop(Gatherer), + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), + ok = index_delete_by_file(undefined, State), + Offset = case ets:lookup(FileSummaryEts, Left) of + [] -> 0; + [#file_summary { file_size = FileSize }] -> FileSize + end, + {Offset, State #msstate { current_file = Left }}; + {value, #file_summary { valid_total_size = ValidTotalSize, + file_size = FileSize } = FileSummary} -> + true = ets:insert_new(FileSummaryEts, FileSummary), + build_index(Gatherer, Left, [], + State #msstate { + sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize }) + end; +build_index(Gatherer, Left, [File|Files], State) -> + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> build_index_worker(Gatherer, State, + Left, File, Files) + end), + build_index(Gatherer, File, Files, State). + +build_index_worker(Gatherer, State = #msstate { dir = Dir }, + Left, File, Files) -> + {ok, Messages, FileSize} = + scan_file_for_valid_messages(Dir, filenum_to_name(File)), + {ValidMessages, ValidTotalSize} = + lists:foldl( + fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + case index_lookup(Guid, State) of + #msg_location { file = undefined } = StoreEntry -> + ok = index_update(StoreEntry #msg_location { + file = File, offset = Offset, + total_size = TotalSize }, + State), + {[Obj | VMAcc], VTSAcc + TotalSize}; + _ -> + {VMAcc, VTSAcc} + end + end, {[], 0}, Messages), + {Right, FileSize1} = + case Files of + %% if it's the last file, we'll truncate to remove any + %% rubbish above the last valid message. This affects the + %% file size. 
+ [] -> {undefined, case ValidMessages of + [] -> 0; + _ -> {_Guid, TotalSize, Offset} = + lists:last(ValidMessages), + Offset + TotalSize + end}; + [F|_] -> {F, FileSize} + end, + ok = gatherer:in(Gatherer, #file_summary { + file = File, + valid_total_size = ValidTotalSize, + left = Left, + right = Right, + file_size = FileSize1, + locked = false, + readers = 0 }), + ok = gatherer:finish(Gatherer). + +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation -- internal +%%---------------------------------------------------------------------------- + +maybe_roll_to_new_file( + Offset, + State = #msstate { dir = Dir, + current_file_handle = CurHdl, + current_file = CurFile, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts, + file_size_limit = FileSizeLimit }) + when Offset >= FileSizeLimit -> + State1 = internal_sync(State), + ok = file_handle_cache:close(CurHdl), + NextFile = CurFile + 1, + {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), + true = ets:insert_new(FileSummaryEts, #file_summary { + file = NextFile, + valid_total_size = 0, + left = CurFile, + right = undefined, + file_size = 0, + locked = false, + readers = 0 }), + true = ets:update_element(FileSummaryEts, CurFile, + {#file_summary.right, NextFile}), + true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}), + maybe_compact(State1 #msstate { current_file_handle = NextHdl, + current_file = NextFile }); +maybe_roll_to_new_file(_, State) -> + State. + +maybe_compact(State = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize, + gc_active = false, + gc_pid = GCPid, + file_summary_ets = FileSummaryEts, + file_size_limit = FileSizeLimit }) + when (SumFileSize > 2 * FileSizeLimit andalso + (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) -> + %% TODO: the algorithm here is sub-optimal - it may result in a + %% complete traversal of FileSummaryEts. 
+ case ets:first(FileSummaryEts) of + '$end_of_table' -> + State; + First -> + case find_files_to_gc(FileSummaryEts, FileSizeLimit, + ets:lookup(FileSummaryEts, First)) of + not_found -> + State; + {Src, Dst} -> + State1 = close_handle(Src, close_handle(Dst, State)), + true = ets:update_element(FileSummaryEts, Src, + {#file_summary.locked, true}), + true = ets:update_element(FileSummaryEts, Dst, + {#file_summary.locked, true}), + ok = rabbit_msg_store_gc:gc(GCPid, Src, Dst), + State1 #msstate { gc_active = {Src, Dst} } + end + end; +maybe_compact(State) -> + State. + +find_files_to_gc(FileSummaryEts, FileSizeLimit, + [#file_summary { file = Dst, + valid_total_size = DstValid, + right = Src }]) -> + case Src of + undefined -> + not_found; + _ -> + [#file_summary { file = Src, + valid_total_size = SrcValid, + left = Dst, + right = SrcRight }] = Next = + ets:lookup(FileSummaryEts, Src), + case SrcRight of + undefined -> not_found; + _ -> case DstValid + SrcValid =< FileSizeLimit of + true -> {Src, Dst}; + false -> find_files_to_gc( + FileSummaryEts, FileSizeLimit, Next) + end + end + end. + +delete_file_if_empty(File, State = #msstate { current_file = File }) -> + State; +delete_file_if_empty(File, State = #msstate { + dir = Dir, + sum_file_size = SumFileSize, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts }) -> + [#file_summary { valid_total_size = ValidData, + left = Left, + right = Right, + file_size = FileSize, + locked = false }] = + ets:lookup(FileSummaryEts, File), + case ValidData of + %% we should NEVER find the current file in here hence right + %% should always be a file, not undefined + 0 -> case {Left, Right} of + {undefined, _} when Right =/= undefined -> + %% the eldest file is empty. 
+ true = ets:update_element( + FileSummaryEts, Right, + {#file_summary.left, undefined}); + {_, _} when Right =/= undefined -> + true = ets:update_element(FileSummaryEts, Right, + {#file_summary.left, Left}), + true = ets:update_element(FileSummaryEts, Left, + {#file_summary.right, Right}) + end, + true = mark_handle_to_close(FileHandlesEts, File), + true = ets:delete(FileSummaryEts, File), + State1 = close_handle(File, State), + ok = file:delete(form_filename(Dir, filenum_to_name(File))), + State1 #msstate { sum_file_size = SumFileSize - FileSize }; + _ -> State + end. + +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation -- external +%%---------------------------------------------------------------------------- + +gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> + [SrcObj = #file_summary { + readers = SrcReaders, + left = DstFile, + file_size = SrcFileSize, + locked = true }] = ets:lookup(FileSummaryEts, SrcFile), + [DstObj = #file_summary { + readers = DstReaders, + right = SrcFile, + file_size = DstFileSize, + locked = true }] = ets:lookup(FileSummaryEts, DstFile), + + case SrcReaders =:= 0 andalso DstReaders =:= 0 of + true -> TotalValidData = combine_files(SrcObj, DstObj, State), + %% don't update dest.right, because it could be + %% changing at the same time + true = ets:update_element( + FileSummaryEts, DstFile, + [{#file_summary.valid_total_size, TotalValidData}, + {#file_summary.file_size, TotalValidData}]), + SrcFileSize + DstFileSize - TotalValidData; + false -> concurrent_readers + end. 
+ +combine_files(#file_summary { file = Source, + valid_total_size = SourceValid, + left = Destination }, + #file_summary { file = Destination, + valid_total_size = DestinationValid, + right = Source }, + State = {_FileSummaryEts, Dir, _Index, _IndexState}) -> + SourceName = filenum_to_name(Source), + DestinationName = filenum_to_name(Destination), + {ok, SourceHdl} = open_file(Dir, SourceName, + ?READ_AHEAD_MODE), + {ok, DestinationHdl} = open_file(Dir, DestinationName, + ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ExpectedSize = SourceValid + DestinationValid, + %% if DestinationValid =:= DestinationContiguousTop then we don't + %% need a tmp file + %% if they're not equal, then we need to write out everything past + %% the DestinationContiguousTop to a tmp file then truncate, + %% copy back in, and then copy over from Source + %% otherwise we just truncate straight away and copy over from Source + {DestinationWorkList, DestinationValid} = + find_unremoved_messages_in_file(Destination, State), + {DestinationContiguousTop, DestinationWorkListTail} = + drop_contiguous_block_prefix(DestinationWorkList), + case DestinationWorkListTail of + [] -> ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize); + _ -> Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE++?WRITE_MODE), + ok = copy_messages( + DestinationWorkListTail, DestinationContiguousTop, + DestinationValid, DestinationHdl, TmpHdl, Destination, + State), + TmpSize = DestinationValid - DestinationContiguousTop, + %% so now Tmp contains everything we need to salvage + %% from Destination, and index_state has been updated to + %% reflect the compaction of Destination so truncate + %% Destination and copy from Tmp back to the end + {ok, 0} = file_handle_cache:position(TmpHdl, 0), + ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize), + {ok, TmpSize} = + file_handle_cache:copy(TmpHdl, 
DestinationHdl, TmpSize), + %% position in DestinationHdl should now be DestinationValid + ok = file_handle_cache:sync(DestinationHdl), + ok = file_handle_cache:delete(TmpHdl) + end, + {SourceWorkList, SourceValid} = + find_unremoved_messages_in_file(Source, State), + ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, + SourceHdl, DestinationHdl, Destination, State), + %% tidy up + ok = file_handle_cache:close(DestinationHdl), + ok = file_handle_cache:delete(SourceHdl), + ExpectedSize. + +find_unremoved_messages_in_file(File, + {_FileSummaryEts, Dir, Index, IndexState}) -> + %% Messages here will be end-of-file at start-of-list + {ok, Messages, _FileSize} = + scan_file_for_valid_messages(Dir, filenum_to_name(File)), + %% foldl will reverse so will end up with msgs in ascending offset order + lists:foldl(fun ({Guid, TotalSize, Offset}, Acc = {List, Size}) -> + case Index:lookup(Guid, IndexState) of + #msg_location { file = File, total_size = TotalSize, + offset = Offset } = Entry -> + {[ Entry | List ], TotalSize + Size}; + _ -> + Acc + end + end, {[], 0}, Messages). + +copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, + Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> + Copy = fun ({BlockStart, BlockEnd}) -> + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file_handle_cache:position(SourceHdl, BlockStart), + {ok, BSize} = + file_handle_cache:copy(SourceHdl, DestinationHdl, BSize) + end, + case + lists:foldl( + fun (#msg_location { guid = Guid, offset = Offset, + total_size = TotalSize }, + {CurOffset, Block = {BlockStart, BlockEnd}}) -> + %% CurOffset is in the DestinationFile. 
+ %% Offset, BlockStart and BlockEnd are in the SourceFile + %% update MsgLocation to reflect change of file and offset + ok = Index:update_fields(Guid, + [{#msg_location.file, Destination}, + {#msg_location.offset, CurOffset}], + IndexState), + {CurOffset + TotalSize, + case BlockEnd of + undefined -> + %% base case, called only for the first list elem + {Offset, Offset + TotalSize}; + Offset -> + %% extend the current block because the + %% next msg follows straight on + {BlockStart, BlockEnd + TotalSize}; + _ -> + %% found a gap, so actually do the work for + %% the previous block + Copy(Block), + {Offset, Offset + TotalSize} + end} + end, {InitOffset, {undefined, undefined}}, WorkList) of + {FinalOffset, Block} -> + case WorkList of + [] -> ok; + _ -> Copy(Block), %% do the last remaining block + ok = file_handle_cache:sync(DestinationHdl) + end; + {FinalOffsetZ, _Block} -> + {gc_error, [{expected, FinalOffset}, + {got, FinalOffsetZ}, + {destination, Destination}]} + end. diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl new file mode 100644 index 00000000..1eb3c11f --- /dev/null +++ b/src/rabbit_msg_store_ets_index.erl @@ -0,0 +1,90 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_ets_index). + +-behaviour(rabbit_msg_store_index). + +-export([new/1, recover/1, + lookup/2, insert/2, update/2, update_fields/3, delete/2, + delete_by_file/2, terminate/1]). + +-define(MSG_LOC_NAME, rabbit_msg_store_ets_index). +-define(FILENAME, "msg_store_index.ets"). + +-include("rabbit_msg_store_index.hrl"). + +-record(state, { table, dir }). + +new(Dir) -> + file:delete(filename:join(Dir, ?FILENAME)), + Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.guid}]), + #state { table = Tid, dir = Dir }. + +recover(Dir) -> + Path = filename:join(Dir, ?FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {ok, #state { table = Tid, dir = Dir }}; + Error -> Error + end. + +lookup(Key, State) -> + case ets:lookup(State #state.table, Key) of + [] -> not_found; + [Entry] -> Entry + end. + +insert(Obj, State) -> + true = ets:insert_new(State #state.table, Obj), + ok. + +update(Obj, State) -> + true = ets:insert(State #state.table, Obj), + ok. + +update_fields(Key, Updates, State) -> + true = ets:update_element(State #state.table, Key, Updates), + ok. + +delete(Key, State) -> + true = ets:delete(State #state.table, Key), + ok. 
+ +delete_by_file(File, State) -> + MatchHead = #msg_location { file = File, _ = '_' }, + ets:select_delete(State #state.table, [{MatchHead, [], [true]}]), + ok. + +terminate(#state { table = MsgLocations, dir = Dir }) -> + ok = ets:tab2file(MsgLocations, filename:join(Dir, ?FILENAME), + [{extended_info, [object_count]}]), + ets:delete(MsgLocations). diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl new file mode 100644 index 00000000..a7855bbf --- /dev/null +++ b/src/rabbit_msg_store_gc.erl @@ -0,0 +1,144 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_gc). + +-behaviour(gen_server2). + +-export([start_link/4, gc/3, no_readers/2, stop/1]). 
+ +-export([set_maximum_since_use/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3, prioritise_cast/2]). + +-record(gcstate, + {dir, + index_state, + index_module, + parent, + file_summary_ets, + scheduled + }). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/4 :: (file:filename(), any(), atom(), ets:tid()) -> + rabbit_types:ok_pid_or_error()). +-spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok'). +-spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok'). +-spec(stop/1 :: (pid()) -> 'ok'). +-spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> + gen_server2:start_link( + ?MODULE, [self(), Dir, IndexState, IndexModule, FileSummaryEts], + [{timeout, infinity}]). + +gc(Server, Source, Destination) -> + gen_server2:cast(Server, {gc, Source, Destination}). + +no_readers(Server, File) -> + gen_server2:cast(Server, {no_readers, File}). + +stop(Server) -> + gen_server2:call(Server, stop, infinity). + +set_maximum_since_use(Pid, Age) -> + gen_server2:cast(Pid, {set_maximum_since_use, Age}). + +%%---------------------------------------------------------------------------- + +init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, + [self()]), + {ok, #gcstate { dir = Dir, + index_state = IndexState, + index_module = IndexModule, + parent = Parent, + file_summary_ets = FileSummaryEts, + scheduled = undefined }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +prioritise_cast({set_maximum_since_use, _Age}, _State) -> 8; +prioritise_cast(_Msg, _State) -> 0. 
+ +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast({gc, Source, Destination}, + State = #gcstate { scheduled = undefined }) -> + {noreply, attempt_gc(State #gcstate { scheduled = {Source, Destination} }), + hibernate}; + +handle_cast({no_readers, File}, + State = #gcstate { scheduled = {Source, Destination} }) + when File =:= Source orelse File =:= Destination -> + {noreply, attempt_gc(State), hibernate}; + +handle_cast({no_readers, _File}, State) -> + {noreply, State, hibernate}; + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + {noreply, State, hibernate}. + +handle_info(Info, State) -> + {stop, {unhandled_info, Info}, State}. + +terminate(_Reason, State) -> + State. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +attempt_gc(State = #gcstate { dir = Dir, + index_state = IndexState, + index_module = Index, + parent = Parent, + file_summary_ets = FileSummaryEts, + scheduled = {Source, Destination} }) -> + case rabbit_msg_store:gc(Source, Destination, + {FileSummaryEts, Dir, Index, IndexState}) of + concurrent_readers -> State; + Reclaimed -> ok = rabbit_msg_store:gc_done( + Parent, Reclaimed, Source, Destination), + State #gcstate { scheduled = undefined } + end. diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl new file mode 100644 index 00000000..0ed64a9d --- /dev/null +++ b/src/rabbit_msg_store_index.erl @@ -0,0 +1,47 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. 
+%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_index). + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [{new, 1}, + {recover, 1}, + {lookup, 2}, + {insert, 2}, + {update, 2}, + {update_fields, 3}, + {delete, 2}, + {delete_by_file, 2}, + {terminate, 1}]; +behaviour_info(_Other) -> + undefined. 
diff --git a/src/rabbit_multi.erl b/src/rabbit_multi.erl index 5db1d77a..5cfd6a5c 100644 --- a/src/rabbit_multi.erl +++ b/src/rabbit_multi.erl @@ -93,7 +93,14 @@ usage() -> action(start_all, [NodeCount], RpcTimeout) -> io:format("Starting all nodes...~n", []), application:load(rabbit), - NodeName = rabbit_misc:nodeparts(getenv("RABBITMQ_NODENAME")), + {_NodeNamePrefix, NodeHost} = NodeName = rabbit_misc:nodeparts( + getenv("RABBITMQ_NODENAME")), + case net_adm:names(NodeHost) of + {error, EpmdReason} -> + throw({cannot_connect_to_epmd, NodeHost, EpmdReason}); + {ok, _} -> + ok + end, {NodePids, Running} = case list_to_integer(NodeCount) of 1 -> {NodePid, Started} = start_node(rabbit_misc:makenode(NodeName), @@ -303,18 +310,28 @@ kill_wait(Pid, TimeLeft, Forceful) -> is_dead(Pid) -> PidS = integer_to_list(Pid), with_os([{unix, fun () -> - Res = os:cmd("ps --no-headers --pid " ++ PidS), - Res == "" + system("kill -0 " ++ PidS + ++ " >/dev/null 2>&1") /= 0 end}, {win32, fun () -> Res = os:cmd("tasklist /nh /fi \"pid eq " ++ PidS ++ "\""), - case regexp:first_match(Res, "erl.exe") of - {match, _, _} -> false; - _ -> true + case re:run(Res, "erl\\.exe", [{capture, none}]) of + match -> false; + _ -> true end end}]). +% Like system(3) +system(Cmd) -> + ShCmd = "sh -c '" ++ escape_quotes(Cmd) ++ "'", + Port = erlang:open_port({spawn, ShCmd}, [exit_status,nouse_stdio]), + receive {Port, {exit_status, Status}} -> Status end. + +% Escape the quotes in a shell command so that it can be used in "sh -c 'cmd'" +escape_quotes(Cmd) -> + lists:flatten(lists:map(fun ($') -> "'\\''"; (Ch) -> Ch end, Cmd)). + call_all_nodes(Func) -> case read_pids_file() of [] -> throw(no_nodes_running); diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl index 975954fc..2286896b 100644 --- a/src/rabbit_net.erl +++ b/src/rabbit_net.erl @@ -31,102 +31,99 @@ -module(rabbit_net). -include("rabbit.hrl"). --include_lib("kernel/include/inet.hrl"). 
-export([async_recv/3, close/1, controlling_process/2, getstat/2, peername/1, port_command/2, send/2, sockname/1]). + %%--------------------------------------------------------------------------- -ifdef(use_specs). +-export_type([socket/0]). + -type(stat_option() :: 'recv_cnt' | 'recv_max' | 'recv_avg' | 'recv_oct' | 'recv_dvi' | 'send_cnt' | 'send_max' | 'send_avg' | 'send_oct' | 'send_pend'). --type(error() :: {'error', any()}). - --spec(async_recv/3 :: (socket(), integer(), timeout()) -> {'ok', any()}). --spec(close/1 :: (socket()) -> 'ok' | error()). --spec(controlling_process/2 :: (socket(), pid()) -> 'ok' | error()). +-type(error() :: rabbit_types:error(any())). +-type(socket() :: port() | #ssl_socket{}). + +-spec(async_recv/3 :: + (socket(), integer(), timeout()) -> rabbit_types:ok(any())). +-spec(close/1 :: (socket()) -> rabbit_types:ok_or_error(any())). +-spec(controlling_process/2 :: + (socket(), pid()) -> rabbit_types:ok_or_error(any())). -spec(port_command/2 :: (socket(), iolist()) -> 'true'). --spec(send/2 :: (socket(), binary() | iolist()) -> 'ok' | error()). --spec(peername/1 :: (socket()) -> - {'ok', {ip_address(), non_neg_integer()}} | error()). --spec(sockname/1 :: (socket()) -> - {'ok', {ip_address(), non_neg_integer()}} | error()). --spec(getstat/2 :: (socket(), [stat_option()]) -> - {'ok', [{stat_option(), integer()}]} | error()). +-spec(send/2 :: + (socket(), binary() | iolist()) -> rabbit_types:ok_or_error(any())). +-spec(peername/1 :: + (socket()) + -> rabbit_types:ok({inet:ip_address(), rabbit_networking:ip_port()}) | + error()). +-spec(sockname/1 :: + (socket()) + -> rabbit_types:ok({inet:ip_address(), rabbit_networking:ip_port()}) | + error()). +-spec(getstat/2 :: + (socket(), [stat_option()]) + -> rabbit_types:ok([{stat_option(), integer()}]) | error()). -endif. %%--------------------------------------------------------------------------- +-define(IS_SSL(Sock), is_record(Sock, ssl_socket)). 
-async_recv(Sock, Length, Timeout) when is_record(Sock, ssl_socket) -> +async_recv(Sock, Length, Timeout) when ?IS_SSL(Sock) -> Pid = self(), Ref = make_ref(), spawn(fun () -> Pid ! {inet_async, Sock, Ref, - ssl:recv(Sock#ssl_socket.ssl, Length, Timeout)} - end), + ssl:recv(Sock#ssl_socket.ssl, Length, Timeout)} + end), {ok, Ref}; - async_recv(Sock, Length, infinity) when is_port(Sock) -> prim_inet:async_recv(Sock, Length, -1); - async_recv(Sock, Length, Timeout) when is_port(Sock) -> prim_inet:async_recv(Sock, Length, Timeout). -close(Sock) when is_record(Sock, ssl_socket) -> +close(Sock) when ?IS_SSL(Sock) -> ssl:close(Sock#ssl_socket.ssl); - close(Sock) when is_port(Sock) -> gen_tcp:close(Sock). - -controlling_process(Sock, Pid) when is_record(Sock, ssl_socket) -> +controlling_process(Sock, Pid) when ?IS_SSL(Sock) -> ssl:controlling_process(Sock#ssl_socket.ssl, Pid); - controlling_process(Sock, Pid) when is_port(Sock) -> gen_tcp:controlling_process(Sock, Pid). - -getstat(Sock, Stats) when is_record(Sock, ssl_socket) -> +getstat(Sock, Stats) when ?IS_SSL(Sock) -> inet:getstat(Sock#ssl_socket.tcp, Stats); - getstat(Sock, Stats) when is_port(Sock) -> inet:getstat(Sock, Stats). - -peername(Sock) when is_record(Sock, ssl_socket) -> +peername(Sock) when ?IS_SSL(Sock) -> ssl:peername(Sock#ssl_socket.ssl); - peername(Sock) when is_port(Sock) -> inet:peername(Sock). - -port_command(Sock, Data) when is_record(Sock, ssl_socket) -> +port_command(Sock, Data) when ?IS_SSL(Sock) -> case ssl:send(Sock#ssl_socket.ssl, Data) of - ok -> - self() ! {inet_reply, Sock, ok}, - true; - {error, Reason} -> - erlang:error(Reason) + ok -> self() ! {inet_reply, Sock, ok}, + true; + {error, Reason} -> erlang:error(Reason) end; - port_command(Sock, Data) when is_port(Sock) -> erlang:port_command(Sock, Data). 
-send(Sock, Data) when is_record(Sock, ssl_socket) -> +send(Sock, Data) when ?IS_SSL(Sock) -> ssl:send(Sock#ssl_socket.ssl, Data); - send(Sock, Data) when is_port(Sock) -> gen_tcp:send(Sock, Data). -sockname(Sock) when is_record(Sock, ssl_socket) -> +sockname(Sock) when ?IS_SSL(Sock) -> ssl:sockname(Sock#ssl_socket.ssl); - sockname(Sock) when is_port(Sock) -> inet:sockname(Sock). diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index 68ffc98a..6dbd54d2 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -46,6 +46,8 @@ -include("rabbit.hrl"). -include_lib("kernel/include/inet.hrl"). +-include_lib("ssl/src/ssl_record.hrl"). + -define(RABBIT_TCP_OPTS, [ binary, @@ -63,25 +65,29 @@ -ifdef(use_specs). --type(host() :: ip_address() | string() | atom()). --type(connection() :: pid()). +-export_type([ip_port/0, hostname/0]). -spec(start/0 :: () -> 'ok'). --spec(start_tcp_listener/2 :: (host(), ip_port()) -> 'ok'). --spec(start_ssl_listener/3 :: (host(), ip_port(), [info()]) -> 'ok'). --spec(stop_tcp_listener/2 :: (host(), ip_port()) -> 'ok'). --spec(active_listeners/0 :: () -> [listener()]). --spec(node_listeners/1 :: (erlang_node()) -> [listener()]). --spec(connections/0 :: () -> [connection()]). --spec(connection_info_keys/0 :: () -> [info_key()]). --spec(connection_info/1 :: (connection()) -> [info()]). --spec(connection_info/2 :: (connection(), [info_key()]) -> [info()]). --spec(connection_info_all/0 :: () -> [[info()]]). --spec(connection_info_all/1 :: ([info_key()]) -> [[info()]]). +-spec(start_tcp_listener/2 :: (hostname(), ip_port()) -> 'ok'). +-spec(start_ssl_listener/3 :: (hostname(), ip_port(), [rabbit_types:info()]) + -> 'ok'). +-spec(stop_tcp_listener/2 :: (hostname(), ip_port()) -> 'ok'). +-spec(active_listeners/0 :: () -> [rabbit_types:listener()]). +-spec(node_listeners/1 :: (node()) -> [rabbit_types:listener()]). +-spec(connections/0 :: () -> [rabbit_types:connection()]). 
+-spec(connection_info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(connection_info/1 :: + (rabbit_types:connection()) -> [rabbit_types:info()]). +-spec(connection_info/2 :: + (rabbit_types:connection(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). +-spec(connection_info_all/0 :: () -> [[rabbit_types:info()]]). +-spec(connection_info_all/1 :: + ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]). -spec(close_connection/2 :: (pid(), string()) -> 'ok'). --spec(on_node_down/1 :: (erlang_node()) -> 'ok'). --spec(check_tcp_listener_address/3 :: (atom(), host(), ip_port()) -> - {ip_address(), atom()}). +-spec(on_node_down/1 :: (node()) -> 'ok'). +-spec(check_tcp_listener_address/3 :: + (atom(), hostname(), ip_port()) -> {inet:ip_address(), atom()}). -endif. @@ -103,8 +109,37 @@ boot_ssl() -> ok; {ok, SslListeners} -> ok = rabbit_misc:start_applications([crypto, public_key, ssl]), - {ok, SslOpts} = application:get_env(ssl_options), - [start_ssl_listener(Host, Port, SslOpts) || {Host, Port} <- SslListeners], + {ok, SslOptsConfig} = application:get_env(ssl_options), + % unknown_ca errors are silently ignored prior to R14B unless we + % supply this verify_fun - remove when at least R14B is required + SslOpts = + case proplists:get_value(verify, SslOptsConfig, verify_none) of + verify_none -> SslOptsConfig; + verify_peer -> [{verify_fun, fun([]) -> true; + ([_|_]) -> false + end} + | SslOptsConfig] + end, + % In R13B04 and R14A (at least), rc4 is incorrectly implemented. 
+ CipherSuites = proplists:get_value(ciphers, + SslOpts, + ssl:cipher_suites()), + FilteredCipherSuites = + [C || C <- CipherSuites, + begin + SuiteCode = + if is_tuple(C) -> ssl_cipher:suite(C); + is_list(C) -> ssl_cipher:openssl_suite(C) + end, + SP = ssl_cipher:security_parameters( + SuiteCode, + #security_parameters{}), + SP#security_parameters.bulk_cipher_algorithm =/= ?RC4 + end], + SslOpts1 = [{ciphers, FilteredCipherSuites} + | [{K, V} || {K, V} <- SslOpts, K =/= ciphers]], + [start_ssl_listener(Host, Port, SslOpts1) + || {Host, Port} <- SslListeners], ok end. @@ -114,7 +149,7 @@ start() -> {rabbit_tcp_client_sup, {tcp_client_sup, start_link, [{local, rabbit_tcp_client_sup}, - {rabbit_reader,start_link,[]}]}, + {rabbit_connection_sup,start_link,[]}]}, transient, infinity, supervisor, [tcp_client_sup]}), ok. @@ -200,10 +235,10 @@ on_node_down(Node) -> ok = mnesia:dirty_delete(rabbit_listener, Node). start_client(Sock, SockTransform) -> - {ok, Child} = supervisor:start_child(rabbit_tcp_client_sup, []), - ok = rabbit_net:controlling_process(Sock, Child), - Child ! {go, Sock, SockTransform}, - Child. + {ok, _Child, Reader} = supervisor:start_child(rabbit_tcp_client_sup, []), + ok = rabbit_net:controlling_process(Sock, Reader), + Reader ! {go, Sock, SockTransform}, + Reader. start_client(Sock) -> start_client(Sock, fun (S) -> {ok, S} end). @@ -226,8 +261,9 @@ start_ssl_client(SslOpts, Sock) -> end). connections() -> - [Pid || {_, Pid, _, _} <- supervisor:which_children( - rabbit_tcp_client_sup)]. + [rabbit_connection_sup:reader(ConnSup) || + {_, ConnSup, supervisor, _} + <- supervisor:which_children(rabbit_tcp_client_sup)]. connection_info_keys() -> rabbit_reader:info_keys(). @@ -238,8 +274,7 @@ connection_info_all() -> cmap(fun (Q) -> connection_info(Q) end). connection_info_all(Items) -> cmap(fun (Q) -> connection_info(Q, Items) end). 
close_connection(Pid, Explanation) -> - case lists:any(fun ({_, ChildPid, _, _}) -> ChildPid =:= Pid end, - supervisor:which_children(rabbit_tcp_client_sup)) of + case lists:member(Pid, connections()) of true -> rabbit_reader:shutdown(Pid, Explanation); false -> throw({error, {not_a_connection_pid, Pid}}) end. diff --git a/src/rabbit_persister.erl b/src/rabbit_persister.erl index 8d3c2dc0..66e5cf63 100644 --- a/src/rabbit_persister.erl +++ b/src/rabbit_persister.erl @@ -65,21 +65,28 @@ -ifdef(use_specs). --type(pmsg() :: {queue_name(), pkey()}). +-type(pkey() :: rabbit_guid:guid()). +-type(pmsg() :: {rabbit_amqqueue:name(), pkey()}). + -type(work_item() :: - {publish, message(), pmsg()} | + {publish, rabbit_types:message(), pmsg()} | {deliver, pmsg()} | {ack, pmsg()}). --spec(start_link/1 :: ([queue_name()]) -> - {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/1 :: ([rabbit_amqqueue:name()]) -> + rabbit_types:ok_pid_or_error()). -spec(transaction/1 :: ([work_item()]) -> 'ok'). --spec(extend_transaction/2 :: ({txn(), queue_name()}, [work_item()]) -> 'ok'). +-spec(extend_transaction/2 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}, [work_item()]) + -> 'ok'). -spec(dirty_work/1 :: ([work_item()]) -> 'ok'). --spec(commit_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). --spec(rollback_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). +-spec(commit_transaction/1 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}) -> 'ok'). +-spec(rollback_transaction/1 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}) -> 'ok'). -spec(force_snapshot/0 :: () -> 'ok'). --spec(queue_content/1 :: (queue_name()) -> [{message(), boolean()}]). +-spec(queue_content/1 :: + (rabbit_amqqueue:name()) -> [{rabbit_types:message(), boolean()}]). -endif. 
diff --git a/src/rabbit_plugin_activator.erl b/src/rabbit_plugin_activator.erl index ef3c5cc2..b23776cd 100644 --- a/src/rabbit_plugin_activator.erl +++ b/src/rabbit_plugin_activator.erl @@ -35,7 +35,6 @@ -define(DefaultPluginDir, "plugins"). -define(DefaultUnpackedPluginDir, "priv/plugins"). --define(DefaultRabbitEBin, "ebin"). -define(BaseApps, [rabbit]). %%---------------------------------------------------------------------------- @@ -52,23 +51,22 @@ %%---------------------------------------------------------------------------- start() -> + io:format("Activating RabbitMQ plugins ...~n"), %% Ensure Rabbit is loaded so we can access it's environment application:load(rabbit), %% Determine our various directories PluginDir = get_env(plugins_dir, ?DefaultPluginDir), UnpackedPluginDir = get_env(plugins_expand_dir, ?DefaultUnpackedPluginDir), - RabbitEBin = get_env(rabbit_ebin, ?DefaultRabbitEBin), - RootName = RabbitEBin ++ "/rabbit", + RootName = UnpackedPluginDir ++ "/rabbit", %% Unpack any .ez plugins unpack_ez_plugins(PluginDir, UnpackedPluginDir), %% Build a list of required apps based on the fixed set, and any plugins - RequiredApps = ?BaseApps ++ - find_plugins(PluginDir) ++ - find_plugins(UnpackedPluginDir), + PluginApps = find_plugins(PluginDir) ++ find_plugins(UnpackedPluginDir), + RequiredApps = ?BaseApps ++ PluginApps, %% Build the entire set of dependencies - this will load the %% applications along the way @@ -79,7 +77,7 @@ start() -> AppList end, AppVersions = [determine_version(App) || App <- AllApps], - {rabbit, RabbitVersion} = proplists:lookup(rabbit, AppVersions), + RabbitVersion = proplists:get_value(rabbit, AppVersions), %% Build the overall release descriptor RDesc = {release, @@ -87,7 +85,7 @@ start() -> {erts, erlang:system_info(version)}, AppVersions}, - %% Write it out to ebin/rabbit.rel + %% Write it out to $RABBITMQ_PLUGINS_EXPAND_DIR/rabbit.rel file:write_file(RootName ++ ".rel", io_lib:format("~p.~n", [RDesc])), %% Compile the script @@ 
-132,6 +130,10 @@ start() -> ok -> ok; error -> error("failed to compile boot script file ~s", [ScriptFile]) end, + io:format("~w plugins activated:~n", [length(PluginApps)]), + [io:format("* ~s-~s~n", [App, proplists:get_value(App, AppVersions)]) + || App <- PluginApps], + io:nl(), halt(), ok. @@ -149,29 +151,33 @@ determine_version(App) -> {ok, Vsn} = application:get_key(App, vsn), {App, Vsn}. -assert_dir(Dir) -> - case filelib:is_dir(Dir) of - true -> ok; - false -> ok = filelib:ensure_dir(Dir), - ok = file:make_dir(Dir) - end. - -delete_dir(Dir) -> - case filelib:is_dir(Dir) of +delete_recursively(Fn) -> + case filelib:is_dir(Fn) and not(is_symlink(Fn)) of true -> - case file:list_dir(Dir) of + case file:list_dir(Fn) of {ok, Files} -> - [case Dir ++ "/" ++ F of - Fn -> - case filelib:is_dir(Fn) and not(is_symlink(Fn)) of - true -> delete_dir(Fn); - false -> file:delete(Fn) - end - end || F <- Files] - end, - ok = file:del_dir(Dir); + case lists:foldl(fun ( Fn1, ok) -> delete_recursively( + Fn ++ "/" ++ Fn1); + (_Fn1, Err) -> Err + end, ok, Files) of + ok -> case file:del_dir(Fn) of + ok -> ok; + {error, E} -> {error, + {cannot_delete, Fn, E}} + end; + Err -> Err + end; + {error, E} -> + {error, {cannot_list_files, Fn, E}} + end; false -> - ok + case filelib:is_file(Fn) of + true -> case file:delete(Fn) of + ok -> ok; + {error, E} -> {error, {cannot_delete, Fn, E}} + end; + false -> ok + end end. is_symlink(Name) -> @@ -180,13 +186,18 @@ is_symlink(Name) -> _ -> false end. -unpack_ez_plugins(PluginSrcDir, PluginDestDir) -> +unpack_ez_plugins(SrcDir, DestDir) -> %% Eliminate the contents of the destination directory - delete_dir(PluginDestDir), - - assert_dir(PluginDestDir), - [unpack_ez_plugin(PluginName, PluginDestDir) || - PluginName <- filelib:wildcard(PluginSrcDir ++ "/*.ez")]. 
+ case delete_recursively(DestDir) of + ok -> ok; + {error, E} -> error("Could not delete dir ~s (~p)", [DestDir, E]) + end, + case filelib:ensure_dir(DestDir ++ "/") of + ok -> ok; + {error, E2} -> error("Could not create dir ~s (~p)", [DestDir, E2]) + end, + [unpack_ez_plugin(PluginName, DestDir) || + PluginName <- filelib:wildcard(SrcDir ++ "/*.ez")]. unpack_ez_plugin(PluginFn, PluginDestDir) -> zip:unzip(PluginFn, [{cwd, PluginDestDir}]), @@ -245,8 +256,8 @@ post_process_script(ScriptFile) -> {error, {failed_to_load_script, Reason}} end. -process_entry(Entry = {apply,{application,start_boot,[stdlib,permanent]}}) -> - [Entry, {apply,{rabbit,prepare,[]}}]; +process_entry(Entry = {apply,{application,start_boot,[rabbit,permanent]}}) -> + [{apply,{rabbit,prepare,[]}}, Entry]; process_entry(Entry) -> [Entry]. diff --git a/src/rabbit_reader_queue_collector.erl b/src/rabbit_queue_collector.erl index 8d4e8fdb..0a49b94d 100644 --- a/src/rabbit_reader_queue_collector.erl +++ b/src/rabbit_queue_collector.erl @@ -29,16 +29,16 @@ %% Contributor(s): ______________________________________. %% --module(rabbit_reader_queue_collector). +-module(rabbit_queue_collector). -behaviour(gen_server). --export([start_link/0, register_exclusive_queue/2, delete_all/1, shutdown/1]). +-export([start_link/0, register/2, delete_all/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --record(state, {exclusive_queues}). +-record(state, {queues}). -include("rabbit.hrl"). @@ -46,8 +46,8 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()}). --spec(register_exclusive_queue/2 :: (pid(), amqqueue()) -> 'ok'). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). +-spec(register/2 :: (pid(), rabbit_types:amqqueue()) -> 'ok'). -spec(delete_all/1 :: (pid()) -> 'ok'). -endif. @@ -57,49 +57,41 @@ start_link() -> gen_server:start_link(?MODULE, [], []). 
-register_exclusive_queue(CollectorPid, Q) -> - gen_server:call(CollectorPid, {register_exclusive_queue, Q}, infinity). +register(CollectorPid, Q) -> + gen_server:call(CollectorPid, {register, Q}, infinity). delete_all(CollectorPid) -> gen_server:call(CollectorPid, delete_all, infinity). -shutdown(CollectorPid) -> - gen_server:call(CollectorPid, shutdown, infinity). - %%---------------------------------------------------------------------------- init([]) -> - {ok, #state{exclusive_queues = dict:new()}}. + {ok, #state{queues = dict:new()}}. %%-------------------------------------------------------------------------- -handle_call({register_exclusive_queue, Q}, _From, - State = #state{exclusive_queues = Queues}) -> +handle_call({register, Q}, _From, + State = #state{queues = Queues}) -> MonitorRef = erlang:monitor(process, Q#amqqueue.pid), {reply, ok, - State#state{exclusive_queues = dict:store(MonitorRef, Q, Queues)}}; + State#state{queues = dict:store(MonitorRef, Q, Queues)}}; -handle_call(delete_all, _From, - State = #state{exclusive_queues = ExclusiveQueues}) -> +handle_call(delete_all, _From, State = #state{queues = Queues}) -> [rabbit_misc:with_exit_handler( fun () -> ok end, fun () -> erlang:demonitor(MonitorRef), rabbit_amqqueue:delete(Q, false, false) end) - || {MonitorRef, Q} <- dict:to_list(ExclusiveQueues)], - {reply, ok, State}; - -handle_call(shutdown, _From, State) -> - {stop, normal, ok, State}. + || {MonitorRef, Q} <- dict:to_list(Queues)], + {reply, ok, State}. -handle_cast(_Msg, State) -> - {noreply, State}. +handle_cast(Msg, State) -> + {stop, {unhandled_cast, Msg}, State}. handle_info({'DOWN', MonitorRef, process, _DownPid, _Reason}, - State = #state{exclusive_queues = ExclusiveQueues}) -> - {noreply, State#state{exclusive_queues = - dict:erase(MonitorRef, ExclusiveQueues)}}. + State = #state{queues = Queues}) -> + {noreply, State#state{queues = dict:erase(MonitorRef, Queues)}}. terminate(_Reason, _State) -> ok. 
diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl new file mode 100644 index 00000000..d6b8bb28 --- /dev/null +++ b/src/rabbit_queue_index.erl @@ -0,0 +1,932 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_index). + +-export([init/4, terminate/2, delete_and_terminate/1, publish/4, + deliver/2, ack/2, sync/2, flush/1, read/3, + next_segment_boundary/1, bounds/1, recover/1]). + +-define(CLEAN_FILENAME, "clean.dot"). + +%%---------------------------------------------------------------------------- + +%% The queue index is responsible for recording the order of messages +%% within a queue on disk. 
+%% +%% Because of the fact that the queue can decide at any point to send +%% a queue entry to disk, you can not rely on publishes appearing in +%% order. The only thing you can rely on is a message being published, +%% then delivered, then ack'd. +%% +%% In order to be able to clean up ack'd messages, we write to segment +%% files. These files have a fixed maximum size: ?SEGMENT_ENTRY_COUNT +%% publishes, delivers and acknowledgements. They are numbered, and so +%% it is known that the 0th segment contains messages 0 -> +%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages +%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. As +%% such, in the segment files, we only refer to message sequence ids +%% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a +%% fixed size. +%% +%% However, transient messages which are not sent to disk at any point +%% will cause gaps to appear in segment files. Therefore, we delete a +%% segment file whenever the number of publishes == number of acks +%% (note that although it is not fully enforced, it is assumed that a +%% message will never be ackd before it is delivered, thus this test +%% also implies == number of delivers). In practise, this does not +%% cause disk churn in the pathological case because of the journal +%% and caching (see below). +%% +%% Because of the fact that publishes, delivers and acks can occur all +%% over, we wish to avoid lots of seeking. Therefore we have a fixed +%% sized journal to which all actions are appended. When the number of +%% entries in this journal reaches max_journal_entries, the journal +%% entries are scattered out to their relevant files, and the journal +%% is truncated to zero size. Note that entries in the journal must +%% carry the full sequence id, thus the format of entries in the +%% journal is different to that in the segments. 
+%% +%% The journal is also kept fully in memory, pre-segmented: the state +%% contains a mapping from segment numbers to state-per-segment (this +%% state is held for all segments which have been "seen": thus a +%% segment which has been read but has no pending entries in the +%% journal is still held in this mapping. Also note that a dict is +%% used for this mapping, not an array because with an array, you will +%% always have entries from 0). Actions are stored directly in this +%% state. Thus at the point of flushing the journal, firstly no +%% reading from disk is necessary, but secondly if the known number of +%% acks and publishes in a segment are equal, given the known state of +%% the segment file combined with the journal, no writing needs to be +%% done to the segment file either (in fact it is deleted if it exists +%% at all). This is safe given that the set of acks is a subset of the +%% set of publishes. When it's necessary to sync messages because of +%% transactions, it's only necessary to fsync on the journal: when +%% entries are distributed from the journal to segment files, those +%% segments appended to are fsync'd prior to the journal being +%% truncated. +%% +%% This module is also responsible for scanning the queue index files +%% and seeding the message store on start up. +%% +%% Note that in general, the representation of a message's state as +%% the tuple: {('no_pub'|{Guid, IsPersistent}), ('del'|'no_del'), +%% ('ack'|'no_ack')} is richer than strictly necessary for most +%% operations. However, for startup, and to ensure the safe and +%% correct combination of journal entries with entries read from the +%% segment on disk, this richer representation vastly simplifies and +%% clarifies the code. +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. 
+%% +%%---------------------------------------------------------------------------- + +%% ---- Journal details ---- + +-define(JOURNAL_FILENAME, "journal.jif"). + +-define(PUB_PERSIST_JPREFIX, 2#00). +-define(PUB_TRANS_JPREFIX, 2#01). +-define(DEL_JPREFIX, 2#10). +-define(ACK_JPREFIX, 2#11). +-define(JPREFIX_BITS, 2). +-define(SEQ_BYTES, 8). +-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). + +%% ---- Segment details ---- + +-define(SEGMENT_EXTENSION, ".idx"). + +%% TODO: The segment size would be configurable, but deriving all the +%% other values is quite hairy and quite possibly noticably less +%% efficient, depending on how clever the compiler is when it comes to +%% binary generation/matching with constant vs variable lengths. + +-define(REL_SEQ_BITS, 14). +-define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). + +%% seq only is binary 00 followed by 14 bits of rel seq id +%% (range: 0 - 16383) +-define(REL_SEQ_ONLY_PREFIX, 00). +-define(REL_SEQ_ONLY_PREFIX_BITS, 2). +-define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2). + +%% publish record is binary 1 followed by a bit for is_persistent, +%% then 14 bits of rel seq id, and 128 bits of md5sum msg id +-define(PUBLISH_PREFIX, 1). +-define(PUBLISH_PREFIX_BITS, 1). + +-define(GUID_BYTES, 16). %% md5sum is 128 bit or 16 bytes +-define(GUID_BITS, (?GUID_BYTES * 8)). +%% 16 bytes for md5sum + 2 for seq, bits and prefix +-define(PUBLISH_RECORD_LENGTH_BYTES, ?GUID_BYTES + 2). + +%% 1 publish, 1 deliver, 1 ack per msg +-define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT * + (?PUBLISH_RECORD_LENGTH_BYTES + + (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). + +%% ---- misc ---- + +-define(PUB, {_, _}). %% {Guid, IsPersistent} + +-define(READ_MODE, [binary, raw, read, {read_ahead, ?SEGMENT_TOTAL_SIZE}]). + +%%---------------------------------------------------------------------------- + +-record(qistate, { dir, segments, journal_handle, dirty_count, + max_journal_entries }). 
+ +-record(segment, { num, path, journal_entries, unacked }). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(hdl() :: ('undefined' | any())). +-type(segment() :: ('undefined' | + #segment { num :: non_neg_integer(), + path :: file:filename(), + journal_entries :: array(), + unacked :: non_neg_integer() + })). +-type(seq_id() :: integer()). +-type(seg_dict() :: {dict:dictionary(), [segment()]}). +-type(qistate() :: #qistate { dir :: file:filename(), + segments :: 'undefined' | seg_dict(), + journal_handle :: hdl(), + dirty_count :: integer(), + max_journal_entries :: non_neg_integer() + }). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})), + A}). + +-spec(init/4 :: (rabbit_amqqueue:name(), boolean(), boolean(), + fun ((rabbit_guid:guid()) -> boolean())) -> + {'undefined' | non_neg_integer(), [any()], qistate()}). +-spec(terminate/2 :: ([any()], qistate()) -> qistate()). +-spec(delete_and_terminate/1 :: (qistate()) -> qistate()). +-spec(publish/4 :: (rabbit_guid:guid(), seq_id(), boolean(), qistate()) -> + qistate()). +-spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(ack/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(flush/1 :: (qistate()) -> qistate()). +-spec(read/3 :: (seq_id(), seq_id(), qistate()) -> + {[{rabbit_guid:guid(), seq_id(), boolean(), boolean()}], + qistate()}). +-spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). +-spec(bounds/1 :: (qistate()) -> + {non_neg_integer(), non_neg_integer(), qistate()}). +-spec(recover/1 :: + ([rabbit_amqqueue:name()]) -> {[[any()]], startup_fun_state()}). + +-endif. 
+ + +%%---------------------------------------------------------------------------- +%% public API +%%---------------------------------------------------------------------------- + +init(Name, Recover, MsgStoreRecovered, ContainsCheckFun) -> + State = #qistate { dir = Dir } = blank_state(Name, not Recover), + Terms = case read_shutdown_terms(Dir) of + {error, _} -> []; + {ok, Terms1} -> Terms1 + end, + CleanShutdown = detect_clean_shutdown(Dir), + {Count, State1} = + case CleanShutdown andalso MsgStoreRecovered of + true -> RecoveredCounts = proplists:get_value(segments, Terms, []), + init_clean(RecoveredCounts, State); + false -> init_dirty(CleanShutdown, ContainsCheckFun, State) + end, + {Count, Terms, State1}. + +terminate(Terms, State) -> + {SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), + store_clean_shutdown([{segments, SegmentCounts} | Terms], Dir), + State1. + +delete_and_terminate(State) -> + {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), + ok = rabbit_misc:recursive_delete([Dir]), + State1. + +publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> + ?GUID_BYTES = size(Guid), + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append( + JournalHdl, [<<(case IsPersistent of + true -> ?PUB_PERSIST_JPREFIX; + false -> ?PUB_TRANS_JPREFIX + end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid]), + maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)). + +deliver(SeqIds, State) -> + deliver_or_ack(del, SeqIds, State). + +ack(SeqIds, State) -> + deliver_or_ack(ack, SeqIds, State). + +sync([], State) -> + State; +sync(_SeqIds, State = #qistate { journal_handle = undefined }) -> + State; +sync(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> + %% The SeqIds here contains the SeqId of every publish and ack in + %% the transaction. Ideally we should go through these seqids and + %% only sync the journal if the pubs or acks appear in the + %% journal. 
However, this would be complex to do, and given that + %% the variable queue publishes and acks to the qi, and then + %% syncs, all in one operation, there is no possibility of the + %% seqids not being in the journal, provided the transaction isn't + %% emptied (handled above anyway). + ok = file_handle_cache:sync(JournalHdl), + State. + +flush(State = #qistate { dirty_count = 0 }) -> State; +flush(State) -> flush_journal(State). + +read(StartEnd, StartEnd, State) -> + {[], State}; +read(Start, End, State = #qistate { segments = Segments, + dir = Dir }) when Start =< End -> + %% Start is inclusive, End is exclusive. + LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), + UpperB = {EndSeg, _EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End - 1), + {Messages, Segments1} = + lists:foldr(fun (Seg, Acc) -> + read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir) + end, {[], Segments}, lists:seq(StartSeg, EndSeg)), + {Messages, State #qistate { segments = Segments1 }}. + +next_segment_boundary(SeqId) -> + {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + reconstruct_seq_id(Seg + 1, 0). + +bounds(State = #qistate { segments = Segments }) -> + %% This is not particularly efficient, but only gets invoked on + %% queue initialisation. + SegNums = lists:sort(segment_nums(Segments)), + %% Don't bother trying to figure out the lowest seq_id, merely the + %% seq_id of the start of the lowest segment. That seq_id may not + %% actually exist, but that's fine. The important thing is that + %% the segment exists and the seq_id reported is on a segment + %% boundary. + %% + %% We also don't really care about the max seq_id. Just start the + %% next segment: it makes life much easier. + %% + %% SegNums is sorted, ascending. + {LowSeqId, NextSeqId} = + case SegNums of + [] -> {0, 0}; + [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), + reconstruct_seq_id(1 + lists:last(SegNums), 0)} + end, + {LowSeqId, NextSeqId, State}. 
+ +recover(DurableQueues) -> + DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || + Queue <- DurableQueues ]), + QueuesDir = queues_dir(), + Directories = case file:list_dir(QueuesDir) of + {ok, Entries} -> [ Entry || Entry <- Entries, + filelib:is_dir( + filename:join( + QueuesDir, Entry)) ]; + {error, enoent} -> [] + end, + DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), + {DurableQueueNames, DurableTerms} = + lists:foldl( + fun (QueueDir, {DurableAcc, TermsAcc}) -> + case sets:is_element(QueueDir, DurableDirectories) of + true -> + TermsAcc1 = + case read_shutdown_terms( + filename:join(QueuesDir, QueueDir)) of + {error, _} -> TermsAcc; + {ok, Terms} -> [Terms | TermsAcc] + end, + {[dict:fetch(QueueDir, DurableDict) | DurableAcc], + TermsAcc1}; + false -> + Dir = filename:join(queues_dir(), QueueDir), + ok = rabbit_misc:recursive_delete([Dir]), + {DurableAcc, TermsAcc} + end + end, {[], []}, Directories), + {DurableTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. + +%%---------------------------------------------------------------------------- +%% startup and shutdown +%%---------------------------------------------------------------------------- + +blank_state(QueueName, EnsureFresh) -> + StrName = queue_name_to_dir_name(QueueName), + Dir = filename:join(queues_dir(), StrName), + ok = case EnsureFresh of + true -> false = filelib:is_file(Dir), %% is_file == is file or dir + ok; + false -> ok + end, + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + #qistate { dir = Dir, + segments = segments_new(), + journal_handle = undefined, + dirty_count = 0, + max_journal_entries = MaxJournal }. + +detect_clean_shutdown(Dir) -> + case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of + ok -> true; + {error, enoent} -> false + end. 
+ +read_shutdown_terms(Dir) -> + rabbit_misc:read_term_file(filename:join(Dir, ?CLEAN_FILENAME)). + +store_clean_shutdown(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). + +init_clean(RecoveredCounts, State) -> + %% Load the journal. Since this is a clean recovery this (almost) + %% gets us back to where we were on shutdown. + State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State), + %% The journal loading only creates records for segments touched + %% by the journal, and the counts are based on the journal entries + %% only. We need *complete* counts for *all* segments. By an + %% amazing coincidence we stored that information on shutdown. + Segments1 = + lists:foldl( + fun ({Seg, UnackedCount}, SegmentsN) -> + Segment = segment_find_or_new(Seg, Dir, SegmentsN), + segment_store(Segment #segment { unacked = UnackedCount }, + SegmentsN) + end, Segments, RecoveredCounts), + %% the counts above include transient messages, which would be the + %% wrong thing to return + {undefined, State1 # qistate { segments = Segments1 }}. + +init_dirty(CleanShutdown, ContainsCheckFun, State) -> + %% Recover the journal completely. This will also load segments + %% which have entries in the journal and remove duplicates. The + %% counts will correctly reflect the combination of the segment + %% and the journal. + State1 = #qistate { dir = Dir, segments = Segments } = + recover_journal(State), + {Segments1, Count} = + %% Load each segment in turn and filter out messages that are + %% not in the msg_store, by adding acks to the journal. These + %% acks only go to the RAM journal as it doesn't matter if we + %% lose them. Also mark delivered if not clean shutdown. Also + %% find the number of unacked messages. 
+ lists:foldl( + fun (Seg, {Segments2, CountAcc}) -> + Segment = #segment { unacked = UnackedCount } = + recover_segment(ContainsCheckFun, CleanShutdown, + segment_find_or_new(Seg, Dir, Segments2)), + {segment_store(Segment, Segments2), CountAcc + UnackedCount} + end, {Segments, 0}, all_segment_nums(State1)), + %% Unconditionally flush since the dirty_count doesn't get updated + %% by the above foldl. + State2 = flush_journal(State1 #qistate { segments = Segments1 }), + {Count, State2}. + +terminate(State = #qistate { journal_handle = JournalHdl, + segments = Segments }) -> + ok = case JournalHdl of + undefined -> ok; + _ -> file_handle_cache:close(JournalHdl) + end, + SegmentCounts = + segment_fold( + fun (#segment { num = Seg, unacked = UnackedCount }, Acc) -> + [{Seg, UnackedCount} | Acc] + end, [], Segments), + {SegmentCounts, State #qistate { journal_handle = undefined, + segments = undefined }}. + +recover_segment(ContainsCheckFun, CleanShutdown, + Segment = #segment { journal_entries = JEntries }) -> + {SegEntries, UnackedCount} = load_segment(false, Segment), + {SegEntries1, UnackedCountDelta} = + segment_plus_journal(SegEntries, JEntries), + array:sparse_foldl( + fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment1) -> + recover_message(ContainsCheckFun(Guid), CleanShutdown, + Del, RelSeq, Segment1) + end, + Segment #segment { unacked = UnackedCount + UnackedCountDelta }, + SegEntries1). + +recover_message( true, true, _Del, _RelSeq, Segment) -> + Segment; +recover_message( true, false, del, _RelSeq, Segment) -> + Segment; +recover_message( true, false, no_del, RelSeq, Segment) -> + add_to_journal(RelSeq, del, Segment); +recover_message(false, _, del, RelSeq, Segment) -> + add_to_journal(RelSeq, ack, Segment); +recover_message(false, _, no_del, RelSeq, Segment) -> + add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)). 
+ +queue_name_to_dir_name(Name = #resource { kind = queue }) -> + <<Num:128>> = erlang:md5(term_to_binary(Name)), + lists:flatten(io_lib:format("~.36B", [Num])). + +queues_dir() -> + filename:join(rabbit_mnesia:dir(), "queues"). + +%%---------------------------------------------------------------------------- +%% msg store startup delta function +%%---------------------------------------------------------------------------- + +queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> + {ok, Gatherer} = gatherer:start_link(), + [begin + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> queue_index_walker_reader(QueueName, Gatherer) + end) + end || QueueName <- DurableQueues], + queue_index_walker({next, Gatherer}); + +queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> + case gatherer:out(Gatherer) of + empty -> + ok = gatherer:stop(Gatherer), + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), + finished; + {value, {Guid, Count}} -> + {Guid, Count, {next, Gatherer}} + end. + +queue_index_walker_reader(QueueName, Gatherer) -> + State = #qistate { segments = Segments, dir = Dir } = + recover_journal(blank_state(QueueName, false)), + [ok = segment_entries_foldr( + fun (_RelSeq, {{Guid, true}, _IsDelivered, no_ack}, ok) -> + gatherer:in(Gatherer, {Guid, 1}); + (_RelSeq, _Value, Acc) -> + Acc + end, ok, segment_find_or_new(Seg, Dir, Segments)) || + Seg <- all_segment_nums(State)], + {_SegmentCounts, _State} = terminate(State), + ok = gatherer:finish(Gatherer). 
+ +%%---------------------------------------------------------------------------- +%% journal manipulation +%%---------------------------------------------------------------------------- + +add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, + segments = Segments, + dir = Dir }) -> + {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + Segment = segment_find_or_new(Seg, Dir, Segments), + Segment1 = add_to_journal(RelSeq, Action, Segment), + State #qistate { dirty_count = DCount + 1, + segments = segment_store(Segment1, Segments) }; + +add_to_journal(RelSeq, Action, + Segment = #segment { journal_entries = JEntries, + unacked = UnackedCount }) -> + Segment1 = Segment #segment { + journal_entries = add_to_journal(RelSeq, Action, JEntries) }, + case Action of + del -> Segment1; + ack -> Segment1 #segment { unacked = UnackedCount - 1 }; + ?PUB -> Segment1 #segment { unacked = UnackedCount + 1 } + end; + +add_to_journal(RelSeq, Action, JEntries) -> + Val = case array:get(RelSeq, JEntries) of + undefined -> + case Action of + ?PUB -> {Action, no_del, no_ack}; + del -> {no_pub, del, no_ack}; + ack -> {no_pub, no_del, ack} + end; + ({Pub, no_del, no_ack}) when Action == del -> + {Pub, del, no_ack}; + ({Pub, Del, no_ack}) when Action == ack -> + {Pub, Del, ack} + end, + array:set(RelSeq, Val, JEntries). + +maybe_flush_journal(State = #qistate { dirty_count = DCount, + max_journal_entries = MaxJournal }) + when DCount > MaxJournal -> + flush_journal(State); +maybe_flush_journal(State) -> + State. 
+ +flush_journal(State = #qistate { segments = Segments }) -> + Segments1 = + segment_fold( + fun (#segment { unacked = 0, path = Path }, SegmentsN) -> + case filelib:is_file(Path) of + true -> ok = file:delete(Path); + false -> ok + end, + SegmentsN; + (#segment {} = Segment, SegmentsN) -> + segment_store(append_journal_to_segment(Segment), SegmentsN) + end, segments_new(), Segments), + {JournalHdl, State1} = + get_journal_handle(State #qistate { segments = Segments1 }), + ok = file_handle_cache:clear(JournalHdl), + State1 #qistate { dirty_count = 0 }. + +append_journal_to_segment(#segment { journal_entries = JEntries, + path = Path } = Segment) -> + case array:sparse_size(JEntries) of + 0 -> Segment; + _ -> {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], + [{write_buffer, infinity}]), + array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries), + ok = file_handle_cache:close(Hdl), + Segment #segment { journal_entries = array_new() } + end. + +get_journal_handle(State = #qistate { journal_handle = undefined, + dir = Dir }) -> + Path = filename:join(Dir, ?JOURNAL_FILENAME), + {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], + [{write_buffer, infinity}]), + {Hdl, State #qistate { journal_handle = Hdl }}; +get_journal_handle(State = #qistate { journal_handle = Hdl }) -> + {Hdl, State}. + +%% Loading Journal. This isn't idempotent and will mess up the counts +%% if you call it more than once on the same state. Assumes the counts +%% are 0 to start with. +load_journal(State) -> + {JournalHdl, State1} = get_journal_handle(State), + {ok, 0} = file_handle_cache:position(JournalHdl, 0), + load_journal_entries(State1). 
+ +%% ditto +recover_journal(State) -> + State1 = #qistate { segments = Segments } = load_journal(State), + Segments1 = + segment_map( + fun (Segment = #segment { journal_entries = JEntries, + unacked = UnackedCountInJournal }) -> + %% We want to keep ack'd entries in so that we can + %% remove them if duplicates are in the journal. The + %% counts here are purely from the segment itself. + {SegEntries, UnackedCountInSeg} = load_segment(true, Segment), + {JEntries1, UnackedCountDuplicates} = + journal_minus_segment(JEntries, SegEntries), + Segment #segment { journal_entries = JEntries1, + unacked = (UnackedCountInJournal + + UnackedCountInSeg - + UnackedCountDuplicates) } + end, Segments), + State1 #qistate { segments = Segments1 }. + +load_journal_entries(State = #qistate { journal_handle = Hdl }) -> + case file_handle_cache:read(Hdl, ?SEQ_BYTES) of + {ok, <<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>} -> + case Prefix of + ?DEL_JPREFIX -> + load_journal_entries(add_to_journal(SeqId, del, State)); + ?ACK_JPREFIX -> + load_journal_entries(add_to_journal(SeqId, ack, State)); + _ -> + case file_handle_cache:read(Hdl, ?GUID_BYTES) of + {ok, <<GuidNum:?GUID_BITS>>} -> + %% work around for binary data + %% fragmentation. See + %% rabbit_msg_file:read_next/2 + <<Guid:?GUID_BYTES/binary>> = + <<GuidNum:?GUID_BITS>>, + Publish = {Guid, case Prefix of + ?PUB_PERSIST_JPREFIX -> true; + ?PUB_TRANS_JPREFIX -> false + end}, + load_journal_entries( + add_to_journal(SeqId, Publish, State)); + _ErrOrEoF -> %% err, we've lost at least a publish + State + end + end; + _ErrOrEoF -> State + end. 
+ +deliver_or_ack(_Kind, [], State) -> + State; +deliver_or_ack(Kind, SeqIds, State) -> + JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end, + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append( + JournalHdl, + [<<JPrefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>> || SeqId <- SeqIds]), + maybe_flush_journal(lists:foldl(fun (SeqId, StateN) -> + add_to_journal(SeqId, Kind, StateN) + end, State1, SeqIds)). + +%%---------------------------------------------------------------------------- +%% segment manipulation +%%---------------------------------------------------------------------------- + +seq_id_to_seg_and_rel_seq_id(SeqId) -> + { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. + +reconstruct_seq_id(Seg, RelSeq) -> + (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq. + +all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> + lists:sort( + sets:to_list( + lists:foldl( + fun (SegName, Set) -> + sets:add_element( + list_to_integer( + lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end, + SegName)), Set) + end, sets:from_list(segment_nums(Segments)), + filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))). + +segment_find_or_new(Seg, Dir, Segments) -> + case segment_find(Seg, Segments) of + {ok, Segment} -> Segment; + error -> SegName = integer_to_list(Seg) ++ ?SEGMENT_EXTENSION, + Path = filename:join(Dir, SegName), + #segment { num = Seg, + path = Path, + journal_entries = array_new(), + unacked = 0 } + end. + +segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) -> + {ok, Segment}; %% 1 or (2, matches head) +segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) -> + {ok, Segment}; %% 2, matches tail +segment_find(Seg, {Segments, _}) -> %% no match + dict:find(Seg, Segments). 
+ +segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head) + {Segments, [#segment { num = Seg } | Tail]}) -> + {Segments, [Segment | Tail]}; +segment_store(Segment = #segment { num = Seg }, %% 2, matches tail + {Segments, [SegmentA, #segment { num = Seg }]}) -> + {Segments, [Segment, SegmentA]}; +segment_store(Segment = #segment { num = Seg }, {Segments, []}) -> + {dict:erase(Seg, Segments), [Segment]}; +segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) -> + {dict:erase(Seg, Segments), [Segment, SegmentA]}; +segment_store(Segment = #segment { num = Seg }, + {Segments, [SegmentA, SegmentB]}) -> + {dict:store(SegmentB#segment.num, SegmentB, dict:erase(Seg, Segments)), + [Segment, SegmentA]}. + +segment_fold(Fun, Acc, {Segments, CachedSegments}) -> + dict:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end, + lists:foldl(Fun, Acc, CachedSegments), Segments). + +segment_map(Fun, {Segments, CachedSegments}) -> + {dict:map(fun (_Seg, Segment) -> Fun(Segment) end, Segments), + lists:map(Fun, CachedSegments)}. + +segment_nums({Segments, CachedSegments}) -> + lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++ + dict:fetch_keys(Segments). + +segments_new() -> + {dict:new(), []}. + +write_entry_to_segment(_RelSeq, {?PUB, del, ack}, Hdl) -> + Hdl; +write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> + ok = case Pub of + no_pub -> + ok; + {Guid, IsPersistent} -> + file_handle_cache:append( + Hdl, [<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, + (bool_to_int(IsPersistent)):1, + RelSeq:?REL_SEQ_BITS>>, Guid]) + end, + ok = case {Del, Ack} of + {no_del, no_ack} -> + ok; + _ -> + Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, + RelSeq:?REL_SEQ_BITS>>, + file_handle_cache:append( + Hdl, case {Del, Ack} of + {del, ack} -> [Binary, Binary]; + _ -> Binary + end) + end, + Hdl. 
+ +read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq}, + {Messages, Segments}, Dir) -> + Segment = segment_find_or_new(Seg, Dir, Segments), + {segment_entries_foldr( + fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) + when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso + (Seg < EndSeg orelse EndRelSeq >= RelSeq) -> + [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), + IsPersistent, IsDelivered == del} | Acc ]; + (_RelSeq, _Value, Acc) -> + Acc + end, Messages, Segment), + segment_store(Segment, Segments)}. + +segment_entries_foldr(Fun, Init, + Segment = #segment { journal_entries = JEntries }) -> + {SegEntries, _UnackedCount} = load_segment(false, Segment), + {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries), + array:sparse_foldr(Fun, Init, SegEntries1). + +%% Loading segments +%% +%% Does not do any combining with the journal at all. +load_segment(KeepAcked, #segment { path = Path }) -> + case filelib:is_file(Path) of + false -> {array_new(), 0}; + true -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_MODE, []), + {ok, 0} = file_handle_cache:position(Hdl, bof), + Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0), + ok = file_handle_cache:close(Hdl), + Res + end. + +load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) -> + case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of + {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, + IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} -> + %% because we specify /binary, and binaries are complete + %% bytes, the size spec is in bytes, not bits. 
+ {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES), + Obj = {{Guid, 1 == IsPersistentNum}, no_del, no_ack}, + SegEntries1 = array:set(RelSeq, Obj, SegEntries), + load_segment_entries(KeepAcked, Hdl, SegEntries1, + UnackedCount + 1); + {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, + RelSeq:?REL_SEQ_BITS>>} -> + {UnackedCountDelta, SegEntries1} = + case array:get(RelSeq, SegEntries) of + {Pub, no_del, no_ack} -> + { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)}; + {Pub, del, no_ack} when KeepAcked -> + {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)}; + {_Pub, del, no_ack} -> + {-1, array:reset(RelSeq, SegEntries)} + end, + load_segment_entries(KeepAcked, Hdl, SegEntries1, + UnackedCount + UnackedCountDelta); + _ErrOrEoF -> + {SegEntries, UnackedCount} + end. + +array_new() -> + array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). + +bool_to_int(true ) -> 1; +bool_to_int(false) -> 0. + +%%---------------------------------------------------------------------------- +%% journal & segment combination +%%---------------------------------------------------------------------------- + +%% Combine what we have just read from a segment file with what we're +%% holding for that segment in memory. There must be no duplicates. +segment_plus_journal(SegEntries, JEntries) -> + array:sparse_foldl( + fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) -> + SegEntry = array:get(RelSeq, SegEntriesOut), + {Obj, AdditionalUnackedDelta} = + segment_plus_journal1(SegEntry, JObj), + {case Obj of + undefined -> array:reset(RelSeq, SegEntriesOut); + _ -> array:set(RelSeq, Obj, SegEntriesOut) + end, + AdditionalUnacked + AdditionalUnackedDelta} + end, {SegEntries, 0}, JEntries). + +%% Here, the result is a tuple with the first element containing the +%% item which we may be adding to (for items only in the journal), +%% modifying in (bits in both), or, when returning 'undefined', +%% erasing from (ack in journal, not segment) the segment array. 
The +%% other element of the tuple is the delta for AdditionalUnacked. +segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) -> + {Obj, 1}; +segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) -> + {Obj, 1}; +segment_plus_journal1(undefined, {?PUB, del, ack}) -> + {undefined, 0}; + +segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) -> + {{Pub, del, no_ack}, 0}; +segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack}) -> + {undefined, -1}; +segment_plus_journal1({?PUB, del, no_ack}, {no_pub, no_del, ack}) -> + {undefined, -1}. + +%% Remove from the journal entries for a segment, items that are +%% duplicates of entries found in the segment itself. Used on start up +%% to clean up the journal. +journal_minus_segment(JEntries, SegEntries) -> + array:sparse_foldl( + fun (RelSeq, JObj, {JEntriesOut, UnackedRemoved}) -> + SegEntry = array:get(RelSeq, SegEntries), + {Obj, UnackedRemovedDelta} = + journal_minus_segment1(JObj, SegEntry), + {case Obj of + keep -> JEntriesOut; + undefined -> array:reset(RelSeq, JEntriesOut); + _ -> array:set(RelSeq, Obj, JEntriesOut) + end, + UnackedRemoved + UnackedRemovedDelta} + end, {JEntries, 0}, JEntries). + +%% Here, the result is a tuple with the first element containing the +%% item we are adding to or modifying in the (initially fresh) journal +%% array. If the item is 'undefined' we leave the journal array +%% alone. The other element of the tuple is the deltas for +%% UnackedRemoved. + +%% Both the same. 
Must be at least the publish +journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> + {undefined, 1}; +journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> + {undefined, 0}; + +%% Just publish in journal +journal_minus_segment1({?PUB, no_del, no_ack}, undefined) -> + {keep, 0}; + +%% Publish and deliver in journal +journal_minus_segment1({?PUB, del, no_ack}, undefined) -> + {keep, 0}; +journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, no_ack}, 1}; + +%% Publish, deliver and ack in journal +journal_minus_segment1({?PUB, del, ack}, undefined) -> + {keep, 0}; +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, ack}, 1}; +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> + {{no_pub, no_del, ack}, 1}; + +%% Just deliver in journal +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> + {undefined, 0}; + +%% Just ack in journal +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> + {undefined, -1}; + +%% Deliver and ack in journal +journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> + {{no_pub, no_del, ack}, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> + {undefined, -1}. diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index f2a903dc..252f81a3 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -33,18 +33,19 @@ -include("rabbit_framing.hrl"). -include("rabbit.hrl"). --export([start_link/0, info_keys/0, info/1, info/2, shutdown/2]). +-export([start_link/3, info_keys/0, info/1, info/2, shutdown/2]). -export([system_continue/3, system_terminate/4, system_code_change/4]). --export([init/1, mainloop/3]). 
+-export([init/4, mainloop/2]). --export([server_properties/0]). +-export([conserve_memory/2, server_properties/0]). --export([analyze_frame/2]). +-export([analyze_frame/3]). + +-export([emit_stats/1]). -import(gen_tcp). --import(fprof). -import(inet). -import(prim_inet). @@ -57,13 +58,18 @@ %--------------------------------------------------------------------------- --record(v1, {sock, connection, callback, recv_ref, connection_state, - queue_collector}). +-record(v1, {parent, sock, connection, callback, recv_length, recv_ref, + connection_state, queue_collector, heartbeater, stats_timer, + channel_sup_sup_pid, start_heartbeat_fun}). + +-define(STATISTICS_KEYS, [pid, recv_oct, recv_cnt, send_oct, send_cnt, + send_pend, state, channels]). + +-define(CREATION_EVENT_KEYS, [pid, address, port, peer_address, peer_port, + protocol, user, vhost, timeout, frame_max, + client_properties]). --define(INFO_KEYS, - [pid, address, port, peer_address, peer_port, - recv_oct, recv_cnt, send_oct, send_cnt, send_pend, - state, channels, user, vhost, timeout, frame_max, client_properties]). +-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). 
%% connection lifecycle %% @@ -101,6 +107,17 @@ %% -> log error, mark channel as closing, *running* %% handshake_timeout -> ignore, *running* %% heartbeat timeout -> *throw* +%% conserve_memory=true -> *blocking* +%% blocking: +%% conserve_memory=true -> *blocking* +%% conserve_memory=false -> *running* +%% receive a method frame for a content-bearing method +%% -> process, stop receiving, *blocked* +%% ...rest same as 'running' +%% blocked: +%% conserve_memory=true -> *blocked* +%% conserve_memory=false -> resume receiving, *running* +%% ...rest same as 'running' %% closing: %% socket close -> *terminate* %% receive connection.close -> send connection.close_ok, @@ -134,35 +151,60 @@ %% %% TODO: refactor the code so that the above is obvious +-define(IS_RUNNING(State), + (State#v1.connection_state =:= running orelse + State#v1.connection_state =:= blocking orelse + State#v1.connection_state =:= blocked)). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (pid()) -> [info()]). --spec(info/2 :: (pid(), [info_key()]) -> [info()]). +-type(start_heartbeat_fun() :: + fun ((rabbit_networking:socket(), non_neg_integer()) -> + rabbit_heartbeat:heartbeaters())). + +-spec(start_link/3 :: (pid(), pid(), start_heartbeat_fun()) -> + rabbit_types:ok(pid())). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (pid()) -> [rabbit_types:info()]). +-spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]). +-spec(emit_stats/1 :: (pid()) -> 'ok'). -spec(shutdown/2 :: (pid(), string()) -> 'ok'). --spec(server_properties/0 :: () -> amqp_table()). +-spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). +-spec(server_properties/0 :: () -> rabbit_framing:amqp_table()). + +%% These specs only exists to add no_return() to keep dialyzer happy +-spec(init/4 :: (pid(), pid(), pid(), start_heartbeat_fun()) -> no_return()). 
+-spec(start_connection/7 :: + (pid(), pid(), pid(), start_heartbeat_fun(), any(), + rabbit_networking:socket(), + fun ((rabbit_networking:socket()) -> + rabbit_types:ok_or_error2( + rabbit_networking:socket(), any()))) -> no_return()). -endif. %%-------------------------------------------------------------------------- -start_link() -> - {ok, proc_lib:spawn_link(?MODULE, init, [self()])}. +start_link(ChannelSupSupPid, Collector, StartHeartbeatFun) -> + {ok, proc_lib:spawn_link(?MODULE, init, [self(), ChannelSupSupPid, + Collector, StartHeartbeatFun])}. shutdown(Pid, Explanation) -> gen_server:call(Pid, {shutdown, Explanation}, infinity). -init(Parent) -> +init(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun) -> Deb = sys:debug_options([]), receive {go, Sock, SockTransform} -> - start_connection(Parent, Deb, Sock, SockTransform) + start_connection( + Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, Sock, + SockTransform) end. system_continue(Parent, Deb, State) -> - ?MODULE:mainloop(Parent, Deb, State). + ?MODULE:mainloop(Deb, State#v1{parent = Parent}). system_terminate(Reason, _Parent, _Deb, _State) -> exit(Reason). @@ -181,32 +223,12 @@ info(Pid, Items) -> {error, Error} -> throw(Error) end. -setup_profiling() -> - Value = rabbit_misc:get_config(profiling_enabled, false), - case Value of - once -> - rabbit_log:info("Enabling profiling for this connection, " - "and disabling for subsequent.~n"), - rabbit_misc:set_config(profiling_enabled, false), - fprof:trace(start); - true -> - rabbit_log:info("Enabling profiling for this connection.~n"), - fprof:trace(start); - false -> - ok - end, - Value. +emit_stats(Pid) -> + gen_server:cast(Pid, emit_stats). -teardown_profiling(Value) -> - case Value of - false -> - ok; - _ -> - rabbit_log:info("Completing profiling for this connection.~n"), - fprof:trace(stop), - fprof:profile(), - fprof:analyse([{dest, []}, {cols, 100}]) - end. +conserve_memory(Pid, Conserve) -> + Pid ! 
{conserve_memory, Conserve}, + ok. server_properties() -> {ok, Product} = application:get_key(rabbit, id), @@ -230,7 +252,8 @@ socket_op(Sock, Fun) -> exit(normal) end. -start_connection(Parent, Deb, Sock, SockTransform) -> +start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, + Sock, SockTransform) -> process_flag(trap_exit, true), {PeerAddress, PeerPort} = socket_op(Sock, fun rabbit_net:peername/1), PeerAddressS = inet_parse:ntoa(PeerAddress), @@ -239,22 +262,29 @@ start_connection(Parent, Deb, Sock, SockTransform) -> ClientSock = socket_op(Sock, SockTransform), erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), handshake_timeout), - ProfilingValue = setup_profiling(), - {ok, Collector} = rabbit_reader_queue_collector:start_link(), try - mainloop(Parent, Deb, switch_callback( - #v1{sock = ClientSock, - connection = #connection{ - user = none, - timeout_sec = ?HANDSHAKE_TIMEOUT, - frame_max = ?FRAME_MIN_SIZE, - vhost = none, - client_properties = none}, - callback = uninitialized_callback, - recv_ref = none, - connection_state = pre_init, - queue_collector = Collector}, - handshake, 8)) + mainloop(Deb, switch_callback( + #v1{parent = Parent, + sock = ClientSock, + connection = #connection{ + protocol = none, + user = none, + timeout_sec = ?HANDSHAKE_TIMEOUT, + frame_max = ?FRAME_MIN_SIZE, + vhost = none, + client_properties = none}, + callback = uninitialized_callback, + recv_length = 0, + recv_ref = none, + connection_state = pre_init, + queue_collector = Collector, + heartbeater = none, + stats_timer = + rabbit_event:init_stats_timer(), + channel_sup_sup_pid = ChannelSupSupPid, + start_heartbeat_fun = StartHeartbeatFun + }, + handshake, 8)) catch Ex -> (if Ex == connection_closed_abruptly -> fun rabbit_log:warning/2; @@ -271,21 +301,18 @@ start_connection(Parent, Deb, Sock, SockTransform) -> %% output to be sent, which results in unnecessary delays. 
%% %% gen_tcp:close(ClientSock), - teardown_profiling(ProfilingValue), - rabbit_reader_queue_collector:shutdown(Collector), - rabbit_misc:unlink_and_capture_exit(Collector) + rabbit_event:notify(connection_closed, [{pid, self()}]) end, done. -mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> +mainloop(Deb, State = #v1{parent = Parent, sock= Sock, recv_ref = Ref}) -> %%?LOGDEBUG("Reader mainloop: ~p bytes available, need ~p~n", [HaveBytes, WaitUntilNBytes]), receive {inet_async, Sock, Ref, {ok, Data}} -> {State1, Callback1, Length1} = handle_input(State#v1.callback, Data, State#v1{recv_ref = none}), - mainloop(Parent, Deb, - switch_callback(State1, Callback1, Length1)); + mainloop(Deb, switch_callback(State1, Callback1, Length1)); {inet_async, Sock, Ref, {error, closed}} -> if State#v1.connection_state =:= closed -> State; @@ -294,6 +321,8 @@ mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> end; {inet_async, Sock, Ref, {error, Reason}} -> throw({inet_error, Reason}); + {conserve_memory, Conserve} -> + mainloop(Deb, internal_conserve_memory(Conserve, State)); {'EXIT', Parent, Reason} -> terminate(io_lib:format("broker forced connection closure " "with reason '~w'", [Reason]), State), @@ -308,17 +337,17 @@ mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> exit(Reason); {channel_exit, _Chan, E = {writer, send_failed, _Error}} -> throw(E); - {channel_exit, Channel, Reason} -> - mainloop(Parent, Deb, handle_channel_exit(Channel, Reason, State)); - {'EXIT', Pid, Reason} -> - mainloop(Parent, Deb, handle_dependent_exit(Pid, Reason, State)); + {channel_exit, ChannelOrFrPid, Reason} -> + mainloop(Deb, handle_channel_exit(ChannelOrFrPid, Reason, State)); + {'DOWN', _MRef, process, ChSupPid, Reason} -> + mainloop(Deb, handle_dependent_exit(ChSupPid, Reason, State)); terminate_connection -> State; handshake_timeout -> - if State#v1.connection_state =:= running orelse + if ?IS_RUNNING(State) orelse State#v1.connection_state =:= 
closing orelse State#v1.connection_state =:= closed -> - mainloop(Parent, Deb, State); + mainloop(Deb, State); true -> throw({handshake_timeout, State#v1.callback}) end; @@ -329,16 +358,21 @@ mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> gen_server:reply(From, ok), case ForceTermination of force -> ok; - normal -> mainloop(Parent, Deb, NewState) + normal -> mainloop(Deb, NewState) end; {'$gen_call', From, info} -> gen_server:reply(From, infos(?INFO_KEYS, State)), - mainloop(Parent, Deb, State); + mainloop(Deb, State); {'$gen_call', From, {info, Items}} -> gen_server:reply(From, try {ok, infos(Items, State)} catch Error -> {error, Error} end), - mainloop(Parent, Deb, State); + mainloop(Deb, State); + {'$gen_cast', emit_stats} -> + internal_emit_stats(State), + mainloop(Deb, State#v1{stats_timer = + rabbit_event:reset_stats_timer_after( + State#v1.stats_timer)}); {system, From, Request} -> sys:handle_system_msg(Request, From, Parent, ?MODULE, Deb, State); @@ -347,21 +381,44 @@ mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> exit({unexpected_message, Other}) end. -switch_callback(OldState, NewCallback, Length) -> +switch_callback(State = #v1{connection_state = blocked, + heartbeater = Heartbeater}, Callback, Length) -> + ok = rabbit_heartbeat:pause_monitor(Heartbeater), + State#v1{callback = Callback, recv_length = Length, recv_ref = none}; +switch_callback(State, Callback, Length) -> Ref = inet_op(fun () -> rabbit_net:async_recv( - OldState#v1.sock, Length, infinity) end), - OldState#v1{callback = NewCallback, - recv_ref = Ref}. + State#v1.sock, Length, infinity) end), + State#v1{callback = Callback, recv_length = Length, recv_ref = Ref}. -terminate(Explanation, State = #v1{connection_state = running}) -> +terminate(Explanation, State) when ?IS_RUNNING(State) -> {normal, send_exception(State, 0, rabbit_misc:amqp_error( connection_forced, Explanation, [], none))}; terminate(_Explanation, State) -> {force, State}. 
-close_connection(State = #v1{connection = #connection{ +internal_conserve_memory(true, State = #v1{connection_state = running}) -> + State#v1{connection_state = blocking}; +internal_conserve_memory(false, State = #v1{connection_state = blocking}) -> + State#v1{connection_state = running}; +internal_conserve_memory(false, State = #v1{connection_state = blocked, + heartbeater = Heartbeater, + callback = Callback, + recv_length = Length, + recv_ref = none}) -> + ok = rabbit_heartbeat:resume_monitor(Heartbeater), + switch_callback(State#v1{connection_state = running}, Callback, Length); +internal_conserve_memory(_Conserve, State) -> + State. + +close_connection(State = #v1{queue_collector = Collector, + connection = #connection{ timeout_sec = TimeoutSec}}) -> + %% The spec says "Exclusive queues may only be accessed by the + %% current connection, and are deleted when that connection + %% closes." This does not strictly imply synchrony, but in + %% practice it seems to be what people assume. + rabbit_queue_collector:delete_all(Collector), %% We terminate the connection after the specified interval, but %% no later than ?CLOSING_TIMEOUT seconds. TimeoutMillisec = @@ -376,30 +433,45 @@ close_channel(Channel, State) -> put({channel, Channel}, closing), State. +handle_channel_exit(ChFrPid, Reason, State) when is_pid(ChFrPid) -> + {channel, Channel} = get({ch_fr_pid, ChFrPid}), + handle_exception(State, Channel, Reason); handle_channel_exit(Channel, Reason, State) -> handle_exception(State, Channel, Reason). 
-handle_dependent_exit(Pid, normal, State) -> - erase({chpid, Pid}), - maybe_close(State); -handle_dependent_exit(Pid, Reason, State) -> - case channel_cleanup(Pid) of - undefined -> exit({abnormal_dependent_exit, Pid, Reason}); - Channel -> maybe_close(handle_exception(State, Channel, Reason)) +handle_dependent_exit(ChSupPid, Reason, State) -> + case termination_kind(Reason) of + controlled -> + case erase({ch_sup_pid, ChSupPid}) of + undefined -> ok; + {_Channel, {ch_fr_pid, _ChFrPid} = ChFr} -> erase(ChFr) + end, + maybe_close(State); + uncontrolled -> + case channel_cleanup(ChSupPid) of + undefined -> + exit({abnormal_dependent_exit, ChSupPid, Reason}); + Channel -> + maybe_close(handle_exception(State, Channel, Reason)) + end end. -channel_cleanup(Pid) -> - case get({chpid, Pid}) of - undefined -> undefined; - {channel, Channel} -> erase({channel, Channel}), - erase({chpid, Pid}), - Channel +channel_cleanup(ChSupPid) -> + case get({ch_sup_pid, ChSupPid}) of + undefined -> undefined; + {{channel, Channel}, ChFr} -> erase({channel, Channel}), + erase(ChFr), + erase({ch_sup_pid, ChSupPid}), + Channel end. -all_channels() -> [Pid || {{chpid, Pid},_} <- get()]. +all_channels() -> [ChFrPid || {{ch_sup_pid, _ChSupPid}, + {_Channel, {ch_fr_pid, ChFrPid}}} <- get()]. 
terminate_channels() -> - NChannels = length([exit(Pid, normal) || Pid <- all_channels()]), + NChannels = + length([rabbit_framing_channel:shutdown(ChFrPid) + || ChFrPid <- all_channels()]), if NChannels > 0 -> Timeout = 1000 * ?CHANNEL_TERMINATION_TIMEOUT * NChannels, TimerRef = erlang:send_after(Timeout, self(), cancel_wait), @@ -417,14 +489,15 @@ wait_for_channel_termination(0, TimerRef) -> wait_for_channel_termination(N, TimerRef) -> receive - {'EXIT', Pid, Reason} -> - case channel_cleanup(Pid) of + {'DOWN', _MRef, process, ChSupPid, Reason} -> + case channel_cleanup(ChSupPid) of undefined -> - exit({abnormal_dependent_exit, Pid, Reason}); + exit({abnormal_dependent_exit, ChSupPid, Reason}); Channel -> - case Reason of - normal -> ok; - _ -> + case termination_kind(Reason) of + controlled -> + ok; + uncontrolled -> rabbit_log:error( "connection ~p, channel ~p - " "error while terminating:~n~p~n", @@ -437,24 +510,28 @@ wait_for_channel_termination(N, TimerRef) -> end. maybe_close(State = #v1{connection_state = closing, - queue_collector = Collector}) -> + connection = #connection{protocol = Protocol}, + sock = Sock}) -> case all_channels() of [] -> - %% Spec says "Exclusive queues may only be accessed by the current - %% connection, and are deleted when that connection closes." - %% This does not strictly imply synchrony, but in practice it seems - %% to be what people assume. - rabbit_reader_queue_collector:delete_all(Collector), - ok = send_on_channel0(State#v1.sock, #'connection.close_ok'{}), - close_connection(State); + NewState = close_connection(State), + ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), + NewState; _ -> State end; maybe_close(State) -> State. -handle_frame(Type, 0, Payload, State = #v1{connection_state = CS}) +termination_kind(normal) -> controlled; +termination_kind(shutdown) -> controlled; +termination_kind({shutdown, _Term}) -> controlled; +termination_kind(_) -> uncontrolled. 
+ +handle_frame(Type, 0, Payload, + State = #v1{connection_state = CS, + connection = #connection{protocol = Protocol}}) when CS =:= closing; CS =:= closed -> - case analyze_frame(Type, Payload) of + case analyze_frame(Type, Payload, Protocol) of {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); _Other -> State @@ -462,31 +539,38 @@ handle_frame(Type, 0, Payload, State = #v1{connection_state = CS}) handle_frame(_Type, _Channel, _Payload, State = #v1{connection_state = CS}) when CS =:= closing; CS =:= closed -> State; -handle_frame(Type, 0, Payload, State) -> - case analyze_frame(Type, Payload) of +handle_frame(Type, 0, Payload, + State = #v1{connection = #connection{protocol = Protocol}}) -> + case analyze_frame(Type, Payload, Protocol) of error -> throw({unknown_frame, 0, Type, Payload}); heartbeat -> State; - trace -> State; {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); Other -> throw({unexpected_frame_on_channel0, Other}) end; -handle_frame(Type, Channel, Payload, State) -> - case analyze_frame(Type, Payload) of +handle_frame(Type, Channel, Payload, + State = #v1{connection = #connection{protocol = Protocol}}) -> + case analyze_frame(Type, Payload, Protocol) of error -> throw({unknown_frame, Channel, Type, Payload}); heartbeat -> throw({unexpected_heartbeat_frame, Channel}); - trace -> throw({unexpected_trace_frame, Channel}); AnalyzedFrame -> %%?LOGDEBUG("Ch ~p Frame ~p~n", [Channel, AnalyzedFrame]), case get({channel, Channel}) of - {chpid, ChPid} -> + {ch_fr_pid, ChFrPid} -> + ok = rabbit_framing_channel:process(ChFrPid, AnalyzedFrame), case AnalyzedFrame of {method, 'channel.close', _} -> - erase({channel, Channel}); - _ -> ok - end, - ok = rabbit_framing_channel:process(ChPid, AnalyzedFrame), - State; + erase({channel, Channel}), + State; + {method, MethodName, _} -> + case (State#v1.connection_state =:= blocking andalso + Protocol:method_has_content(MethodName)) of + true -> 
State#v1{connection_state = blocked}; + false -> State + end; + _ -> + State + end; closing -> %% According to the spec, after sending a %% channel.close we must ignore all frames except @@ -506,32 +590,37 @@ handle_frame(Type, Channel, Payload, State) -> end, State; undefined -> - case State#v1.connection_state of - running -> ok = send_to_new_channel( - Channel, AnalyzedFrame, State), - State; - Other -> throw({channel_frame_while_starting, - Channel, Other, AnalyzedFrame}) + case ?IS_RUNNING(State) of + true -> ok = send_to_new_channel( + Channel, AnalyzedFrame, State), + State; + false -> throw({channel_frame_while_starting, + Channel, State#v1.connection_state, + AnalyzedFrame}) end end end. -analyze_frame(?FRAME_METHOD, <<ClassId:16, MethodId:16, MethodFields/binary>>) -> - {method, rabbit_framing:lookup_method_name({ClassId, MethodId}), MethodFields}; -analyze_frame(?FRAME_HEADER, <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>) -> +analyze_frame(?FRAME_METHOD, + <<ClassId:16, MethodId:16, MethodFields/binary>>, + Protocol) -> + MethodName = Protocol:lookup_method_name({ClassId, MethodId}), + {method, MethodName, MethodFields}; +analyze_frame(?FRAME_HEADER, + <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>, + _Protocol) -> {content_header, ClassId, Weight, BodySize, Properties}; -analyze_frame(?FRAME_BODY, Body) -> +analyze_frame(?FRAME_BODY, Body, _Protocol) -> {content_body, Body}; -analyze_frame(?FRAME_TRACE, _Body) -> - trace; -analyze_frame(?FRAME_HEARTBEAT, <<>>) -> +analyze_frame(?FRAME_HEARTBEAT, <<>>, _Protocol) -> heartbeat; -analyze_frame(_Type, _Body) -> +analyze_frame(_Type, _Body, _Protocol) -> error. 
handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, State) -> %%?LOGDEBUG("Got frame header: ~p/~p/~p~n", [Type, Channel, PayloadSize]), - {State, {frame_payload, Type, Channel, PayloadSize}, PayloadSize + 1}; + {ensure_stats_timer(State), {frame_payload, Type, Channel, PayloadSize}, + PayloadSize + 1}; handle_input({frame_payload, Type, Channel, PayloadSize}, PayloadAndMarker, State) -> case PayloadAndMarker of @@ -543,54 +632,76 @@ handle_input({frame_payload, Type, Channel, PayloadSize}, PayloadAndMarker, Stat throw({bad_payload, PayloadAndMarker}) end; -handle_input(handshake, <<"AMQP",1,1,ProtocolMajor,ProtocolMinor>>, - State = #v1{sock = Sock, connection = Connection}) -> - case check_version({ProtocolMajor, ProtocolMinor}, - {?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR}) of - true -> - ok = send_on_channel0( - Sock, - #'connection.start'{ - version_major = ?PROTOCOL_VERSION_MAJOR, - version_minor = ?PROTOCOL_VERSION_MINOR, - server_properties = server_properties(), - mechanisms = <<"PLAIN AMQPLAIN">>, - locales = <<"en_US">> }), - {State#v1{connection = Connection#connection{ - timeout_sec = ?NORMAL_TIMEOUT}, - connection_state = starting}, - frame_header, 7}; - false -> - throw({bad_version, ProtocolMajor, ProtocolMinor}) - end; +%% The two rules pertaining to version negotiation: +%% +%% * If the server cannot support the protocol specified in the +%% protocol header, it MUST respond with a valid protocol header and +%% then close the socket connection. +%% +%% * The server MUST provide a protocol version that is lower than or +%% equal to that requested by the client in the protocol header. +handle_input(handshake, <<"AMQP", 0, 0, 9, 1>>, State) -> + start_connection({0, 9, 1}, rabbit_framing_amqp_0_9_1, State); + +%% This is the protocol header for 0-9, which we can safely treat as +%% though it were 0-9-1. 
+handle_input(handshake, <<"AMQP", 1, 1, 0, 9>>, State) -> + start_connection({0, 9, 0}, rabbit_framing_amqp_0_9_1, State); + +%% This is what most clients send for 0-8. The 0-8 spec, confusingly, +%% defines the version as 8-0. +handle_input(handshake, <<"AMQP", 1, 1, 8, 0>>, State) -> + start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State); + +%% The 0-8 spec as on the AMQP web site actually has this as the +%% protocol header; some libraries e.g., py-amqplib, send it when they +%% want 0-8. +handle_input(handshake, <<"AMQP", 1, 1, 9, 1>>, State) -> + start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State); + +handle_input(handshake, <<"AMQP", A, B, C, D>>, #v1{sock = Sock}) -> + refuse_connection(Sock, {bad_version, A, B, C, D}); handle_input(handshake, Other, #v1{sock = Sock}) -> - ok = inet_op(fun () -> rabbit_net:send( - Sock, <<"AMQP",1,1, - ?PROTOCOL_VERSION_MAJOR, - ?PROTOCOL_VERSION_MINOR>>) end), - throw({bad_header, Other}); + refuse_connection(Sock, {bad_header, Other}); handle_input(Callback, Data, _State) -> throw({bad_input, Callback, Data}). -%% the 0-8 spec, confusingly, defines the version as 8-0 -adjust_version({8,0}) -> {0,8}; -adjust_version(Version) -> Version. -check_version(ClientVersion, ServerVersion) -> - {ClientMajor, ClientMinor} = adjust_version(ClientVersion), - {ServerMajor, ServerMinor} = adjust_version(ServerVersion), - ClientMajor > ServerMajor - orelse - (ClientMajor == ServerMajor andalso - ClientMinor >= ServerMinor). +%% Offer a protocol version to the client. Connection.start only +%% includes a major and minor version number, Luckily 0-9 and 0-9-1 +%% are similar enough that clients will be happy with either. 
+start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision}, + Protocol, + State = #v1{sock = Sock, connection = Connection}) -> + Start = #'connection.start'{ version_major = ProtocolMajor, + version_minor = ProtocolMinor, + server_properties = server_properties(), + mechanisms = <<"PLAIN AMQPLAIN">>, + locales = <<"en_US">> }, + ok = send_on_channel0(Sock, Start, Protocol), + {State#v1{connection = Connection#connection{ + timeout_sec = ?NORMAL_TIMEOUT, + protocol = Protocol}, + connection_state = starting}, + frame_header, 7}. + +refuse_connection(Sock, Exception) -> + ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",0,0,9,1>>) end), + throw(Exception). + +ensure_stats_timer(State = #v1{stats_timer = StatsTimer}) -> + Self = self(), + State#v1{stats_timer = rabbit_event:ensure_stats_timer_after( + StatsTimer, + fun() -> emit_stats(Self) end)}. %%-------------------------------------------------------------------------- -handle_method0(MethodName, FieldsBin, State) -> +handle_method0(MethodName, FieldsBin, + State = #v1{connection = #connection{protocol = Protocol}}) -> try - handle_method0(rabbit_framing:decode_method_fields( - MethodName, FieldsBin), + handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin), State) catch exit:Reason -> CompleteReason = case Reason of @@ -598,13 +709,14 @@ handle_method0(MethodName, FieldsBin, State) -> Reason#amqp_error{method = MethodName}; OtherReason -> OtherReason end, - case State#v1.connection_state of - running -> send_exception(State, 0, CompleteReason); + case ?IS_RUNNING(State) of + true -> send_exception(State, 0, CompleteReason); %% We don't trust the client at this point - force %% them to wait for a bit so they can't DOS us with %% repeated failed logins etc. - Other -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), - throw({channel0_error, Other, CompleteReason}) + false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), + throw({channel0_error, State#v1.connection_state, + CompleteReason}) end end. 
@@ -612,14 +724,14 @@ handle_method0(#'connection.start_ok'{mechanism = Mechanism, response = Response, client_properties = ClientProperties}, State = #v1{connection_state = starting, - connection = Connection, + connection = Connection = + #connection{protocol = Protocol}, sock = Sock}) -> User = rabbit_access_control:check_login(Mechanism, Response), - ok = send_on_channel0( - Sock, - #'connection.tune'{channel_max = 0, + Tune = #'connection.tune'{channel_max = 0, frame_max = ?FRAME_MAX, - heartbeat = 0}), + heartbeat = 0}, + ok = send_on_channel0(Sock, Tune, Protocol), State#v1{connection_state = tuning, connection = Connection#connection{ user = User, @@ -628,7 +740,8 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax, heartbeat = ClientHeartbeat}, State = #v1{connection_state = tuning, connection = Connection, - sock = Sock}) -> + sock = Sock, + start_heartbeat_fun = SHF}) -> if (FrameMax /= 0) and (FrameMax < ?FRAME_MIN_SIZE) -> rabbit_misc:protocol_error( not_allowed, "frame_max=~w < ~w min size", @@ -638,51 +751,42 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax, not_allowed, "frame_max=~w > ~w max size", [FrameMax, ?FRAME_MAX]); true -> - rabbit_heartbeat:start_heartbeat(Sock, ClientHeartbeat), + Heartbeater = SHF(Sock, ClientHeartbeat), State#v1{connection_state = opening, connection = Connection#connection{ timeout_sec = ClientHeartbeat, - frame_max = FrameMax}} + frame_max = FrameMax}, + heartbeater = Heartbeater} end; -handle_method0(#'connection.open'{virtual_host = VHostPath, - insist = Insist}, +handle_method0(#'connection.open'{virtual_host = VHostPath}, + State = #v1{connection_state = opening, connection = Connection = #connection{ - user = User}, + user = User, + protocol = Protocol}, sock = Sock}) -> ok = rabbit_access_control:check_vhost_access(User, VHostPath), NewConnection = Connection#connection{vhost = VHostPath}, - KnownHosts = format_listeners(rabbit_networking:active_listeners()), - Redirects = 
compute_redirects(Insist), - if Redirects == [] -> - ok = send_on_channel0( - Sock, - #'connection.open_ok'{known_hosts = KnownHosts}), - State#v1{connection_state = running, - connection = NewConnection}; - true -> - %% FIXME: 'host' is supposed to only contain one - %% address; but which one do we pick? This is - %% really a problem with the spec. - Host = format_listeners(Redirects), - rabbit_log:info("connection ~p redirecting to ~p~n", - [self(), Host]), - ok = send_on_channel0( - Sock, - #'connection.redirect'{host = Host, - known_hosts = KnownHosts}), - close_connection(State#v1{connection = NewConnection}) - end; -handle_method0(#'connection.close'{}, - State = #v1{connection_state = running}) -> + ok = send_on_channel0(Sock, #'connection.open_ok'{}, Protocol), + State1 = internal_conserve_memory( + rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), + State#v1{connection_state = running, + connection = NewConnection}), + rabbit_event:notify(connection_created, + infos(?CREATION_EVENT_KEYS, State1)), + State1; +handle_method0(#'connection.close'{}, State) when ?IS_RUNNING(State) -> lists:foreach(fun rabbit_framing_channel:shutdown/1, all_channels()), maybe_close(State#v1{connection_state = closing}); -handle_method0(#'connection.close'{}, State = #v1{connection_state = CS}) +handle_method0(#'connection.close'{}, + State = #v1{connection_state = CS, + connection = #connection{protocol = Protocol}, + sock = Sock}) when CS =:= closing; CS =:= closed -> %% We're already closed or closing, so we don't need to cleanup %% anything. - ok = send_on_channel0(State#v1.sock, #'connection.close_ok'{}), + ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), State; handle_method0(#'connection.close_ok'{}, State = #v1{connection_state = closed}) -> @@ -695,23 +799,8 @@ handle_method0(_Method, #v1{connection_state = S}) -> rabbit_misc:protocol_error( channel_error, "unexpected method in connection state ~w", [S]). 
-send_on_channel0(Sock, Method) -> - ok = rabbit_writer:internal_send_command(Sock, 0, Method). - -format_listeners(Listeners) -> - list_to_binary( - rabbit_misc:intersperse( - $,, - [io_lib:format("~s:~w", [Host, Port]) || - #listener{host = Host, port = Port} <- Listeners])). - -compute_redirects(true) -> []; -compute_redirects(false) -> - Node = node(), - LNode = rabbit_load:pick(), - if Node == LNode -> []; - true -> rabbit_networking:node_listeners(LNode) - end. +send_on_channel0(Sock, Method, Protocol) -> + ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol). %%-------------------------------------------------------------------------- @@ -745,6 +834,10 @@ i(state, #v1{connection_state = S}) -> S; i(channels, #v1{}) -> length(all_channels()); +i(protocol, #v1{connection = #connection{protocol = none}}) -> + none; +i(protocol, #v1{connection = #connection{protocol = Protocol}}) -> + Protocol:version(); i(user, #v1{connection = #connection{user = #user{username = Username}}}) -> Username; i(user, #v1{connection = #connection{user = none}}) -> @@ -763,19 +856,22 @@ i(Item, #v1{}) -> %%-------------------------------------------------------------------------- -send_to_new_channel(Channel, AnalyzedFrame, - State = #v1{queue_collector = Collector}) -> - #v1{sock = Sock, connection = #connection{ - frame_max = FrameMax, - user = #user{username = Username}, - vhost = VHost}} = State, - WriterPid = rabbit_writer:start(Sock, Channel, FrameMax), - ChPid = rabbit_framing_channel:start_link( - fun rabbit_channel:start_link/6, - [Channel, self(), WriterPid, Username, VHost, Collector]), - put({channel, Channel}, {chpid, ChPid}), - put({chpid, ChPid}, {channel, Channel}), - ok = rabbit_framing_channel:process(ChPid, AnalyzedFrame). 
+send_to_new_channel(Channel, AnalyzedFrame, State) -> + #v1{sock = Sock, queue_collector = Collector, + channel_sup_sup_pid = ChanSupSup, + connection = #connection{protocol = Protocol, + frame_max = FrameMax, + user = #user{username = Username}, + vhost = VHost}} = State, + {ok, ChSupPid, ChFrPid} = + rabbit_channel_sup_sup:start_channel( + ChanSupSup, {Protocol, Sock, Channel, FrameMax, + self(), Username, VHost, Collector}), + erlang:monitor(process, ChSupPid), + put({channel, Channel}, {ch_fr_pid, ChFrPid}), + put({ch_sup_pid, ChSupPid}, {{channel, Channel}, {ch_fr_pid, ChFrPid}}), + put({ch_fr_pid, ChFrPid}, {channel, Channel}), + ok = rabbit_framing_channel:process(ChFrPid, AnalyzedFrame). log_channel_error(ConnectionState, Channel, Reason) -> rabbit_log:error("connection ~p (~p), channel ~p - error:~n~p~n", @@ -788,25 +884,27 @@ handle_exception(State = #v1{connection_state = CS}, Channel, Reason) -> log_channel_error(CS, Channel, Reason), send_exception(State, Channel, Reason). -send_exception(State, Channel, Reason) -> - {ShouldClose, CloseChannel, CloseMethod} = map_exception(Channel, Reason), +send_exception(State = #v1{connection = #connection{protocol = Protocol}}, + Channel, Reason) -> + {ShouldClose, CloseChannel, CloseMethod} = + map_exception(Channel, Reason, Protocol), NewState = case ShouldClose of true -> terminate_channels(), close_connection(State); false -> close_channel(Channel, State) end, ok = rabbit_writer:internal_send_command( - NewState#v1.sock, CloseChannel, CloseMethod), + NewState#v1.sock, CloseChannel, CloseMethod, Protocol), NewState. 
-map_exception(Channel, Reason) -> +map_exception(Channel, Reason, Protocol) -> {SuggestedClose, ReplyCode, ReplyText, FailedMethod} = - lookup_amqp_exception(Reason), + lookup_amqp_exception(Reason, Protocol), ShouldClose = SuggestedClose or (Channel == 0), {ClassId, MethodId} = case FailedMethod of {_, _} -> FailedMethod; - none -> {0, 0}; - _ -> rabbit_framing:method_id(FailedMethod) + none -> {0, 0}; + _ -> Protocol:method_id(FailedMethod) end, {CloseChannel, CloseMethod} = case ShouldClose of @@ -821,22 +919,16 @@ map_exception(Channel, Reason) -> end, {ShouldClose, CloseChannel, CloseMethod}. -%% FIXME: this clause can go when we move to AMQP spec >=8.1 -lookup_amqp_exception(#amqp_error{name = precondition_failed, - explanation = Expl, - method = Method}) -> - ExplBin = amqp_exception_explanation(<<"PRECONDITION_FAILED">>, Expl), - {false, 406, ExplBin, Method}; lookup_amqp_exception(#amqp_error{name = Name, explanation = Expl, - method = Method}) -> - {ShouldClose, Code, Text} = rabbit_framing:lookup_amqp_exception(Name), + method = Method}, + Protocol) -> + {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(Name), ExplBin = amqp_exception_explanation(Text, Expl), {ShouldClose, Code, ExplBin, Method}; -lookup_amqp_exception(Other) -> +lookup_amqp_exception(Other, Protocol) -> rabbit_log:warning("Non-AMQP exit reason '~p'~n", [Other]), - {ShouldClose, Code, Text} = - rabbit_framing:lookup_amqp_exception(internal_error), + {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(internal_error), {ShouldClose, Code, Text, none}. amqp_exception_explanation(Text, Expl) -> @@ -845,3 +937,6 @@ amqp_exception_explanation(Text, Expl) -> if size(CompleteTextBin) > 255 -> <<CompleteTextBin:252/binary, "...">>; true -> CompleteTextBin end. + +internal_emit_stats(State) -> + rabbit_event:notify(connection_stats, infos(?STATISTICS_KEYS, State)). 
diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index 75196bc0..bd57f737 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -33,15 +33,25 @@ -include_lib("stdlib/include/qlc.hrl"). -include("rabbit.hrl"). --export([deliver/2, - match_bindings/2, - match_routing_key/2]). +-export([deliver/2, match_bindings/2, match_routing_key/2]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(deliver/2 :: ([pid()], delivery()) -> {routing_result(), [pid()]}). +-export_type([routing_key/0, routing_result/0]). + +-type(routing_key() :: binary()). +-type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). +-type(qpids() :: [pid()]). + +-spec(deliver/2 :: + (qpids(), rabbit_types:delivery()) -> {routing_result(), qpids()}). +-spec(match_bindings/2 :: (rabbit_exchange:name(), + fun ((rabbit_types:binding()) -> boolean())) -> + qpids()). +-spec(match_routing_key/2 :: (rabbit_exchange:name(), routing_key() | '_') -> + qpids()). -endif. @@ -63,8 +73,8 @@ deliver(QPids, Delivery = #delivery{mandatory = false, deliver(QPids, Delivery) -> {Success, _} = delegate:invoke(QPids, - fun (Pid) -> - rabbit_amqqueue:deliver(Pid, Delivery) + fun (Pid) -> + rabbit_amqqueue:deliver(Pid, Delivery) end), {Routed, Handled} = lists:foldl(fun fold_deliveries/2, {false, []}, Success), @@ -75,10 +85,10 @@ deliver(QPids, Delivery) -> %% TODO: This causes a full scan for each entry with the same exchange match_bindings(Name, Match) -> Query = qlc:q([QName || #route{binding = Binding = #binding{ - exchange_name = ExchangeName, + exchange_name = XName, queue_name = QName}} <- mnesia:table(rabbit_route), - ExchangeName == Name, + XName == Name, Match(Binding)]), lookup_qpids(mnesia:async_dirty(fun qlc:e/1, [Query])). 
diff --git a/src/rabbit_sasl_report_file_h.erl b/src/rabbit_sasl_report_file_h.erl index 434cdae0..eb2037c2 100644 --- a/src/rabbit_sasl_report_file_h.erl +++ b/src/rabbit_sasl_report_file_h.erl @@ -33,7 +33,8 @@ -behaviour(gen_event). --export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]). +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). %% rabbit_sasl_report_file_h is a wrapper around the sasl_report_file_h %% module because the original's init/1 does not match properly diff --git a/src/rabbit_sup.erl b/src/rabbit_sup.erl index 2c5e5112..97613d17 100644 --- a/src/rabbit_sup.erl +++ b/src/rabbit_sup.erl @@ -34,7 +34,7 @@ -behaviour(supervisor). -export([start_link/0, start_child/1, start_child/2, start_child/3, - start_restartable_child/1, start_restartable_child/2]). + start_restartable_child/1, start_restartable_child/2, stop_child/1]). -export([init/1]). @@ -69,5 +69,11 @@ start_restartable_child(Mod, Args) -> transient, infinity, supervisor, [rabbit_restartable_sup]}), ok. +stop_child(ChildId) -> + case supervisor:terminate_child(?SERVER, ChildId) of + ok -> supervisor:delete_child(?SERVER, ChildId); + E -> E + end. + init([]) -> {ok, {{one_for_all, 0, 1}, []}}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 34eec121..a72656b7 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -35,15 +35,15 @@ -export([all_tests/0, test_parsing/0]). -%% Exported so the hook mechanism can call back --export([handle_hook/3, bad_handle_hook/3, extra_arg_hook/5]). - -import(lists). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). -include_lib("kernel/include/file.hrl"). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). 
+ test_content_prop_roundtrip(Datum, Binary) -> Types = [element(1, E) || E <- Datum], Values = [element(2, E) || E <- Datum], @@ -51,16 +51,24 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> + application:set_env(rabbit, file_handles_high_watermark, 10, infinity), + ok = file_handle_cache:set_limit(10), + passed = test_file_handle_cache(), + passed = test_backing_queue(), passed = test_priority_queue(), + passed = test_bpqueue(), passed = test_pg_local(), passed = test_unfold(), + passed = test_supervisor_delayed_restart(), passed = test_parsing(), passed = test_content_framing(), + passed = test_content_transcoding(), passed = test_topic_matching(), passed = test_log_management(), passed = test_app_management(), passed = test_log_management_during_startup(), - passed = test_memory_pressure(), + passed = test_statistics(), + passed = test_option_parser(), passed = test_cluster_management(), passed = test_user_management(), passed = test_server_status(), @@ -207,6 +215,143 @@ test_priority_queue(Q) -> priority_queue:to_list(Q), priority_queue_out_all(Q)}. 
+test_bpqueue() -> + Q = bpqueue:new(), + true = bpqueue:is_empty(Q), + 0 = bpqueue:len(Q), + [] = bpqueue:to_list(Q), + + Q1 = bpqueue_test(fun bpqueue:in/3, fun bpqueue:out/1, + fun bpqueue:to_list/1, + fun bpqueue:foldl/3, fun bpqueue:map_fold_filter_l/4), + Q2 = bpqueue_test(fun bpqueue:in_r/3, fun bpqueue:out_r/1, + fun (QR) -> lists:reverse( + [{P, lists:reverse(L)} || + {P, L} <- bpqueue:to_list(QR)]) + end, + fun bpqueue:foldr/3, fun bpqueue:map_fold_filter_r/4), + + [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(bpqueue:join(Q, Q1)), + [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(bpqueue:join(Q2, Q)), + [{foo, [1, 2]}, {bar, [3, 3]}, {foo, [2,1]}] = + bpqueue:to_list(bpqueue:join(Q1, Q2)), + + [{foo, [1, 2]}, {bar, [3]}, {foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list(bpqueue:join(Q1, Q1)), + + [{foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list( + bpqueue:from_list( + [{x, []}, {foo, [1]}, {y, []}, {foo, [2]}, {bar, [3]}, {z, []}])), + + [{undefined, [a]}] = bpqueue:to_list(bpqueue:from_list([{undefined, [a]}])), + + {4, [a,b,c,d]} = + bpqueue:foldl( + fun (Prefix, Value, {Prefix, Acc}) -> + {Prefix + 1, [Value | Acc]} + end, + {0, []}, bpqueue:from_list([{0,[d]}, {1,[c]}, {2,[b]}, {3,[a]}])), + + [{bar,3}, {foo,2}, {foo,1}] = + bpqueue:foldr(fun (P, V, I) -> [{P,V} | I] end, [], Q2), + + BPQL = [{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], + BPQ = bpqueue:from_list(BPQL), + + %% no effect + {BPQL, 0} = bpqueue_mffl([none], {none, []}, BPQ), + {BPQL, 0} = bpqueue_mffl([foo,bar], {none, [1]}, BPQ), + {BPQL, 0} = bpqueue_mffl([bar], {none, [3]}, BPQ), + {BPQL, 0} = bpqueue_mffr([bar], {foo, [5]}, BPQ), + + %% process 1 item + {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffl([foo,bar], {foo, [2]}, BPQ), + {[{foo,[1,2,2]}, {bar,[-3,4,5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffl([bar], {bar, [4]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,-7]}], 1} = + bpqueue_mffr([foo,bar], {foo, [6]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4]}, {baz,[-5]}, 
{foo,[5,6,7]}], 1} = + bpqueue_mffr([bar], {baz, [4]}, BPQ), + + %% change prefix + {[{bar,[-1,-2,-2,-3,-4,-5,-5,-6,-7]}], 9} = + bpqueue_mffl([foo,bar], {bar, []}, BPQ), + {[{bar,[-1,-2,-2,3,4,5]}, {foo,[5,6,7]}], 3} = + bpqueue_mffl([foo], {bar, [5]}, BPQ), + {[{bar,[-1,-2,-2,3,4,5,-5,-6]}, {foo,[7]}], 5} = + bpqueue_mffl([foo], {bar, [7]}, BPQ), + {[{foo,[1,2,2,-3,-4]}, {bar,[5]}, {foo,[5,6,7]}], 2} = + bpqueue_mffl([bar], {foo, [5]}, BPQ), + {[{bar,[-1,-2,-2,3,4,5,-5,-6,-7]}], 6} = + bpqueue_mffl([foo], {bar, []}, BPQ), + {[{foo,[1,2,2,-3,-4,-5,5,6,7]}], 3} = + bpqueue_mffl([bar], {foo, []}, BPQ), + + %% edge cases + {[{foo,[-1,-2,-2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 3} = + bpqueue_mffl([foo], {foo, [5]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[-5,-6,-7]}], 3} = + bpqueue_mffr([foo], {foo, [2]}, BPQ), + + passed. + +bpqueue_test(In, Out, List, Fold, MapFoldFilter) -> + Q = bpqueue:new(), + {empty, _Q} = Out(Q), + + ok = Fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, ok, Q), + {Q1M, 0} = MapFoldFilter(fun(_P) -> throw(explosion) end, + fun(_V, _N) -> throw(explosion) end, 0, Q), + [] = bpqueue:to_list(Q1M), + + Q1 = In(bar, 3, In(foo, 2, In(foo, 1, Q))), + false = bpqueue:is_empty(Q1), + 3 = bpqueue:len(Q1), + [{foo, [1, 2]}, {bar, [3]}] = List(Q1), + + {{value, foo, 1}, Q3} = Out(Q1), + {{value, foo, 2}, Q4} = Out(Q3), + {{value, bar, 3}, _Q5} = Out(Q4), + + F = fun (QN) -> + MapFoldFilter(fun (foo) -> true; + (_) -> false + end, + fun (2, _Num) -> stop; + (V, Num) -> {bar, -V, V - Num} end, + 0, QN) + end, + {Q6, 0} = F(Q), + [] = bpqueue:to_list(Q6), + {Q7, 1} = F(Q1), + [{bar, [-1]}, {foo, [2]}, {bar, [3]}] = List(Q7), + + Q1. + +bpqueue_mffl(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_l/4, FF1A, FF2A, BPQ). + +bpqueue_mffr(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_r/4, FF1A, FF2A, BPQ). 
+ +bpqueue_mff(Fold, FF1A, FF2A, BPQ) -> + FF1 = fun (Prefixes) -> + fun (P) -> lists:member(P, Prefixes) end + end, + FF2 = fun ({Prefix, Stoppers}) -> + fun (Val, Num) -> + case lists:member(Val, Stoppers) of + true -> stop; + false -> {Prefix, -Val, 1 + Num} + end + end + end, + Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, + + Queue_to_list(Fold(FF1(FF1A), FF2(FF2A), 0, BPQ)). + test_simple_n_element_queue(N) -> Items = lists:seq(1, N), Q = priority_queue_in_all(priority_queue:new(), Items), @@ -355,44 +500,87 @@ test_field_values() -> passed. %% Test that content frames don't exceed frame-max -test_content_framing(FrameMax, Fragments) -> +test_content_framing(FrameMax, BodyBin) -> [Header | Frames] = rabbit_binary_generator:build_simple_content_frames( 1, - #content{class_id = 0, properties_bin = <<>>, - payload_fragments_rev = Fragments}, - FrameMax), + rabbit_binary_generator:ensure_content_encoded( + rabbit_basic:build_content(#'P_basic'{}, BodyBin), + rabbit_framing_amqp_0_9_1), + FrameMax, + rabbit_framing_amqp_0_9_1), %% header is formatted correctly and the size is the total of the %% fragments <<_FrameHeader:7/binary, _ClassAndWeight:4/binary, BodySize:64/unsigned, _Rest/binary>> = list_to_binary(Header), - BodySize = size(list_to_binary(Fragments)), - false = lists:any( - fun (ContentFrame) -> - FrameBinary = list_to_binary(ContentFrame), - %% assert - <<_TypeAndChannel:3/binary, - Size:32/unsigned, - _Payload:Size/binary, - 16#CE>> = FrameBinary, - size(FrameBinary) > FrameMax - end, - Frames), + BodySize = size(BodyBin), + true = lists:all( + fun (ContentFrame) -> + FrameBinary = list_to_binary(ContentFrame), + %% assert + <<_TypeAndChannel:3/binary, + Size:32/unsigned, _Payload:Size/binary, 16#CE>> = + FrameBinary, + size(FrameBinary) =< FrameMax + end, Frames), passed. 
test_content_framing() -> %% no content - passed = test_content_framing(4096, []), - passed = test_content_framing(4096, [<<>>]), + passed = test_content_framing(4096, <<>>), %% easily fit in one frame - passed = test_content_framing(4096, [<<"Easy">>]), + passed = test_content_framing(4096, <<"Easy">>), %% exactly one frame (empty frame = 8 bytes) - passed = test_content_framing(11, [<<"One">>]), + passed = test_content_framing(11, <<"One">>), %% more than one frame - passed = test_content_framing(20, [<<"into more than one frame">>, - <<"This will have to go">>]), + passed = test_content_framing(11, <<"More than one frame">>), passed. +test_content_transcoding() -> + %% there are no guarantees provided by 'clear' - it's just a hint + ClearDecoded = fun rabbit_binary_parser:clear_decoded_content/1, + ClearEncoded = fun rabbit_binary_generator:clear_encoded_content/1, + EnsureDecoded = + fun (C0) -> + C1 = rabbit_binary_parser:ensure_content_decoded(C0), + true = C1#content.properties =/= none, + C1 + end, + EnsureEncoded = + fun (Protocol) -> + fun (C0) -> + C1 = rabbit_binary_generator:ensure_content_encoded( + C0, Protocol), + true = C1#content.properties_bin =/= none, + C1 + end + end, + %% Beyond the assertions in Ensure*, the only testable guarantee + %% is that the operations should never fail. + %% + %% If we were using quickcheck we'd simply stuff all the above + %% into a generator for sequences of operations. In the absence of + %% quickcheck we pick particularly interesting sequences that: + %% + %% - execute every op twice since they are idempotent + %% - invoke clear_decoded, clear_encoded, decode and transcode + %% with one or both of decoded and encoded content present + [begin + sequence_with_content([Op]), + sequence_with_content([ClearEncoded, Op]), + sequence_with_content([ClearDecoded, Op]) + end || Op <- [ClearDecoded, ClearEncoded, EnsureDecoded, + EnsureEncoded(rabbit_framing_amqp_0_9_1), + EnsureEncoded(rabbit_framing_amqp_0_8)]], + passed. 
+ +sequence_with_content(Sequence) -> + lists:foldl(fun (F, V) -> F(F(V)) end, + rabbit_binary_generator:ensure_content_encoded( + rabbit_basic:build_content(#'P_basic'{}, <<>>), + rabbit_framing_amqp_0_9_1), + Sequence). + test_topic_match(P, R) -> test_topic_match(P, R, true). @@ -583,6 +771,30 @@ test_log_management_during_startup() -> ok = control_action(start_app, []), passed. +test_option_parser() -> + % command and arguments should just pass through + ok = check_get_options({["mock_command", "arg1", "arg2"], []}, + [], ["mock_command", "arg1", "arg2"]), + + % get flags + ok = check_get_options( + {["mock_command", "arg1"], [{"-f", true}, {"-f2", false}]}, + [{flag, "-f"}, {flag, "-f2"}], ["mock_command", "arg1", "-f"]), + + % get options + ok = check_get_options( + {["mock_command"], [{"-foo", "bar"}, {"-baz", "notbaz"}]}, + [{option, "-foo", "notfoo"}, {option, "-baz", "notbaz"}], + ["mock_command", "-foo", "bar"]), + + % shuffled and interleaved arguments and options + ok = check_get_options( + {["a1", "a2", "a3"], [{"-o1", "hello"}, {"-o2", "noto2"}, {"-f", true}]}, + [{option, "-o1", "noto1"}, {flag, "-f"}, {option, "-o2", "noto2"}], + ["-f", "a1", "-o1", "hello", "a2", "a3"]), + + passed. 
+ test_cluster_management() -> %% 'cluster' and 'reset' should only work if the app is stopped @@ -601,19 +813,19 @@ test_cluster_management() -> ok = control_action(reset, []), lists:foreach(fun (Arg) -> - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok end, ClusteringSequence), lists:foreach(fun (Arg) -> ok = control_action(reset, []), - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok end, ClusteringSequence), ok = control_action(reset, []), lists:foreach(fun (Arg) -> - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok = control_action(start_app, []), ok = control_action(stop_app, []), ok @@ -621,7 +833,7 @@ test_cluster_management() -> ClusteringSequence), lists:foreach(fun (Arg) -> ok = control_action(reset, []), - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok = control_action(start_app, []), ok = control_action(stop_app, []), ok @@ -632,13 +844,13 @@ test_cluster_management() -> ok = control_action(reset, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% join a non-existing cluster as a ram node ok = control_action(reset, []), - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), SecondaryNode = rabbit_misc:makenode("hare"), case net_adm:ping(SecondaryNode) of @@ -663,18 +875,26 @@ test_cluster_management2(SecondaryNode) -> %% join cluster as a ram node ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS, "invalid1@invalid"]), + ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), ok = control_action(start_app, []), ok = control_action(stop_app, []), %% change cluster config while remaining in same 
cluster - ok = control_action(cluster, ["invalid2@invalid", SecondaryNodeS]), + ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), %% join non-existing cluster as a ram node - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + + %% join empty cluster as a ram node + ok = control_action(cluster, []), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + %% turn ram node into disk node ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS, NodeS]), @@ -682,8 +902,8 @@ test_cluster_management2(SecondaryNode) -> ok = control_action(stop_app, []), %% convert a disk node into a ram node - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% turn a disk node into a ram node ok = control_action(reset, []), @@ -710,7 +930,7 @@ test_cluster_management2(SecondaryNode) -> %% attempt to leave cluster when no other node is alive ok = control_action(cluster, [SecondaryNodeS, NodeS]), ok = control_action(start_app, []), - ok = control_action(stop_app, SecondaryNode, []), + ok = control_action(stop_app, SecondaryNode, [], []), ok = control_action(stop_app, []), {error, {no_running_cluster_nodes, _, _}} = control_action(reset, []), @@ -718,9 +938,9 @@ test_cluster_management2(SecondaryNode) -> %% leave system clustered, with the secondary node as a ram node ok = control_action(force_reset, []), ok = control_action(start_app, []), - ok = control_action(force_reset, SecondaryNode, []), - ok = control_action(cluster, SecondaryNode, [NodeS]), - ok = control_action(start_app, SecondaryNode, []), + ok = control_action(force_reset, SecondaryNode, [], []), + ok = 
control_action(cluster, SecondaryNode, [NodeS], []), + ok = control_action(start_app, SecondaryNode, [], []), passed. @@ -740,15 +960,20 @@ test_user_management() -> {error, {no_such_user, _}} = control_action(list_user_permissions, ["foo"]), {error, {no_such_vhost, _}} = - control_action(list_permissions, ["-p", "/testhost"]), + control_action(list_permissions, [], [{"-p", "/testhost"}]), {error, {invalid_regexp, _, _}} = control_action(set_permissions, ["guest", "+foo", ".*", ".*"]), + {error, {invalid_scope, _}} = + control_action(set_permissions, ["guest", "foo", ".*", ".*"], + [{"-s", "cilent"}]), %% user creation ok = control_action(add_user, ["foo", "bar"]), {error, {user_already_exists, _}} = control_action(add_user, ["foo", "bar"]), ok = control_action(change_password, ["foo", "baz"]), + ok = control_action(set_admin, ["foo"]), + ok = control_action(clear_admin, ["foo"]), ok = control_action(list_users, []), %% vhost creation @@ -758,16 +983,21 @@ test_user_management() -> ok = control_action(list_vhosts, []), %% user/vhost mapping - ok = control_action(set_permissions, ["-p", "/testhost", - "foo", ".*", ".*", ".*"]), - ok = control_action(set_permissions, ["-p", "/testhost", - "foo", ".*", ".*", ".*"]), - ok = control_action(list_permissions, ["-p", "/testhost"]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}, {"-s", "client"}]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}, {"-s", "all"}]), + ok = control_action(list_permissions, [], [{"-p", "/testhost"}]), + ok = control_action(list_permissions, [], [{"-p", "/testhost"}]), ok = control_action(list_user_permissions, ["foo"]), %% user/vhost unmapping - ok = control_action(clear_permissions, ["-p", "/testhost", "foo"]), - ok = 
control_action(clear_permissions, ["-p", "/testhost", "foo"]), + ok = control_action(clear_permissions, ["foo"], [{"-p", "/testhost"}]), + ok = control_action(clear_permissions, ["foo"], [{"-p", "/testhost"}]), %% vhost deletion ok = control_action(delete_vhost, ["/testhost"]), @@ -776,8 +1006,8 @@ test_user_management() -> %% deleting a populated vhost ok = control_action(add_vhost, ["/testhost"]), - ok = control_action(set_permissions, ["-p", "/testhost", - "foo", ".*", ".*", ".*"]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}]), ok = control_action(delete_vhost, ["/testhost"]), %% user deletion @@ -790,8 +1020,9 @@ test_user_management() -> test_server_status() -> %% create a few things so there is some useful information to list Writer = spawn(fun () -> receive shutdown -> ok end end), - Ch = rabbit_channel:start_link(1, self(), Writer, <<"user">>, <<"/">>, - self()), + {ok, Ch} = rabbit_channel:start_link(1, self(), Writer, + <<"user">>, <<"/">>, self(), + fun (_) -> {ok, self()} end), [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>], {new, Queue = #amqqueue{}} <- [rabbit_amqqueue:declare( @@ -808,7 +1039,15 @@ test_server_status() -> ok = info_action(list_exchanges, rabbit_exchange:info_keys(), true), %% list bindings - ok = control_action(list_bindings, []), + ok = info_action(list_bindings, rabbit_binding:info_keys(), true), + %% misc binding listing APIs + [_|_] = rabbit_binding:list_for_exchange( + rabbit_misc:r(<<"/">>, exchange, <<"">>)), + [_] = rabbit_binding:list_for_queue( + rabbit_misc:r(<<"/">>, queue, <<"foo">>)), + [_] = rabbit_binding:list_for_exchange_and_queue( + rabbit_misc:r(<<"/">>, exchange, <<"">>), + rabbit_misc:r(<<"/">>, queue, <<"foo">>)), %% list connections [#listener{host = H, port = P} | _] = @@ -832,180 +1071,112 @@ test_server_status() -> %% cleanup [{ok, _} = rabbit_amqqueue:delete(QR, false, false) || QR <- [Q, Q2]], + + unlink(Ch), ok = rabbit_channel:shutdown(Ch), passed. 
-test_hooks() -> - %% Firing of hooks calls all hooks in an isolated manner - rabbit_hooks:subscribe(test_hook, test, {rabbit_tests, handle_hook, []}), - rabbit_hooks:subscribe(test_hook, test2, {rabbit_tests, handle_hook, []}), - rabbit_hooks:subscribe(test_hook2, test2, {rabbit_tests, handle_hook, []}), - rabbit_hooks:trigger(test_hook, [arg1, arg2]), - [arg1, arg2] = get(test_hook_test_fired), - [arg1, arg2] = get(test_hook_test2_fired), - undefined = get(test_hook2_test2_fired), - - %% Hook Deletion works - put(test_hook_test_fired, undefined), - put(test_hook_test2_fired, undefined), - rabbit_hooks:unsubscribe(test_hook, test), - rabbit_hooks:trigger(test_hook, [arg3, arg4]), - undefined = get(test_hook_test_fired), - [arg3, arg4] = get(test_hook_test2_fired), - undefined = get(test_hook2_test2_fired), - - %% Catches exceptions from bad hooks - rabbit_hooks:subscribe(test_hook3, test, {rabbit_tests, bad_handle_hook, []}), - ok = rabbit_hooks:trigger(test_hook3, []), - - %% Passing extra arguments to hooks - rabbit_hooks:subscribe(arg_hook, test, {rabbit_tests, extra_arg_hook, [1, 3]}), - rabbit_hooks:trigger(arg_hook, [arg1, arg2]), - {[arg1, arg2], 1, 3} = get(arg_hook_test_fired), - - %% Invoking Pids - Remote = fun () -> - receive - {rabbitmq_hook,[remote_test,test,[],Target]} -> - Target ! invoked - end - end, - P = spawn(Remote), - rabbit_hooks:subscribe(remote_test, test, {rabbit_hooks, notify_remote, [P, [self()]]}), - rabbit_hooks:trigger(remote_test, []), - receive - invoked -> ok - after 100 -> - io:format("Remote hook not invoked"), - throw(timeout) +test_spawn(Receiver) -> + Me = self(), + Writer = spawn(fun () -> Receiver(Me) end), + {ok, Ch} = rabbit_channel:start_link(1, Me, Writer, + <<"guest">>, <<"/">>, self(), + fun (_) -> {ok, self()} end), + ok = rabbit_channel:do(Ch, #'channel.open'{}), + receive #'channel.open_ok'{} -> ok + after 1000 -> throw(failed_to_receive_channel_open_ok) end, - passed. + {Writer, Ch}. 
-test_memory_pressure_receiver(Pid) -> +test_statistics_receiver(Pid) -> receive shutdown -> ok; {send_command, Method} -> - ok = case Method of - #'channel.flow'{} -> ok; - #'basic.qos_ok'{} -> ok; - #'channel.open_ok'{} -> ok - end, Pid ! Method, - test_memory_pressure_receiver(Pid); - sync -> - Pid ! sync, - test_memory_pressure_receiver(Pid) + test_statistics_receiver(Pid) end. -test_memory_pressure_receive_flow(Active) -> - receive #'channel.flow'{active = Active} -> ok - after 1000 -> throw(failed_to_receive_channel_flow) - end, - receive #'channel.flow'{} -> - throw(pipelining_sync_commands_detected) - after 0 -> - ok - end. - -test_memory_pressure_sync(Ch, Writer) -> - ok = rabbit_channel:do(Ch, #'basic.qos'{}), - Writer ! sync, - receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, - receive #'basic.qos_ok'{} -> ok - after 1000 -> throw(failed_to_receive_basic_qos_ok) +test_statistics_event_receiver(Pid) -> + receive + Foo -> + Pid ! Foo, + test_statistics_event_receiver(Pid) end. -test_memory_pressure_spawn() -> - Me = self(), - Writer = spawn(fun () -> test_memory_pressure_receiver(Me) end), - Ch = rabbit_channel:start_link(1, self(), Writer, <<"user">>, <<"/">>, - self()), - ok = rabbit_channel:do(Ch, #'channel.open'{}), - MRef = erlang:monitor(process, Ch), - receive #'channel.open_ok'{} -> ok - after 1000 -> throw(failed_to_receive_channel_open_ok) - end, - {Writer, Ch, MRef}. - -expect_normal_channel_termination(MRef, Ch) -> - receive {'DOWN', MRef, process, Ch, normal} -> ok - after 1000 -> throw(channel_failed_to_exit) +test_statistics_receive_event(Ch, Matcher) -> + rabbit_channel:flush(Ch), + rabbit_channel:emit_stats(Ch), + test_statistics_receive_event1(Ch, Matcher). + +test_statistics_receive_event1(Ch, Matcher) -> + receive #event{type = channel_stats, props = Props} -> + case Matcher(Props) of + true -> Props; + _ -> test_statistics_receive_event1(Ch, Matcher) + end + after 1000 -> throw(failed_to_receive_event) end. 
-test_memory_pressure() -> - {Writer0, Ch0, MRef0} = test_memory_pressure_spawn(), - [ok = rabbit_channel:conserve_memory(Ch0, Conserve) || - Conserve <- [false, false, true, false, true, true, false]], - ok = test_memory_pressure_sync(Ch0, Writer0), - receive {'DOWN', MRef0, process, Ch0, Info0} -> - throw({channel_died_early, Info0}) - after 0 -> ok - end, - - %% we should have just 1 active=false waiting for us - ok = test_memory_pressure_receive_flow(false), - - %% if we reply with flow_ok, we should immediately get an - %% active=true back - ok = rabbit_channel:do(Ch0, #'channel.flow_ok'{active = false}), - ok = test_memory_pressure_receive_flow(true), - - %% if we publish at this point, the channel should die - Content = #content{class_id = element(1, rabbit_framing:method_id( - 'basic.publish')), - properties = none, - properties_bin = <<>>, - payload_fragments_rev = []}, - ok = rabbit_channel:do(Ch0, #'basic.publish'{}, Content), - expect_normal_channel_termination(MRef0, Ch0), - - {Writer1, Ch1, MRef1} = test_memory_pressure_spawn(), - ok = rabbit_channel:conserve_memory(Ch1, true), - ok = test_memory_pressure_receive_flow(false), - ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}), - ok = test_memory_pressure_sync(Ch1, Writer1), - ok = rabbit_channel:conserve_memory(Ch1, false), - ok = test_memory_pressure_receive_flow(true), - %% send back the wrong flow_ok. Channel should die. - ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}), - expect_normal_channel_termination(MRef1, Ch1), - - {_Writer2, Ch2, MRef2} = test_memory_pressure_spawn(), - %% just out of the blue, send a flow_ok. Life should end. 
- ok = rabbit_channel:do(Ch2, #'channel.flow_ok'{active = true}), - expect_normal_channel_termination(MRef2, Ch2), - - {_Writer3, Ch3, MRef3} = test_memory_pressure_spawn(), - ok = rabbit_channel:conserve_memory(Ch3, true), - receive {'DOWN', MRef3, process, Ch3, _} -> - ok - after 12000 -> - throw(channel_failed_to_exit) - end, - - alarm_handler:set_alarm({vm_memory_high_watermark, []}), - Me = self(), - Writer4 = spawn(fun () -> test_memory_pressure_receiver(Me) end), - Ch4 = rabbit_channel:start_link(1, self(), Writer4, <<"user">>, <<"/">>, - self()), - ok = rabbit_channel:do(Ch4, #'channel.open'{}), - MRef4 = erlang:monitor(process, Ch4), - Writer4 ! sync, - receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, - receive #'channel.open_ok'{} -> throw(unexpected_channel_open_ok) - after 0 -> ok - end, - alarm_handler:clear_alarm(vm_memory_high_watermark), - Writer4 ! sync, - receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, - receive #'channel.open_ok'{} -> ok - after 1000 -> throw(failed_to_receive_channel_open_ok) - end, - rabbit_channel:shutdown(Ch4), - expect_normal_channel_termination(MRef4, Ch4), - +test_statistics() -> + application:set_env(rabbit, collect_statistics, fine), + + %% ATM this just tests the queue / exchange stats in channels. That's + %% by far the most complex code though. 
+ + %% Set up a channel and queue + {_Writer, Ch} = test_spawn(fun test_statistics_receiver/1), + rabbit_channel:do(Ch, #'queue.declare'{}), + QName = receive #'queue.declare_ok'{queue = Q0} -> + Q0 + after 1000 -> throw(failed_to_receive_queue_declare_ok) + end, + {ok, Q} = rabbit_amqqueue:lookup(rabbit_misc:r(<<"/">>, queue, QName)), + QPid = Q#amqqueue.pid, + X = rabbit_misc:r(<<"/">>, exchange, <<"">>), + + rabbit_tests_event_receiver:start(self()), + + %% Check stats empty + Event = test_statistics_receive_event(Ch, fun (_) -> true end), + [] = proplists:get_value(channel_queue_stats, Event), + [] = proplists:get_value(channel_exchange_stats, Event), + [] = proplists:get_value(channel_queue_exchange_stats, Event), + + %% Publish and get a message + rabbit_channel:do(Ch, #'basic.publish'{exchange = <<"">>, + routing_key = QName}, + rabbit_basic:build_content(#'P_basic'{}, <<"">>)), + rabbit_channel:do(Ch, #'basic.get'{queue = QName}), + + %% Check the stats reflect that + Event2 = test_statistics_receive_event( + Ch, + fun (E) -> + length(proplists:get_value( + channel_queue_exchange_stats, E)) > 0 + end), + [{QPid,[{get,1}]}] = proplists:get_value(channel_queue_stats, Event2), + [{X,[{publish,1}]}] = proplists:get_value(channel_exchange_stats, Event2), + [{{QPid,X},[{publish,1}]}] = + proplists:get_value(channel_queue_exchange_stats, Event2), + + %% Check the stats remove stuff on queue deletion + rabbit_channel:do(Ch, #'queue.delete'{queue = QName}), + Event3 = test_statistics_receive_event( + Ch, + fun (E) -> + length(proplists:get_value( + channel_queue_exchange_stats, E)) == 0 + end), + + [] = proplists:get_value(channel_queue_stats, Event3), + [{X,[{publish,1}]}] = proplists:get_value(channel_exchange_stats, Event3), + [] = proplists:get_value(channel_queue_exchange_stats, Event3), + + rabbit_channel:shutdown(Ch), + rabbit_tests_event_receiver:stop(), passed. 
test_delegates_async(SecondaryNode) -> @@ -1097,11 +1268,16 @@ test_delegates_sync(SecondaryNode) -> %--------------------------------------------------------------------- -control_action(Command, Args) -> control_action(Command, node(), Args). +control_action(Command, Args) -> + control_action(Command, node(), Args, default_options()). + +control_action(Command, Args, NewOpts) -> + control_action(Command, node(), Args, + expand_options(default_options(), NewOpts)). -control_action(Command, Node, Args) -> +control_action(Command, Node, Args, Opts) -> case catch rabbit_control:action( - Command, Node, Args, + Command, Node, Args, Opts, fun (Format, Args1) -> io:format(Format ++ " ...~n", Args1) end) of @@ -1115,13 +1291,28 @@ control_action(Command, Node, Args) -> info_action(Command, Args, CheckVHost) -> ok = control_action(Command, []), - if CheckVHost -> ok = control_action(Command, ["-p", "/"]); + if CheckVHost -> ok = control_action(Command, []); true -> ok end, ok = control_action(Command, lists:map(fun atom_to_list/1, Args)), {bad_argument, dummy} = control_action(Command, ["dummy"]), ok. +default_options() -> [{"-s", "client"}, {"-p", "/"}, {"-q", "false"}]. + +expand_options(As, Bs) -> + lists:foldl(fun({K, _}=A, R) -> + case proplists:is_defined(K, R) of + true -> R; + false -> [A | R] + end + end, Bs, As). + +check_get_options({ExpArgs, ExpOpts}, Defs, Args) -> + {ExpArgs, ResOpts} = rabbit_misc:get_options(Defs, Args), + true = lists:sort(ExpOpts) == lists:sort(ResOpts), % don't care about the order + ok. + empty_files(Files) -> [case file:read_file_info(File) of {ok, FInfo} -> FInfo#file_info.size == 0; @@ -1179,10 +1370,608 @@ delete_log_handlers(Handlers) -> Handler <- Handlers], ok. -handle_hook(HookName, Handler, Args) -> - A = atom_to_list(HookName) ++ "_" ++ atom_to_list(Handler) ++ "_fired", - put(list_to_atom(A), Args). -bad_handle_hook(_, _, _) -> - bad:bad(). 
-extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> - handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). +test_supervisor_delayed_restart() -> + test_sup:test_supervisor_delayed_restart(). + +test_file_handle_cache() -> + %% test copying when there is just one spare handle + Limit = file_handle_cache:get_limit(), + ok = file_handle_cache:set_limit(5), %% 1 or 2 sockets, 2 msg_stores + TmpDir = filename:join(rabbit_mnesia:dir(), "tmp"), + ok = filelib:ensure_dir(filename:join(TmpDir, "nothing")), + Pid = spawn(fun () -> {ok, Hdl} = file_handle_cache:open( + filename:join(TmpDir, "file3"), + [write], []), + receive close -> ok end, + file_handle_cache:delete(Hdl) + end), + Src = filename:join(TmpDir, "file1"), + Dst = filename:join(TmpDir, "file2"), + Content = <<"foo">>, + ok = file:write_file(Src, Content), + {ok, SrcHdl} = file_handle_cache:open(Src, [read], []), + {ok, DstHdl} = file_handle_cache:open(Dst, [write], []), + Size = size(Content), + {ok, Size} = file_handle_cache:copy(SrcHdl, DstHdl, Size), + ok = file_handle_cache:delete(SrcHdl), + file_handle_cache:delete(DstHdl), + Pid ! close, + ok = file_handle_cache:set_limit(Limit), + passed. + +test_backing_queue() -> + case application:get_env(rabbit, backing_queue_module) of + {ok, rabbit_variable_queue} -> + {ok, FileSizeLimit} = + application:get_env(rabbit, msg_store_file_size_limit), + application:set_env(rabbit, msg_store_file_size_limit, 512, + infinity), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + application:set_env(rabbit, queue_index_max_journal_entries, 128, + infinity), + passed = test_msg_store(), + application:set_env(rabbit, msg_store_file_size_limit, + FileSizeLimit, infinity), + passed = test_queue_index(), + passed = test_variable_queue(), + passed = test_queue_recover(), + application:set_env(rabbit, queue_index_max_journal_entries, + MaxJournal, infinity), + passed; + _ -> + passed + end. 
+ +restart_msg_store_empty() -> + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + undefined, {fun (ok) -> finished end, ok}). + +guid_bin(X) -> + erlang:md5(term_to_binary(X)). + +msg_store_contains(Atom, Guids) -> + Atom = lists:foldl( + fun (Guid, Atom1) when Atom1 =:= Atom -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end, + Atom, Guids). + +msg_store_sync(Guids) -> + Ref = make_ref(), + Self = self(), + ok = rabbit_msg_store:sync(?PERSISTENT_MSG_STORE, Guids, + fun () -> Self ! {sync, Ref} end), + receive + {sync, Ref} -> ok + after + 10000 -> + io:format("Sync from msg_store missing for guids ~p~n", [Guids]), + throw(timeout) + end. + +msg_store_read(Guids, MSCState) -> + lists:foldl(fun (Guid, MSCStateM) -> + {{ok, Guid}, MSCStateN} = rabbit_msg_store:read( + ?PERSISTENT_MSG_STORE, + Guid, MSCStateM), + MSCStateN + end, MSCState, Guids). + +msg_store_write(Guids, MSCState) -> + lists:foldl(fun (Guid, {ok, MSCStateN}) -> + rabbit_msg_store:write(?PERSISTENT_MSG_STORE, + Guid, Guid, MSCStateN) + end, {ok, MSCState}, Guids). + +msg_store_remove(Guids) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids). + +foreach_with_msg_store_client(MsgStore, Ref, Fun, L) -> + rabbit_msg_store:client_terminate( + lists:foldl(fun (Guid, MSCState) -> Fun(Guid, MsgStore, MSCState) end, + rabbit_msg_store:client_init(MsgStore, Ref), L), MsgStore). 
+ +test_msg_store() -> + restart_msg_store_empty(), + Self = self(), + Guids = [guid_bin(M) || M <- lists:seq(1,100)], + {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids), + %% check we don't contain any of the msgs we're about to publish + false = msg_store_contains(false, Guids), + Ref = rabbit_guid:guid(), + MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), + %% publish the first half + {ok, MSCState1} = msg_store_write(Guids1stHalf, MSCState), + %% sync on the first half + ok = msg_store_sync(Guids1stHalf), + %% publish the second half + {ok, MSCState2} = msg_store_write(Guids2ndHalf, MSCState1), + %% sync on the first half again - the msg_store will be dirty, but + %% we won't need the fsync + ok = msg_store_sync(Guids1stHalf), + %% check they're all in there + true = msg_store_contains(true, Guids), + %% publish the latter half twice so we hit the caching and ref count code + {ok, MSCState3} = msg_store_write(Guids2ndHalf, MSCState2), + %% check they're still all in there + true = msg_store_contains(true, Guids), + %% sync on the 2nd half, but do lots of individual syncs to try + %% and cause coalescing to happen + ok = lists:foldl( + fun (Guid, ok) -> rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, + [Guid], fun () -> Self ! 
{sync, Guid} end) + end, ok, Guids2ndHalf), + lists:foldl( + fun(Guid, ok) -> + receive + {sync, Guid} -> ok + after + 10000 -> + io:format("Sync from msg_store missing (guid: ~p)~n", + [Guid]), + throw(timeout) + end + end, ok, Guids2ndHalf), + %% it's very likely we're not dirty here, so the 1st half sync + %% should hit a different code path + ok = msg_store_sync(Guids1stHalf), + %% read them all + MSCState4 = msg_store_read(Guids, MSCState3), + %% read them all again - this will hit the cache, not disk + MSCState5 = msg_store_read(Guids, MSCState4), + %% remove them all + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids), + %% check first half doesn't exist + false = msg_store_contains(false, Guids1stHalf), + %% check second half does exist + true = msg_store_contains(true, Guids2ndHalf), + %% read the second half again + MSCState6 = msg_store_read(Guids2ndHalf, MSCState5), + %% release the second half, just for fun (aka code coverage) + ok = rabbit_msg_store:release(?PERSISTENT_MSG_STORE, Guids2ndHalf), + %% read the second half again, just for fun (aka code coverage) + MSCState7 = msg_store_read(Guids2ndHalf, MSCState6), + ok = rabbit_msg_store:client_terminate(MSCState7, ?PERSISTENT_MSG_STORE), + %% stop and restart, preserving every other msg in 2nd half + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + [], {fun ([]) -> finished; + ([Guid|GuidsTail]) + when length(GuidsTail) rem 2 == 0 -> + {Guid, 1, GuidsTail}; + ([Guid|GuidsTail]) -> + {Guid, 0, GuidsTail} + end, Guids2ndHalf}), + %% check we have the right msgs left + lists:foldl( + fun (Guid, Bool) -> + not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid)) + end, false, Guids2ndHalf), + %% restart empty + restart_msg_store_empty(), + %% check we don't contain any of the msgs + false = msg_store_contains(false, Guids), + %% publish the first half again + MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), + {ok, 
MSCState9} = msg_store_write(Guids1stHalf, MSCState8), + %% this should force some sort of sync internally otherwise misread + ok = rabbit_msg_store:client_terminate( + msg_store_read(Guids1stHalf, MSCState9), ?PERSISTENT_MSG_STORE), + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids1stHalf), + %% restart empty + restart_msg_store_empty(), %% now safe to reuse guids + %% push a lot of msgs in... at least 100 files worth + {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit), + PayloadSizeBits = 65536, + BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)), + GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], + Payload = << 0:PayloadSizeBits >>, + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, MsgStore, MSCStateM) -> + {ok, MSCStateN} = rabbit_msg_store:write( + MsgStore, Guid, Payload, MSCStateM), + MSCStateN + end, GuidsBig), + %% now read them to ensure we hit the fast client-side reading + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, MsgStore, MSCStateM) -> + {{ok, Payload}, MSCStateN} = rabbit_msg_store:read( + MsgStore, Guid, MSCStateM), + MSCStateN + end, GuidsBig), + %% .., then 3s by 1... + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount, 1, -3)]), + %% .., then remove 3s by 2, from the young end first. This hits + %% GC (under 50% good data left, but no empty files. Must GC). + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]), + %% .., then remove 3s by 3, from the young end first. This hits + %% GC... + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]), + %% ensure empty + false = msg_store_contains(false, GuidsBig), + %% restart empty + restart_msg_store_empty(), + passed. + +queue_name(Name) -> + rabbit_misc:r(<<"/">>, queue, Name). + +test_queue() -> + queue_name(<<"test">>). 
+ +init_test_queue() -> + rabbit_queue_index:init( + test_queue(), true, false, + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end). + +restart_test_queue(Qi) -> + _ = rabbit_queue_index:terminate([], Qi), + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([test_queue()]), + init_test_queue(). + +empty_test_queue() -> + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([]), + {0, _Terms, Qi} = init_test_queue(), + _ = rabbit_queue_index:delete_and_terminate(Qi), + ok. + +with_empty_test_queue(Fun) -> + ok = empty_test_queue(), + {0, _Terms, Qi} = init_test_queue(), + rabbit_queue_index:delete_and_terminate(Fun(Qi)). + +queue_index_publish(SeqIds, Persistent, Qi) -> + Ref = rabbit_guid:guid(), + MsgStore = case Persistent of + true -> ?PERSISTENT_MSG_STORE; + false -> ?TRANSIENT_MSG_STORE + end, + {A, B, MSCStateEnd} = + lists:foldl( + fun (SeqId, {QiN, SeqIdsGuidsAcc, MSCStateN}) -> + Guid = rabbit_guid:guid(), + QiM = rabbit_queue_index:publish( + Guid, SeqId, Persistent, QiN), + {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, Guid, + Guid, MSCStateN), + {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc], MSCStateM} + end, {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)}, SeqIds), + ok = rabbit_msg_store:client_delete_and_terminate( + MSCStateEnd, MsgStore, Ref), + {A, B}. + +verify_read_with_published(_Delivered, _Persistent, [], _) -> + ok; +verify_read_with_published(Delivered, Persistent, + [{Guid, SeqId, Persistent, Delivered}|Read], + [{SeqId, Guid}|Published]) -> + verify_read_with_published(Delivered, Persistent, Read, Published); +verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> + ko. 
+ +test_queue_index() -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + TwoSegs = SegmentSize + SegmentSize, + MostOfASegment = trunc(SegmentSize*0.75), + SeqIdsA = lists:seq(0, MostOfASegment-1), + SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), + SeqIdsC = lists:seq(0, trunc(SegmentSize/2)), + SeqIdsD = lists:seq(0, SegmentSize*4), + + with_empty_test_queue( + fun (Qi0) -> + {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), + {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), + {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), + {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), + ok = verify_read_with_published(false, false, ReadA, + lists:reverse(SeqIdsGuidsA)), + %% should get length back as 0, as all the msgs were transient + {0, _Terms1, Qi6} = restart_test_queue(Qi4), + {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), + {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), + {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), + {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), + ok = verify_read_with_published(false, true, ReadB, + lists:reverse(SeqIdsGuidsB)), + %% should get length back as MostOfASegment + LenB = length(SeqIdsB), + {LenB, _Terms2, Qi12} = restart_test_queue(Qi10), + {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), + Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), + {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), + ok = verify_read_with_published(true, true, ReadC, + lists:reverse(SeqIdsGuidsB)), + Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), + Qi17 = rabbit_queue_index:flush(Qi16), + %% Everything will have gone now because #pubs == #acks + {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), + %% should get length back as 0 because all persistent + %% msgs have been acked + {0, _Terms3, Qi19} = restart_test_queue(Qi18), + Qi19 + end), + + %% These next bits are just to hit the auto deletion of segment files. 
+ %% First, partials: + %% a) partial pub+del+ack, then move to new segment + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsC, Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], + false, Qi4), + Qi5 + end), + + %% b) partial pub+del, then move to new segment, then ack all in old segment + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + {Qi3, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], + false, Qi2), + Qi4 = rabbit_queue_index:ack(SeqIdsC, Qi3), + rabbit_queue_index:flush(Qi4) + end), + + %% c) just fill up several segments of all pubs, then +dels, then +acks + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsD, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsD, Qi2), + rabbit_queue_index:flush(Qi3) + end), + + %% d) get messages in all states to a segment, then flush, then do + %% the same again, don't flush and read. This will hit all + %% possibilities in combining the segment with the journal. 
+ with_empty_test_queue( + fun (Qi0) -> + {Qi1, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], + false, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {[], Qi8} = rabbit_queue_index:read(0, 4, Qi7), + {ReadD, Qi9} = rabbit_queue_index:read(4, 7, Qi8), + ok = verify_read_with_published(true, false, ReadD, + [Four, Five, Six]), + {ReadE, Qi10} = rabbit_queue_index:read(7, 9, Qi9), + ok = verify_read_with_published(false, false, ReadE, + [Seven, Eight]), + Qi10 + end), + + %% e) as for (d), but use terminate instead of read, which will + %% exercise journal_minus_segment, not segment_plus_journal. + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], + true, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + {5, _Terms9, Qi4} = restart_test_queue(Qi3), + {Qi5, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {5, _Terms10, Qi8} = restart_test_queue(Qi7), + Qi8 + end), + + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([]), + + passed. + +variable_queue_publish(IsPersistent, Count, VQ) -> + lists:foldl( + fun (_N, VQN) -> + rabbit_variable_queue:publish( + rabbit_basic:message( + rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = case IsPersistent of + true -> 2; + false -> 1 + end}, <<>>), VQN) + end, VQ, lists:seq(1, Count)). 
+ +variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> + lists:foldl(fun (N, {VQN, AckTagsAcc}) -> + Rem = Len - N, + {{#basic_message { is_persistent = IsPersistent }, + IsDelivered, AckTagN, Rem}, VQM} = + rabbit_variable_queue:fetch(true, VQN), + {VQM, [AckTagN | AckTagsAcc]} + end, {VQ, []}, lists:seq(1, Count)). + +assert_prop(List, Prop, Value) -> + Value = proplists:get_value(Prop, List). + +assert_props(List, PropVals) -> + [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals]. + +with_fresh_variable_queue(Fun) -> + ok = empty_test_queue(), + VQ = rabbit_variable_queue:init(test_queue(), true, false), + S0 = rabbit_variable_queue:status(VQ), + assert_props(S0, [{q1, 0}, {q2, 0}, + {delta, {delta, undefined, 0, undefined}}, + {q3, 0}, {q4, 0}, + {len, 0}]), + _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)), + passed. + +test_variable_queue() -> + [passed = with_fresh_variable_queue(F) || + F <- [fun test_variable_queue_dynamic_duration_change/1, + fun test_variable_queue_partial_segments_delta_thing/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1]], + passed. 
+ +test_variable_queue_dynamic_duration_change(VQ0) -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + + %% start by sending in a couple of segments worth + Len = 2*SegmentSize, + VQ1 = variable_queue_publish(false, Len, VQ0), + + %% squeeze and relax queue + Churn = Len div 32, + VQ2 = publish_fetch_and_ack(Churn, Len, VQ1), + {Duration, VQ3} = rabbit_variable_queue:ram_duration(VQ2), + VQ7 = lists:foldl( + fun (Duration1, VQ4) -> + {_Duration, VQ5} = rabbit_variable_queue:ram_duration(VQ4), + io:format("~p:~n~p~n", + [Duration1, rabbit_variable_queue:status(VQ5)]), + VQ6 = rabbit_variable_queue:set_ram_duration_target( + Duration1, VQ5), + publish_fetch_and_ack(Churn, Len, VQ6) + end, VQ3, [Duration / 4, 0, Duration / 4, infinity]), + + %% drain + {VQ8, AckTags} = variable_queue_fetch(Len, false, false, Len, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags, VQ8), + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), + + VQ10. + +publish_fetch_and_ack(0, _Len, VQ0) -> + VQ0; +publish_fetch_and_ack(N, Len, VQ0) -> + VQ1 = variable_queue_publish(false, 1, VQ0), + {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), + publish_fetch_and_ack(N-1, Len, rabbit_variable_queue:ack([AckTag], VQ2)). 
+ +test_variable_queue_partial_segments_delta_thing(VQ0) -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + HalfSegment = SegmentSize div 2, + OneAndAHalfSegment = SegmentSize + HalfSegment, + VQ1 = variable_queue_publish(true, OneAndAHalfSegment, VQ0), + {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), + VQ3 = check_variable_queue_status( + rabbit_variable_queue:set_ram_duration_target(0, VQ2), + %% one segment in q3 as betas, and half a segment in delta + [{delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment}]), + VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), + VQ5 = check_variable_queue_status( + variable_queue_publish(true, 1, VQ4), + %% one alpha, but it's in the same segment as the deltas + [{q1, 1}, + {delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment + 1}]), + {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, + SegmentSize + HalfSegment + 1, VQ5), + VQ7 = check_variable_queue_status( + VQ6, + %% the half segment should now be in q3 as betas + [{q1, 1}, + {delta, {delta, undefined, 0, undefined}}, + {q3, HalfSegment}, + {len, HalfSegment + 1}]), + {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, + HalfSegment + 1, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8), + %% should be empty now + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), + VQ10. + +check_variable_queue_status(VQ0, Props) -> + VQ1 = variable_queue_wait_for_shuffling_end(VQ0), + S = rabbit_variable_queue:status(VQ1), + io:format("~p~n", [S]), + assert_props(S, Props), + VQ1. + +variable_queue_wait_for_shuffling_end(VQ) -> + case rabbit_variable_queue:needs_idle_timeout(VQ) of + true -> variable_queue_wait_for_shuffling_end( + rabbit_variable_queue:idle_timeout(VQ)); + false -> VQ + end. 
+ +test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> + Count = 2 * rabbit_queue_index:next_segment_boundary(0), + VQ1 = variable_queue_publish(true, Count, VQ0), + VQ2 = variable_queue_publish(false, Count, VQ1), + VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), + {VQ4, _AckTags} = variable_queue_fetch(Count, true, false, + Count + Count, VQ3), + {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, + Count, VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {{_Msg1, true, _AckTag1, Count1}, VQ8} = + rabbit_variable_queue:fetch(true, VQ7), + VQ9 = variable_queue_publish(false, 1, VQ8), + VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9), + {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10), + {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11), + VQ12. + +test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> + VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), + VQ2 = variable_queue_publish(false, 4, VQ1), + {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2), + VQ4 = rabbit_variable_queue:requeue(AckTags, VQ3), + VQ5 = rabbit_variable_queue:idle_timeout(VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), + VQ8. 
+ +test_queue_recover() -> + Count = 2 * rabbit_queue_index:next_segment_boundary(0), + TxID = rabbit_guid:guid(), + {new, #amqqueue { pid = QPid, name = QName }} = + rabbit_amqqueue:declare(test_queue(), true, false, [], none), + Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = 2}, <<>>), + Delivery = #delivery{mandatory = false, immediate = false, txn = TxID, + sender = self(), message = Msg}, + [true = rabbit_amqqueue:deliver(QPid, Delivery) || + _ <- lists:seq(1, Count)], + rabbit_amqqueue:commit_all([QPid], TxID, self()), + exit(QPid, kill), + MRef = erlang:monitor(process, QPid), + receive {'DOWN', MRef, process, QPid, _Info} -> ok + after 10000 -> exit(timeout_waiting_for_queue_death) + end, + rabbit_amqqueue:stop(), + ok = rabbit_amqqueue:start(), + rabbit_amqqueue:with_or_die( + QName, + fun (Q1 = #amqqueue { pid = QPid1 }) -> + CountMinusOne = Count - 1, + {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} = + rabbit_amqqueue:basic_get(Q1, self(), false), + exit(QPid1, shutdown), + VQ1 = rabbit_variable_queue:init(QName, true, true), + {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = + rabbit_variable_queue:fetch(true, VQ1), + _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2), + rabbit_amqqueue:internal_delete(QName) + end), + passed. diff --git a/src/rabbit_tests_event_receiver.erl b/src/rabbit_tests_event_receiver.erl new file mode 100644 index 00000000..a92e3da7 --- /dev/null +++ b/src/rabbit_tests_event_receiver.erl @@ -0,0 +1,66 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_tests_event_receiver). + +-export([start/1, stop/0]). + +-export([init/1, handle_call/2, handle_event/2, handle_info/2, + terminate/2, code_change/3]). + +start(Pid) -> + gen_event:add_handler(rabbit_event, ?MODULE, [Pid]). + +stop() -> + gen_event:delete_handler(rabbit_event, ?MODULE, []). + +%%---------------------------------------------------------------------------- + +init([Pid]) -> + {ok, Pid}. + +handle_call(_Request, Pid) -> + {ok, not_understood, Pid}. + +handle_event(Event, Pid) -> + Pid ! Event, + {ok, Pid}. + +handle_info(_Info, Pid) -> + {ok, Pid}. + +terminate(_Arg, _Pid) -> + ok. + +code_change(_OldVsn, Pid, _Extra) -> + {ok, Pid}. 
+ +%%---------------------------------------------------------------------------- diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl new file mode 100644 index 00000000..0b6a15ec --- /dev/null +++ b/src/rabbit_types.erl @@ -0,0 +1,152 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_types). + +-include("rabbit.hrl"). + +-ifdef(use_specs). 
+ +-export_type([txn/0, maybe/1, info/0, info_key/0, message/0, basic_message/0, + delivery/0, content/0, decoded_content/0, undecoded_content/0, + unencoded_content/0, encoded_content/0, vhost/0, ctag/0, + amqp_error/0, r/1, r2/2, r3/3, listener/0, + binding/0, amqqueue/0, exchange/0, connection/0, protocol/0, + user/0, ok/1, error/1, ok_or_error/1, ok_or_error2/2, + ok_pid_or_error/0, channel_exit/0, connection_exit/0]). + +-type(channel_exit() :: no_return()). +-type(connection_exit() :: no_return()). + +-type(maybe(T) :: T | 'none'). +-type(vhost() :: binary()). +-type(ctag() :: binary()). + +%% TODO: make this more precise by tying specific class_ids to +%% specific properties +-type(undecoded_content() :: + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: 'none', + properties_bin :: binary(), + payload_fragments_rev :: [binary()]} | + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: rabbit_framing:amqp_property_record(), + properties_bin :: 'none', + payload_fragments_rev :: [binary()]}). +-type(unencoded_content() :: undecoded_content()). +-type(decoded_content() :: + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: rabbit_framing:amqp_property_record(), + properties_bin :: maybe(binary()), + payload_fragments_rev :: [binary()]}). +-type(encoded_content() :: + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: maybe(rabbit_framing:amqp_property_record()), + properties_bin :: binary(), + payload_fragments_rev :: [binary()]}). +-type(content() :: undecoded_content() | decoded_content()). +-type(basic_message() :: + #basic_message{exchange_name :: rabbit_exchange:name(), + routing_key :: rabbit_router:routing_key(), + content :: content(), + guid :: rabbit_guid:guid(), + is_persistent :: boolean()}). +-type(message() :: basic_message()). 
+-type(delivery() :: + #delivery{mandatory :: boolean(), + immediate :: boolean(), + txn :: maybe(txn()), + sender :: pid(), + message :: message()}). + +%% this is really an abstract type, but dialyzer does not support them +-type(txn() :: rabbit_guid:guid()). + +-type(info_key() :: atom()). +-type(info() :: {info_key(), any()}). + +-type(amqp_error() :: + #amqp_error{name :: rabbit_framing:amqp_exception(), + explanation :: string(), + method :: rabbit_framing:amqp_method_name()}). + +-type(r(Kind) :: + r2(vhost(), Kind)). +-type(r2(VirtualHost, Kind) :: + r3(VirtualHost, Kind, rabbit_misc:resource_name())). +-type(r3(VirtualHost, Kind, Name) :: + #resource{virtual_host :: VirtualHost, + kind :: Kind, + name :: Name}). + +-type(listener() :: + #listener{node :: node(), + protocol :: atom(), + host :: rabbit_networking:hostname(), + port :: rabbit_networking:ip_port()}). + +-type(binding() :: + #binding{exchange_name :: rabbit_exchange:name(), + queue_name :: rabbit_amqqueue:name(), + key :: rabbit_binding:key(), + args :: rabbit_framing:amqp_table()}). + +-type(amqqueue() :: + #amqqueue{name :: rabbit_amqqueue:name(), + durable :: boolean(), + auto_delete :: boolean(), + exclusive_owner :: rabbit_types:maybe(pid()), + arguments :: rabbit_framing:amqp_table(), + pid :: rabbit_types:maybe(pid())}). + +-type(exchange() :: + #exchange{name :: rabbit_exchange:name(), + type :: rabbit_exchange:type(), + durable :: boolean(), + auto_delete :: boolean(), + arguments :: rabbit_framing:amqp_table()}). + +-type(connection() :: pid()). + +-type(protocol() :: 'rabbit_framing_amqp_0_8' | 'rabbit_framing_amqp_0_9_1'). + +-type(user() :: + #user{username :: rabbit_access_control:username(), + password :: rabbit_access_control:password(), + is_admin :: boolean()}). + +-type(ok(A) :: {'ok', A}). +-type(error(A) :: {'error', A}). +-type(ok_or_error(A) :: 'ok' | error(A)). +-type(ok_or_error2(A, B) :: ok(A) | error(B)). +-type(ok_pid_or_error() :: ok_or_error2(pid(), any())). 
+ +-endif. % use_specs diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl new file mode 100644 index 00000000..30d3a8ae --- /dev/null +++ b/src/rabbit_variable_queue.erl @@ -0,0 +1,1433 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_variable_queue). + +-export([init/3, terminate/1, delete_and_terminate/1, + purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, + tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, + requeue/2, len/1, is_empty/1, + set_ram_duration_target/2, ram_duration/1, + needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, + status/1]). + +-export([start/1, stop/0]). 
+ +%% exported for testing only +-export([start_msg_store/2, stop_msg_store/0]). + +%%---------------------------------------------------------------------------- +%% Definitions: + +%% alpha: this is a message where both the message itself, and its +%% position within the queue are held in RAM +%% +%% beta: this is a message where the message itself is only held on +%% disk, but its position within the queue is held in RAM. +%% +%% gamma: this is a message where the message itself is only held on +%% disk, but its position is both in RAM and on disk. +%% +%% delta: this is a collection of messages, represented by a single +%% term, where the messages and their position are only held on +%% disk. +%% +%% Note that for persistent messages, the message and its position +%% within the queue are always held on disk, *in addition* to being in +%% one of the above classifications. +%% +%% Also note that within this code, the term gamma never +%% appears. Instead, gammas are defined by betas who have had their +%% queue position recorded on disk. +%% +%% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though +%% many of these steps are frequently skipped. q1 and q4 only hold +%% alphas, q2 and q3 hold both betas and gammas (as queues of queues, +%% using the bpqueue module where the block prefix determines whether +%% they're betas or gammas). When a message arrives, its +%% classification is determined. It is then added to the rightmost +%% appropriate queue. +%% +%% If a new message is determined to be a beta or gamma, q1 is +%% empty. If a new message is determined to be a delta, q1 and q2 are +%% empty (and actually q4 too). +%% +%% When removing messages from a queue, if q4 is empty then q3 is read +%% directly. If q3 becomes empty then the next segment's worth of +%% messages from delta are read into q3, reducing the size of +%% delta. If the queue is non empty, either q4 or q3 contain +%% entries. 
It is never permitted for delta to hold all the messages +%% in the queue. +%% +%% The duration indicated to us by the memory_monitor is used to +%% calculate, given our current ingress and egress rates, how many +%% messages we should hold in RAM. When we need to push alphas to +%% betas or betas to gammas, we favour writing out messages that are +%% further from the head of the queue. This minimises writes to disk, +%% as the messages closer to the tail of the queue stay in the queue +%% for longer, thus do not need to be replaced as quickly by sending +%% other messages to disk. +%% +%% Whilst messages are pushed to disk and forgotten from RAM as soon +%% as requested by a new setting of the queue RAM duration, the +%% inverse is not true: we only load messages back into RAM as +%% demanded as the queue is read from. Thus only publishes to the +%% queue will take up available spare capacity. +%% +%% When we report our duration to the memory monitor, we calculate +%% average ingress and egress rates over the last two samples, and +%% then calculate our duration based on the sum of the ingress and +%% egress rates. More than two samples could be used, but it's a +%% balance between responding quickly enough to changes in +%% producers/consumers versus ignoring temporary blips. The problem +%% with temporary blips is that with just a few queues, they can have +%% substantial impact on the calculation of the average duration and +%% hence cause unnecessary I/O. Another alternative is to increase the +%% amqqueue_process:RAM_DURATION_UPDATE_PERIOD to beyond 5 +%% seconds. However, that then runs the risk of being too slow to +%% inform the memory monitor of changes. Thus a 5 second interval, +%% plus a rolling average over the last two samples seems to work +%% well in practice. +%% +%% The sum of the ingress and egress rates is used because the egress +%% rate alone is not sufficient. 
Adding in the ingress rate means that +%% queues which are being flooded by messages are given more memory, +%% resulting in them being able to process the messages faster (by +%% doing less I/O, or at least deferring it) and thus helping keep +%% their mailboxes empty and thus the queue as a whole is more +%% responsive. If such a queue also has fast but previously idle +%% consumers, the consumer can then start to be driven as fast as it +%% can go, whereas if only egress rate was being used, the incoming +%% messages may have to be written to disk and then read back in, +%% resulting in the hard disk being a bottleneck in driving the +%% consumers. Generally, we want to give Rabbit every chance of +%% getting rid of messages as fast as possible and remaining +%% responsive, and using only the egress rate impacts that goal. +%% +%% If a queue is full of transient messages, then the transition from +%% betas to deltas will be potentially very expensive as millions of +%% entries must be written to disk by the queue_index module. This can +%% badly stall the queue. In order to avoid this, the proportion of +%% gammas / (betas+gammas) must not be lower than (betas+gammas) / +%% (alphas+betas+gammas). As the queue grows or available memory +%% shrinks, the latter ratio increases, requiring the conversion of +%% more gammas to betas in order to maintain the invariant. At the +%% point at which betas and gammas must be converted to deltas, there +%% should be very few betas remaining, thus the transition is fast (no +%% work needs to be done for the gamma -> delta transition). +%% +%% The conversion of betas to gammas is done in batches of exactly +%% ?IO_BATCH_SIZE. This value should not be too small, otherwise the +%% frequent operations on the queues of q2 and q3 will not be +%% effectively amortised (switching the direction of queue access +%% defeats amortisation), nor should it be too big, otherwise +%% converting a batch stalls the queue for too long. 
Therefore, it +%% must be just right. ram_index_count is used here and is the number +%% of betas. +%% +%% The conversion from alphas to betas is also chunked, but only to +%% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at +%% any one time. This further smooths the effects of changes to the +%% target_ram_msg_count and ensures the queue remains responsive +%% even when there is a large amount of IO work to do. The +%% idle_timeout callback is utilised to ensure that conversions are +%% done as promptly as possible whilst ensuring the queue remains +%% responsive. +%% +%% In the queue we keep track of both messages that are pending +%% delivery and messages that are pending acks. This ensures that +%% purging (deleting the former) and deletion (deleting the former and +%% the latter) are both cheap and do require any scanning through qi +%% segments. +%% +%% Notes on Clean Shutdown +%% (This documents behaviour in variable_queue, queue_index and +%% msg_store.) +%% +%% In order to try to achieve as fast a start-up as possible, if a +%% clean shutdown occurs, we try to save out state to disk to reduce +%% work on startup. In the msg_store this takes the form of the +%% index_module's state, plus the file_summary ets table, and client +%% refs. In the VQ, this takes the form of the count of persistent +%% messages in the queue and references into the msg_stores. The +%% queue_index adds to these terms the details of its segments and +%% stores the terms in the queue directory. +%% +%% Two message stores are used. One is created for persistent messages +%% to durable queues that must survive restarts, and the other is used +%% for all other messages that just happen to need to be written to +%% disk. On start up we can therefore nuke the transient message +%% store, and be sure that the messages in the persistent store are +%% all that we need. 
+%% +%% The references to the msg_stores are there so that the msg_store +%% knows to only trust its saved state if all of the queues it was +%% previously talking to come up cleanly. Likewise, the queues +%% themselves (esp queue_index) skips work in init if all the queues +%% and msg_store were shutdown cleanly. This gives both good speed +%% improvements and also robustness so that if anything possibly went +%% wrong in shutdown (or there was subsequent manual tampering), all +%% messages and queues that can be recovered are recovered, safely. +%% +%% To delete transient messages lazily, the variable_queue, on +%% startup, stores the next_seq_id reported by the queue_index as the +%% transient_threshold. From that point on, whenever it's reading a +%% message off disk via the queue_index, if the seq_id is below this +%% threshold and the message is transient then it drops the message +%% (the message itself won't exist on disk because it would have been +%% stored in the transient msg_store which would have had its saved +%% state nuked on startup). This avoids the expensive operation of +%% scanning the entire queue on startup in order to delete transient +%% messages that were only pushed to disk to save memory. +%% +%%---------------------------------------------------------------------------- + +-behaviour(rabbit_backing_queue). + +-record(vqstate, + { q1, + q2, + delta, + q3, + q4, + next_seq_id, + pending_ack, + index_state, + msg_store_clients, + on_sync, + durable, + transient_threshold, + + len, + persistent_count, + + duration_target, + target_ram_msg_count, + ram_msg_count, + ram_msg_count_prev, + ram_index_count, + out_counter, + in_counter, + rates + }). + +-record(rates, { egress, ingress, avg_egress, avg_ingress, timestamp }). + +-record(msg_status, + { seq_id, + guid, + msg, + is_persistent, + is_delivered, + msg_on_disk, + index_on_disk + }). 
+ +-record(delta, + { start_seq_id, %% start_seq_id is inclusive + count, + end_seq_id %% end_seq_id is exclusive + }). + +-record(tx, { pending_messages, pending_acks }). + +-record(sync, { acks_persistent, acks_all, pubs, funs }). + +%% When we discover, on publish, that we should write some indices to +%% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of +%% betas that we must be due to write indices for before we do any +%% work at all. This is both a minimum and a maximum - we don't write +%% fewer than RAM_INDEX_BATCH_SIZE indices out in one go, and we don't +%% write more - we can always come back on the next publish to do +%% more. +-define(IO_BATCH_SIZE, 64). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}). +-type(seq_id() :: non_neg_integer()). +-type(ack() :: seq_id() | 'blank_ack'). + +-type(rates() :: #rates { egress :: {timestamp(), non_neg_integer()}, + ingress :: {timestamp(), non_neg_integer()}, + avg_egress :: float(), + avg_ingress :: float(), + timestamp :: timestamp() }). + +-type(delta() :: #delta { start_seq_id :: non_neg_integer(), + count :: non_neg_integer (), + end_seq_id :: non_neg_integer() }). + +-type(sync() :: #sync { acks_persistent :: [[seq_id()]], + acks_all :: [[seq_id()]], + pubs :: [[rabbit_guid:guid()]], + funs :: [fun (() -> any())] }). 
+ +-type(state() :: #vqstate { + q1 :: queue(), + q2 :: bpqueue:bpqueue(), + delta :: delta(), + q3 :: bpqueue:bpqueue(), + q4 :: queue(), + next_seq_id :: seq_id(), + pending_ack :: dict:dictionary(), + index_state :: any(), + msg_store_clients :: 'undefined' | {{any(), binary()}, + {any(), binary()}}, + on_sync :: sync(), + durable :: boolean(), + + len :: non_neg_integer(), + persistent_count :: non_neg_integer(), + + transient_threshold :: non_neg_integer(), + duration_target :: number() | 'infinity', + target_ram_msg_count :: non_neg_integer() | 'infinity', + ram_msg_count :: non_neg_integer(), + ram_msg_count_prev :: non_neg_integer(), + ram_index_count :: non_neg_integer(), + out_counter :: non_neg_integer(), + in_counter :: non_neg_integer(), + rates :: rates() }). + +-include("rabbit_backing_queue_spec.hrl"). + +-endif. + +-define(BLANK_DELTA, #delta { start_seq_id = undefined, + count = 0, + end_seq_id = undefined }). +-define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, + count = 0, + end_seq_id = Z }). + +-define(BLANK_SYNC, #sync { acks_persistent = [], + acks_all = [], + pubs = [], + funs = [] }). + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +start(DurableQueues) -> + {AllTerms, StartFunState} = rabbit_queue_index:recover(DurableQueues), + start_msg_store( + [Ref || Terms <- AllTerms, + begin + Ref = proplists:get_value(persistent_ref, Terms), + Ref =/= undefined + end], + StartFunState). + +stop() -> stop_msg_store(). + +start_msg_store(Refs, StartFunState) -> + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), + undefined, {fun (ok) -> finished end, ok}]), + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + Refs, StartFunState]). 
+ +stop_msg_store() -> + ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), + ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE). + +init(QueueName, IsDurable, Recover) -> + {DeltaCount, Terms, IndexState} = + rabbit_queue_index:init( + QueueName, Recover, + rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end), + {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState), + + {PRef, TRef, Terms1} = + case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of + [] -> {proplists:get_value(persistent_ref, Terms), + proplists:get_value(transient_ref, Terms), + Terms}; + _ -> {rabbit_guid:guid(), rabbit_guid:guid(), []} + end, + DeltaCount1 = proplists:get_value(persistent_count, Terms1, DeltaCount), + Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of + true -> ?BLANK_DELTA; + false -> #delta { start_seq_id = LowSeqId, + count = DeltaCount1, + end_seq_id = NextSeqId } + end, + Now = now(), + PersistentClient = + case IsDurable of + true -> rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, PRef); + false -> undefined + end, + TransientClient = rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), + State = #vqstate { + q1 = queue:new(), + q2 = bpqueue:new(), + delta = Delta, + q3 = bpqueue:new(), + q4 = queue:new(), + next_seq_id = NextSeqId, + pending_ack = dict:new(), + index_state = IndexState1, + msg_store_clients = {{PersistentClient, PRef}, + {TransientClient, TRef}}, + on_sync = ?BLANK_SYNC, + durable = IsDurable, + transient_threshold = NextSeqId, + + len = DeltaCount1, + persistent_count = DeltaCount1, + + duration_target = infinity, + target_ram_msg_count = infinity, + ram_msg_count = 0, + ram_msg_count_prev = 0, + ram_index_count = 0, + out_counter = 0, + in_counter = 0, + rates = #rates { egress = {Now, 0}, + ingress = {Now, DeltaCount1}, + avg_egress = 0.0, + avg_ingress = 0.0, + timestamp = Now } }, + 
a(maybe_deltas_to_betas(State)). + +terminate(State) -> + State1 = #vqstate { persistent_count = PCount, + index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}} } = + remove_pending_ack(true, tx_commit_index(State)), + case MSCStateP of + undefined -> ok; + _ -> rabbit_msg_store:client_terminate( + MSCStateP, ?PERSISTENT_MSG_STORE) + end, + rabbit_msg_store:client_terminate(MSCStateT, ?TRANSIENT_MSG_STORE), + Terms = [{persistent_ref, PRef}, + {transient_ref, TRef}, + {persistent_count, PCount}], + a(State1 #vqstate { index_state = rabbit_queue_index:terminate( + Terms, IndexState), + msg_store_clients = undefined }). + +%% the only difference between purge and delete is that delete also +%% needs to delete everything that's been delivered and not ack'd. +delete_and_terminate(State) -> + %% TODO: there is no need to interact with qi at all - which we do + %% as part of 'purge' and 'remove_pending_ack', other than + %% deleting it. + {_PurgeCount, State1} = purge(State), + State2 = #vqstate { index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}} } = + remove_pending_ack(false, State1), + IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState), + case MSCStateP of + undefined -> ok; + _ -> rabbit_msg_store:client_delete_and_terminate( + MSCStateP, ?PERSISTENT_MSG_STORE, PRef) + end, + rabbit_msg_store:client_delete_and_terminate( + MSCStateT, ?TRANSIENT_MSG_STORE, TRef), + a(State2 #vqstate { index_state = IndexState1, + msg_store_clients = undefined }). + +purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> + %% TODO: when there are no pending acks, which is a common case, + %% we could simply wipe the qi instead of issuing delivers and + %% acks for all the messages. 
+ IndexState1 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, + IndexState), + State1 = #vqstate { q1 = Q1, index_state = IndexState2 } = + purge_betas_and_deltas(State #vqstate { q4 = queue:new(), + index_state = IndexState1 }), + IndexState3 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q1, + IndexState2), + {Len, a(State1 #vqstate { q1 = queue:new(), + index_state = IndexState3, + len = 0, + ram_msg_count = 0, + ram_index_count = 0, + persistent_count = 0 })}. + +publish(Msg, State) -> + {_SeqId, State1} = publish(Msg, false, false, State), + a(reduce_memory_use(State1)). + +publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> + {blank_ack, a(State)}; +publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, + State = #vqstate { len = 0, + next_seq_id = SeqId, + out_counter = OutCount, + in_counter = InCount, + persistent_count = PCount, + pending_ack = PA, + durable = IsDurable }) -> + IsPersistent1 = IsDurable andalso IsPersistent, + MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) + #msg_status { is_delivered = true }, + {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), + PA1 = record_pending_ack(m(MsgStatus1), PA), + PCount1 = PCount + one_if(IsPersistent1), + {SeqId, a(State1 #vqstate { next_seq_id = SeqId + 1, + out_counter = OutCount + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + pending_ack = PA1 })}. 
+ +fetch(AckRequired, State = #vqstate { q4 = Q4, + ram_msg_count = RamMsgCount, + out_counter = OutCount, + index_state = IndexState, + len = Len, + persistent_count = PCount, + pending_ack = PA }) -> + case queue:out(Q4) of + {empty, _Q4} -> + case fetch_from_q3_to_q4(State) of + {empty, State1} = Result -> a(State1), Result; + {loaded, State1} -> fetch(AckRequired, State1) + end; + {{value, MsgStatus = #msg_status { + msg = Msg, guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, + Q4a} -> + + %% 1. Mark it delivered if necessary + IndexState1 = maybe_write_delivered( + IndexOnDisk andalso not IsDelivered, + SeqId, IndexState), + + %% 2. Remove from msg_store and queue index, if necessary + MsgStore = find_msg_store(IsPersistent), + Rem = fun () -> ok = rabbit_msg_store:remove(MsgStore, [Guid]) end, + Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end, + IndexState2 = + case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of + {false, true, false, _} -> Rem(), IndexState1; + {false, true, true, _} -> Rem(), Ack(); + { true, true, true, false} -> Ack(); + _ -> IndexState1 + end, + + %% 3. If an ack is required, add something sensible to PA + {AckTag, PA1} = case AckRequired of + true -> PA2 = record_pending_ack( + MsgStatus #msg_status { + is_delivered = true }, PA), + {SeqId, PA2}; + false -> {blank_ack, PA} + end, + + PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), + Len1 = Len - 1, + {{Msg, IsDelivered, AckTag, Len1}, + a(State #vqstate { q4 = Q4a, + ram_msg_count = RamMsgCount - 1, + out_counter = OutCount + 1, + index_state = IndexState2, + len = Len1, + persistent_count = PCount1, + pending_ack = PA1 })} + end. + +ack(AckTags, State) -> + a(ack(fun rabbit_msg_store:remove/2, + fun (_AckEntry, State1) -> State1 end, + AckTags, State)). 
+ +tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, + State = #vqstate { durable = IsDurable, + msg_store_clients = MSCState }) -> + Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }), + a(case IsPersistent andalso IsDurable of + true -> MsgStatus = msg_status(true, undefined, Msg), + {#msg_status { msg_on_disk = true }, MSCState1} = + maybe_write_msg_to_disk(false, MsgStatus, MSCState), + State #vqstate { msg_store_clients = MSCState1 }; + false -> State + end). + +tx_ack(Txn, AckTags, State) -> + Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }), + State. + +tx_rollback(Txn, State = #vqstate { durable = IsDurable }) -> + #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), + erase_tx(Txn), + ok = case IsDurable of + true -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, + persistent_guids(Pubs)); + false -> ok + end, + {lists:append(AckTags), a(State)}. + +tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> + #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), + erase_tx(Txn), + PubsOrdered = lists:reverse(Pubs), + AckTags1 = lists:append(AckTags), + PersistentGuids = persistent_guids(PubsOrdered), + HasPersistentPubs = PersistentGuids =/= [], + {AckTags1, + a(case IsDurable andalso HasPersistentPubs of + true -> ok = rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, PersistentGuids, + msg_store_callback(PersistentGuids, + PubsOrdered, AckTags1, Fun)), + State; + false -> tx_commit_post_msg_store( + HasPersistentPubs, PubsOrdered, AckTags1, Fun, State) + end)}. 
+ +requeue(AckTags, State) -> + a(reduce_memory_use( + ack(fun rabbit_msg_store:release/2, + fun (#msg_status { msg = Msg }, State1) -> + {_SeqId, State2} = publish(Msg, true, false, State1), + State2; + ({IsPersistent, Guid}, State1) -> + #vqstate { msg_store_clients = MSCState } = State1, + {{ok, Msg = #basic_message{}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + State2 = State1 #vqstate { msg_store_clients = MSCState1 }, + {_SeqId, State3} = publish(Msg, true, true, State2), + State3 + end, + AckTags, State))). + +len(#vqstate { len = Len }) -> Len. + +is_empty(State) -> 0 == len(State). + +set_ram_duration_target(DurationTarget, + State = #vqstate { + rates = #rates { avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate }, + target_ram_msg_count = TargetRamMsgCount }) -> + Rate = AvgEgressRate + AvgIngressRate, + TargetRamMsgCount1 = + case DurationTarget of + infinity -> infinity; + _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec + end, + State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, + duration_target = DurationTarget }, + a(case TargetRamMsgCount1 == infinity orelse + (TargetRamMsgCount =/= infinity andalso + TargetRamMsgCount1 >= TargetRamMsgCount) of + true -> State1; + false -> reduce_memory_use(State1) + end). 
+ +ram_duration(State = #vqstate { + rates = #rates { egress = Egress, + ingress = Ingress, + timestamp = Timestamp } = Rates, + in_counter = InCount, + out_counter = OutCount, + ram_msg_count = RamMsgCount, + duration_target = DurationTarget, + ram_msg_count_prev = RamMsgCountPrev }) -> + Now = now(), + {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), + {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), + + Duration = %% msgs / (msgs/sec) == sec + case AvgEgressRate == 0 andalso AvgIngressRate == 0 of + true -> infinity; + false -> (RamMsgCountPrev + RamMsgCount) / + (2 * (AvgEgressRate + AvgIngressRate)) + end, + + {Duration, set_ram_duration_target( + DurationTarget, + State #vqstate { + rates = Rates #rates { + egress = Egress1, + ingress = Ingress1, + avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate, + timestamp = Now }, + in_counter = 0, + out_counter = 0, + ram_msg_count_prev = RamMsgCount })}. + +needs_idle_timeout(State = #vqstate { on_sync = ?BLANK_SYNC }) -> + {Res, _State} = reduce_memory_use(fun (_Quota, State1) -> State1 end, + fun (_Quota, State1) -> State1 end, + fun (State1) -> State1 end, + State), + Res; +needs_idle_timeout(_State) -> + true. + +idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))). + +handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> + State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. 
+ +status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + len = Len, + pending_ack = PA, + on_sync = #sync { funs = From }, + target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, + next_seq_id = NextSeqId, + persistent_count = PersistentCount, + rates = #rates { + avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate } }) -> + [ {q1 , queue:len(Q1)}, + {q2 , bpqueue:len(Q2)}, + {delta , Delta}, + {q3 , bpqueue:len(Q3)}, + {q4 , queue:len(Q4)}, + {len , Len}, + {pending_acks , dict:size(PA)}, + {outstanding_txns , length(From)}, + {target_ram_msg_count , TargetRamMsgCount}, + {ram_msg_count , RamMsgCount}, + {ram_index_count , RamIndexCount}, + {next_seq_id , NextSeqId}, + {persistent_count , PersistentCount}, + {avg_egress_rate , AvgEgressRate}, + {avg_ingress_rate , AvgIngressRate} ]. + +%%---------------------------------------------------------------------------- +%% Minor helpers +%%---------------------------------------------------------------------------- + +a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + len = Len, + persistent_count = PersistentCount, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount }) -> + E1 = queue:is_empty(Q1), + E2 = bpqueue:is_empty(Q2), + ED = Delta#delta.count == 0, + E3 = bpqueue:is_empty(Q3), + E4 = queue:is_empty(Q4), + LZ = Len == 0, + + true = E1 or not E3, + true = E2 or not ED, + true = ED or not E3, + true = LZ == (E3 and E4), + + true = Len >= 0, + true = PersistentCount >= 0, + true = RamMsgCount >= 0, + true = RamIndexCount >= 0, + + State. + +m(MsgStatus = #msg_status { msg = Msg, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }) -> + true = (not IsPersistent) or IndexOnDisk, + true = (not IndexOnDisk) or MsgOnDisk, + true = (Msg =/= undefined) or MsgOnDisk, + + MsgStatus. + +one_if(true ) -> 1; +one_if(false) -> 0. 
+ +cons_if(true, E, L) -> [E | L]; +cons_if(false, _E, L) -> L. + +msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) -> + #msg_status { seq_id = SeqId, guid = Guid, msg = Msg, + is_persistent = IsPersistent, is_delivered = false, + msg_on_disk = false, index_on_disk = false }. + +find_msg_store(true) -> ?PERSISTENT_MSG_STORE; +find_msg_store(false) -> ?TRANSIENT_MSG_STORE. + +with_msg_store_state({{MSCStateP, PRef}, MSCStateT}, true, Fun) -> + {Result, MSCStateP1} = Fun(?PERSISTENT_MSG_STORE, MSCStateP), + {Result, {{MSCStateP1, PRef}, MSCStateT}}; +with_msg_store_state({MSCStateP, {MSCStateT, TRef}}, false, Fun) -> + {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), + {Result, {MSCStateP, {MSCStateT1, TRef}}}. + +read_from_msg_store(MSCState, IsPersistent, Guid) -> + with_msg_store_state( + MSCState, IsPersistent, + fun (MsgStore, MSCState1) -> + rabbit_msg_store:read(MsgStore, Guid, MSCState1) + end). + +maybe_write_delivered(false, _SeqId, IndexState) -> + IndexState; +maybe_write_delivered(true, SeqId, IndexState) -> + rabbit_queue_index:deliver([SeqId], IndexState). + +lookup_tx(Txn) -> case get({txn, Txn}) of + undefined -> #tx { pending_messages = [], + pending_acks = [] }; + V -> V + end. + +store_tx(Txn, Tx) -> put({txn, Txn}, Tx). + +erase_tx(Txn) -> erase({txn, Txn}). + +persistent_guids(Pubs) -> + [Guid || #basic_message { guid = Guid, is_persistent = true } <- Pubs]. 
+ +betas_from_index_entries(List, TransientThreshold, IndexState) -> + {Filtered, Delivers, Acks} = + lists:foldr( + fun ({Guid, SeqId, IsPersistent, IsDelivered}, + {Filtered1, Delivers1, Acks1}) -> + case SeqId < TransientThreshold andalso not IsPersistent of + true -> {Filtered1, + cons_if(not IsDelivered, SeqId, Delivers1), + [SeqId | Acks1]}; + false -> {[m(#msg_status { msg = undefined, + guid = Guid, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + }) | Filtered1], + Delivers1, + Acks1} + end + end, {[], [], []}, List), + {bpqueue:from_list([{true, Filtered}]), + rabbit_queue_index:ack(Acks, + rabbit_queue_index:deliver(Delivers, IndexState))}. + +%% the first arg is the older delta +combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) -> + ?BLANK_DELTA; +combine_deltas(?BLANK_DELTA_PATTERN(X), #delta { start_seq_id = Start, + count = Count, + end_seq_id = End } = B) -> + true = Start + Count =< End, %% ASSERTION + B; +combine_deltas(#delta { start_seq_id = Start, + count = Count, + end_seq_id = End } = A, ?BLANK_DELTA_PATTERN(Y)) -> + true = Start + Count =< End, %% ASSERTION + A; +combine_deltas(#delta { start_seq_id = StartLow, + count = CountLow, + end_seq_id = EndLow }, + #delta { start_seq_id = StartHigh, + count = CountHigh, + end_seq_id = EndHigh }) -> + Count = CountLow + CountHigh, + true = (StartLow =< StartHigh) %% ASSERTIONS + andalso ((StartLow + CountLow) =< EndLow) + andalso ((StartHigh + CountHigh) =< EndHigh) + andalso ((StartLow + Count) =< EndHigh), + #delta { start_seq_id = StartLow, count = Count, end_seq_id = EndHigh }. + +beta_fold(Fun, Init, Q) -> + bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). + +update_rate(Now, Then, Count, {OThen, OCount}) -> + %% avg over the current period and the previous + {1000000.0 * (Count + OCount) / timer:now_diff(Now, OThen), {Then, Count}}. 
+ +%%---------------------------------------------------------------------------- +%% Internal major helpers for Public API +%%---------------------------------------------------------------------------- + +msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) -> + Self = self(), + F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( + Self, fun (StateN) -> tx_commit_post_msg_store( + true, Pubs, AckTags, Fun, StateN) + end) + end, + fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( + fun () -> rabbit_msg_store:remove( + ?PERSISTENT_MSG_STORE, + PersistentGuids) + end, F) + end) + end. + +tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, + State = #vqstate { + on_sync = OnSync = #sync { + acks_persistent = SPAcks, + acks_all = SAcks, + pubs = SPubs, + funs = SFuns }, + pending_ack = PA, + durable = IsDurable }) -> + PersistentAcks = + case IsDurable of + true -> [AckTag || AckTag <- AckTags, + case dict:fetch(AckTag, PA) of + #msg_status {} -> false; + {IsPersistent, _Guid} -> IsPersistent + end]; + false -> [] + end, + case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of + true -> State #vqstate { on_sync = #sync { + acks_persistent = [PersistentAcks | SPAcks], + acks_all = [AckTags | SAcks], + pubs = [Pubs | SPubs], + funs = [Fun | SFuns] }}; + false -> State1 = tx_commit_index( + State #vqstate { on_sync = #sync { + acks_persistent = [], + acks_all = [AckTags], + pubs = [Pubs], + funs = [Fun] } }), + State1 #vqstate { on_sync = OnSync } + end. 
+ +tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) -> + State; +tx_commit_index(State = #vqstate { on_sync = #sync { + acks_persistent = SPAcks, + acks_all = SAcks, + pubs = SPubs, + funs = SFuns }, + durable = IsDurable }) -> + PAcks = lists:append(SPAcks), + Acks = lists:append(SAcks), + Pubs = lists:append(lists:reverse(SPubs)), + {SeqIds, State1 = #vqstate { index_state = IndexState }} = + lists:foldl( + fun (Msg = #basic_message { is_persistent = IsPersistent }, + {SeqIdsAcc, State2}) -> + IsPersistent1 = IsDurable andalso IsPersistent, + {SeqId, State3} = publish(Msg, false, IsPersistent1, State2), + {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3} + end, {PAcks, ack(Acks, State)}, Pubs), + IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), + [ Fun() || Fun <- lists:reverse(SFuns) ], + reduce_memory_use( + State1 #vqstate { index_state = IndexState1, on_sync = ?BLANK_SYNC }). + +purge_betas_and_deltas(State = #vqstate { q3 = Q3, + index_state = IndexState }) -> + case bpqueue:is_empty(Q3) of + true -> State; + false -> IndexState1 = remove_queue_entries(fun beta_fold/3, Q3, + IndexState), + purge_betas_and_deltas( + maybe_deltas_to_betas( + State #vqstate { q3 = bpqueue:new(), + index_state = IndexState1 })) + end. + +remove_queue_entries(Fold, Q, IndexState) -> + {GuidsByStore, Delivers, Acks} = + Fold(fun remove_queue_entries1/2, {orddict:new(), [], []}, Q), + ok = orddict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + rabbit_queue_index:ack(Acks, + rabbit_queue_index:deliver(Delivers, IndexState)). 
+ +remove_queue_entries1( + #msg_status { guid = Guid, seq_id = SeqId, + is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, + {GuidsByStore, Delivers, Acks}) -> + {case MsgOnDisk of + true -> rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, + GuidsByStore); + false -> GuidsByStore + end, + cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers), + cons_if(IndexOnDisk, SeqId, Acks)}. + +%%---------------------------------------------------------------------------- +%% Internal gubbins for publishing +%%---------------------------------------------------------------------------- + +publish(Msg = #basic_message { is_persistent = IsPersistent }, + IsDelivered, MsgOnDisk, + State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4, + next_seq_id = SeqId, + len = Len, + in_counter = InCount, + persistent_count = PCount, + durable = IsDurable, + ram_msg_count = RamMsgCount }) -> + IsPersistent1 = IsDurable andalso IsPersistent, + MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) + #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk }, + {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), + State2 = case bpqueue:is_empty(Q3) of + false -> State1 #vqstate { q1 = queue:in(m(MsgStatus1), Q1) }; + true -> State1 #vqstate { q4 = queue:in(m(MsgStatus1), Q4) } + end, + PCount1 = PCount + one_if(IsPersistent1), + {SeqId, State2 #vqstate { next_seq_id = SeqId + 1, + len = Len + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + ram_msg_count = RamMsgCount + 1}}. 
+ +maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status { + msg_on_disk = true }, MSCState) -> + {MsgStatus, MSCState}; +maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { + msg = Msg, guid = Guid, + is_persistent = IsPersistent }, MSCState) + when Force orelse IsPersistent -> + {ok, MSCState1} = + with_msg_store_state( + MSCState, IsPersistent, + fun (MsgStore, MSCState2) -> + Msg1 = Msg #basic_message { + %% don't persist any recoverable decoded properties + content = rabbit_binary_parser:clear_decoded_content( + Msg #basic_message.content)}, + rabbit_msg_store:write(MsgStore, Guid, Msg1, MSCState2) + end), + {MsgStatus #msg_status { msg_on_disk = true }, MSCState1}; +maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) -> + {MsgStatus, MSCState}. + +maybe_write_index_to_disk(_Force, MsgStatus = #msg_status { + index_on_disk = true }, IndexState) -> + true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION + {MsgStatus, IndexState}; +maybe_write_index_to_disk(Force, MsgStatus = #msg_status { + guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered }, IndexState) + when Force orelse IsPersistent -> + true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION + IndexState1 = rabbit_queue_index:publish(Guid, SeqId, IsPersistent, + IndexState), + {MsgStatus #msg_status { index_on_disk = true }, + maybe_write_delivered(IsDelivered, SeqId, IndexState1)}; +maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> + {MsgStatus, IndexState}. + +maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, + State = #vqstate { index_state = IndexState, + msg_store_clients = MSCState }) -> + {MsgStatus1, MSCState1} = maybe_write_msg_to_disk( + ForceMsg, MsgStatus, MSCState), + {MsgStatus2, IndexState1} = maybe_write_index_to_disk( + ForceIndex, MsgStatus1, IndexState), + {MsgStatus2, State #vqstate { index_state = IndexState1, + msg_store_clients = MSCState1 }}. 
+ +%%---------------------------------------------------------------------------- +%% Internal gubbins for acks +%%---------------------------------------------------------------------------- + +record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> + AckEntry = case MsgOnDisk of + true -> {IsPersistent, Guid}; + false -> MsgStatus + end, + dict:store(SeqId, AckEntry, PA). + +remove_pending_ack(KeepPersistent, + State = #vqstate { pending_ack = PA, + index_state = IndexState }) -> + {SeqIds, GuidsByStore} = dict:fold(fun accumulate_ack/3, + {[], orddict:new()}, PA), + State1 = State #vqstate { pending_ack = dict:new() }, + case KeepPersistent of + true -> case orddict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of + error -> State1; + {ok, Guids} -> ok = rabbit_msg_store:remove( + ?TRANSIENT_MSG_STORE, Guids), + State1 + end; + false -> IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = orddict:fold( + fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + State1 #vqstate { index_state = IndexState1 } + end. 
+ +ack(_MsgStoreFun, _Fun, [], State) -> + State; +ack(MsgStoreFun, Fun, AckTags, State) -> + {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, + persistent_count = PCount }} = + lists:foldl( + fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) -> + {ok, AckEntry} = dict:find(SeqId, PA), + {accumulate_ack(SeqId, AckEntry, Acc), + Fun(AckEntry, State2 #vqstate { + pending_ack = dict:erase(SeqId, PA) })} + end, {{[], orddict:new()}, State}, AckTags), + IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = orddict:fold(fun (MsgStore, Guids, ok) -> + MsgStoreFun(MsgStore, Guids) + end, ok, GuidsByStore), + PCount1 = PCount - case orddict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of + error -> 0; + {ok, Guids} -> length(Guids) + end, + State1 #vqstate { index_state = IndexState1, + persistent_count = PCount1 }. + +accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS + msg_on_disk = false, + index_on_disk = false }, Acc) -> + Acc; +accumulate_ack(SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) -> + {cons_if(IsPersistent, SeqId, SeqIdsAcc), + rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, Dict)}. + +%%---------------------------------------------------------------------------- +%% Phase changes +%%---------------------------------------------------------------------------- + +%% Determine whether a reduction in memory use is necessary, and call +%% functions to perform the required phase changes. The function can +%% also be used to just do the former, by passing in dummy phase +%% change functions. +%% +%% The function does not report on any needed beta->delta conversions, +%% though the conversion function for that is called as necessary. The +%% reason is twofold. Firstly, this is safe because the conversion is +%% only ever necessary just after a transition to a +%% target_ram_msg_count of zero or after an incremental alpha->beta +%% conversion. 
In the former case the conversion is performed straight +%% away (i.e. any betas present at the time are converted to deltas), +%% and in the latter case the need for a conversion is flagged up +%% anyway. Secondly, this is necessary because we do not have a +%% precise and cheap predicate for determining whether a beta->delta +%% conversion is necessary - due to the complexities of retaining up +%% one segment's worth of messages in q3 - and thus would risk +%% perpetually reporting the need for a conversion when no such +%% conversion is needed. That in turn could cause an infinite loop. +reduce_memory_use(AlphaBetaFun, BetaGammaFun, BetaDeltaFun, State) -> + {Reduce, State1} = case chunk_size(State #vqstate.ram_msg_count, + State #vqstate.target_ram_msg_count) of + 0 -> {false, State}; + S1 -> {true, AlphaBetaFun(S1, State)} + end, + case State1 #vqstate.target_ram_msg_count of + infinity -> {Reduce, State1}; + 0 -> {Reduce, BetaDeltaFun(State1)}; + _ -> case chunk_size(State1 #vqstate.ram_index_count, + permitted_ram_index_count(State1)) of + ?IO_BATCH_SIZE = S2 -> {true, BetaGammaFun(S2, State1)}; + _ -> {Reduce, State1} + end + end. + +reduce_memory_use(State) -> + {_, State1} = reduce_memory_use(fun push_alphas_to_betas/2, + fun limit_ram_index/2, + fun push_betas_to_deltas/1, + State), + State1. + +limit_ram_index(Quota, State = #vqstate { q2 = Q2, q3 = Q3, + index_state = IndexState, + ram_index_count = RamIndexCount }) -> + {Q2a, {Quota1, IndexState1}} = limit_ram_index( + fun bpqueue:map_fold_filter_r/4, + Q2, {Quota, IndexState}), + %% TODO: we shouldn't be writing index entries for messages that + %% can never end up in delta due them residing in the only segment + %% held by q3. + {Q3a, {Quota2, IndexState2}} = limit_ram_index( + fun bpqueue:map_fold_filter_r/4, + Q3, {Quota1, IndexState1}), + State #vqstate { q2 = Q2a, q3 = Q3a, + index_state = IndexState2, + ram_index_count = RamIndexCount - (Quota - Quota2) }. 
+ +limit_ram_index(_MapFoldFilterFun, Q, {0, IndexState}) -> + {Q, {0, IndexState}}; +limit_ram_index(MapFoldFilterFun, Q, {Quota, IndexState}) -> + MapFoldFilterFun( + fun erlang:'not'/1, + fun (MsgStatus, {0, _IndexStateN}) -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + stop; + (MsgStatus, {N, IndexStateN}) when N > 0 -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + {MsgStatus1, IndexStateN1} = + maybe_write_index_to_disk(true, MsgStatus, IndexStateN), + {true, m(MsgStatus1), {N-1, IndexStateN1}} + end, {Quota, IndexState}, Q). + +permitted_ram_index_count(#vqstate { len = 0 }) -> + infinity; +permitted_ram_index_count(#vqstate { len = Len, + q2 = Q2, + q3 = Q3, + delta = #delta { count = DeltaCount } }) -> + BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), + BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). + +chunk_size(Current, Permitted) + when Permitted =:= infinity orelse Permitted >= Current -> + 0; +chunk_size(Current, Permitted) -> + lists:min([Current - Permitted, ?IO_BATCH_SIZE]). 
+ +fetch_from_q3_to_q4(State = #vqstate { + q1 = Q1, + q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3, + q4 = Q4, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, + msg_store_clients = MSCState }) -> + case bpqueue:out(Q3) of + {empty, _Q3} -> + {empty, State}; + {{value, IndexOnDisk, MsgStatus = #msg_status { + msg = undefined, guid = Guid, + is_persistent = IsPersistent }}, Q3a} -> + {{ok, Msg = #basic_message {}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + Q4a = queue:in(m(MsgStatus #msg_status { msg = Msg }), Q4), + RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk), + true = RamIndexCount1 >= 0, %% ASSERTION + State1 = State #vqstate { q3 = Q3a, + q4 = Q4a, + ram_msg_count = RamMsgCount + 1, + ram_index_count = RamIndexCount1, + msg_store_clients = MSCState1 }, + State2 = + case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of + {true, true} -> + %% q3 is now empty, it wasn't before; delta is + %% still empty. So q2 must be empty, and q1 + %% can now be joined onto q4 + true = bpqueue:is_empty(Q2), %% ASSERTION + State1 #vqstate { q1 = queue:new(), + q4 = queue:join(Q4a, Q1) }; + {true, false} -> + maybe_deltas_to_betas(State1); + {false, _} -> + %% q3 still isn't empty, we've not touched + %% delta, so the invariants between q1, q2, + %% delta and q3 are maintained + State1 + end, + {loaded, State2} + end. 
+ +maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> + State; +maybe_deltas_to_betas(State = #vqstate { + q2 = Q2, + delta = Delta, + q3 = Q3, + index_state = IndexState, + target_ram_msg_count = TargetRamMsgCount, + transient_threshold = TransientThreshold }) -> + case bpqueue:is_empty(Q3) orelse (TargetRamMsgCount /= 0) of + false -> + State; + true -> + #delta { start_seq_id = DeltaSeqId, + count = DeltaCount, + end_seq_id = DeltaSeqIdEnd } = Delta, + DeltaSeqId1 = + lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId), + DeltaSeqIdEnd]), + {List, IndexState1} = + rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, IndexState), + {Q3a, IndexState2} = betas_from_index_entries( + List, TransientThreshold, IndexState1), + State1 = State #vqstate { index_state = IndexState2 }, + case bpqueue:len(Q3a) of + 0 -> + %% we ignored every message in the segment due to + %% it being transient and below the threshold + maybe_deltas_to_betas( + State #vqstate { + delta = Delta #delta { start_seq_id = DeltaSeqId1 }}); + Q3aLen -> + Q3b = bpqueue:join(Q3, Q3a), + case DeltaCount - Q3aLen of + 0 -> + %% delta is now empty, but it wasn't + %% before, so can now join q2 onto q3 + State1 #vqstate { q2 = bpqueue:new(), + delta = ?BLANK_DELTA, + q3 = bpqueue:join(Q3b, Q2) }; + N when N > 0 -> + Delta1 = #delta { start_seq_id = DeltaSeqId1, + count = N, + end_seq_id = DeltaSeqIdEnd }, + State1 #vqstate { delta = Delta1, + q3 = Q3b } + end + end + end. + +push_alphas_to_betas(Quota, State) -> + { Quota1, State1} = maybe_push_q1_to_betas(Quota, State), + {_Quota2, State2} = maybe_push_q4_to_betas(Quota1, State1), + State2. 
+ +maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) -> + maybe_push_alphas_to_betas( + fun queue:out/1, + fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q3 = Q3, delta = #delta { count = 0 } }) -> + State1 #vqstate { q1 = Q1a, + q3 = bpqueue:in(IndexOnDisk, MsgStatus, Q3) }; + (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q2 = Q2 }) -> + State1 #vqstate { q1 = Q1a, + q2 = bpqueue:in(IndexOnDisk, MsgStatus, Q2) } + end, Quota, Q1, State). + +maybe_push_q4_to_betas(Quota, State = #vqstate { q4 = Q4 }) -> + maybe_push_alphas_to_betas( + fun queue:out_r/1, + fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q4a, State1 = #vqstate { q3 = Q3 }) -> + State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus, Q3), + q4 = Q4a } + end, Quota, Q4, State). + +maybe_push_alphas_to_betas(_Generator, _Consumer, Quota, _Q, + State = #vqstate { + ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) + when Quota =:= 0 orelse + TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> + {Quota, State}; +maybe_push_alphas_to_betas(Generator, Consumer, Quota, Q, State) -> + case Generator(Q) of + {empty, _Q} -> + {Quota, State}; + {{value, MsgStatus}, Qa} -> + {MsgStatus1 = #msg_status { msg_on_disk = true, + index_on_disk = IndexOnDisk }, + State1 = #vqstate { ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount }} = + maybe_write_to_disk(true, false, MsgStatus, State), + MsgStatus2 = m(MsgStatus1 #msg_status { msg = undefined }), + RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), + State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1, + ram_index_count = RamIndexCount1 }, + maybe_push_alphas_to_betas(Generator, Consumer, Quota - 1, Qa, + Consumer(MsgStatus2, Qa, State2)) + end. 
+ +push_betas_to_deltas(State = #vqstate { q2 = Q2, + delta = Delta, + q3 = Q3, + index_state = IndexState, + ram_index_count = RamIndexCount }) -> + {Delta2, Q2a, RamIndexCount2, IndexState2} = + push_betas_to_deltas(fun (Q2MinSeqId) -> Q2MinSeqId end, + fun bpqueue:out/1, Q2, + RamIndexCount, IndexState), + {Delta3, Q3a, RamIndexCount3, IndexState3} = + push_betas_to_deltas(fun rabbit_queue_index:next_segment_boundary/1, + fun bpqueue:out_r/1, Q3, + RamIndexCount2, IndexState2), + Delta4 = combine_deltas(Delta3, combine_deltas(Delta, Delta2)), + State #vqstate { q2 = Q2a, + delta = Delta4, + q3 = Q3a, + index_state = IndexState3, + ram_index_count = RamIndexCount3 }. + +push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) -> + case bpqueue:out(Q) of + {empty, _Q} -> + {?BLANK_DELTA, Q, RamIndexCount, IndexState}; + {{value, _IndexOnDisk1, #msg_status { seq_id = MinSeqId }}, _Qa} -> + {{value, _IndexOnDisk2, #msg_status { seq_id = MaxSeqId }}, _Qb} = + bpqueue:out_r(Q), + Limit = LimitFun(MinSeqId), + case MaxSeqId < Limit of + true -> {?BLANK_DELTA, Q, RamIndexCount, IndexState}; + false -> {Len, Qc, RamIndexCount1, IndexState1} = + push_betas_to_deltas(Generator, Limit, Q, 0, + RamIndexCount, IndexState), + {#delta { start_seq_id = Limit, + count = Len, + end_seq_id = MaxSeqId + 1 }, + Qc, RamIndexCount1, IndexState1} + end + end. 
+ +push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> + case Generator(Q) of + {empty, _Q} -> + {Count, Q, RamIndexCount, IndexState}; + {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa} + when SeqId < Limit -> + {Count, Q, RamIndexCount, IndexState}; + {{value, IndexOnDisk, MsgStatus}, Qa} -> + {RamIndexCount1, IndexState1} = + case IndexOnDisk of + true -> {RamIndexCount, IndexState}; + false -> {#msg_status { index_on_disk = true }, + IndexState2} = + maybe_write_index_to_disk(true, MsgStatus, + IndexState), + {RamIndexCount - 1, IndexState2} + end, + push_betas_to_deltas( + Generator, Limit, Qa, Count + 1, RamIndexCount1, IndexState1) + end. diff --git a/src/rabbit_writer.erl b/src/rabbit_writer.erl index 3d10dc12..aa986e54 100644 --- a/src/rabbit_writer.erl +++ b/src/rabbit_writer.erl @@ -33,14 +33,14 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([start/3, start_link/3, shutdown/1, mainloop/1]). --export([send_command/2, send_command/3, send_command_and_signal_back/3, - send_command_and_signal_back/4, send_command_and_notify/5]). --export([internal_send_command/3, internal_send_command/5]). +-export([start/5, start_link/5, mainloop/2, mainloop1/2]). +-export([send_command/2, send_command/3, send_command_sync/2, + send_command_sync/3, send_command_and_notify/5]). +-export([internal_send_command/4, internal_send_command/6]). -import(gen_tcp). --record(wstate, {sock, channel, frame_max}). +-record(wstate, {sock, channel, frame_max, protocol}). -define(HIBERNATE_AFTER, 5000). @@ -48,80 +48,96 @@ -ifdef(use_specs). --spec(start/3 :: (socket(), channel_number(), non_neg_integer()) -> pid()). --spec(start_link/3 :: (socket(), channel_number(), non_neg_integer()) -> pid()). --spec(send_command/2 :: (pid(), amqp_method_record()) -> 'ok'). --spec(send_command/3 :: (pid(), amqp_method_record(), content()) -> 'ok'). --spec(send_command_and_signal_back/3 :: (pid(), amqp_method(), pid()) -> 'ok'). 
--spec(send_command_and_signal_back/4 :: - (pid(), amqp_method(), content(), pid()) -> 'ok'). +-spec(start/5 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), rabbit_types:protocol(), pid()) + -> rabbit_types:ok(pid())). +-spec(start_link/5 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), rabbit_types:protocol(), pid()) + -> rabbit_types:ok(pid())). +-spec(send_command/2 :: + (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). +-spec(send_command/3 :: + (pid(), rabbit_framing:amqp_method_record(), rabbit_types:content()) + -> 'ok'). +-spec(send_command_sync/2 :: + (pid(), rabbit_framing:amqp_method()) -> 'ok'). +-spec(send_command_sync/3 :: + (pid(), rabbit_framing:amqp_method(), rabbit_types:content()) -> 'ok'). -spec(send_command_and_notify/5 :: - (pid(), pid(), pid(), amqp_method_record(), content()) -> 'ok'). --spec(internal_send_command/3 :: - (socket(), channel_number(), amqp_method_record()) -> 'ok'). --spec(internal_send_command/5 :: - (socket(), channel_number(), amqp_method_record(), - content(), non_neg_integer()) -> 'ok'). + (pid(), pid(), pid(), rabbit_framing:amqp_method_record(), + rabbit_types:content()) + -> 'ok'). +-spec(internal_send_command/4 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + rabbit_framing:amqp_method_record(), rabbit_types:protocol()) + -> 'ok'). +-spec(internal_send_command/6 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + rabbit_framing:amqp_method_record(), rabbit_types:content(), + non_neg_integer(), rabbit_types:protocol()) + -> 'ok'). -endif. %%---------------------------------------------------------------------------- -start(Sock, Channel, FrameMax) -> - spawn(?MODULE, mainloop, [#wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}]). - -start_link(Sock, Channel, FrameMax) -> - spawn_link(?MODULE, mainloop, [#wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}]). 
- -mainloop(State) -> +start(Sock, Channel, FrameMax, Protocol, ReaderPid) -> + {ok, + proc_lib:spawn(?MODULE, mainloop, [ReaderPid, + #wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol}])}. + +start_link(Sock, Channel, FrameMax, Protocol, ReaderPid) -> + {ok, + proc_lib:spawn_link(?MODULE, mainloop, [ReaderPid, + #wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol}])}. + +mainloop(ReaderPid, State) -> + try + mainloop1(ReaderPid, State) + catch + exit:Error -> ReaderPid ! {channel_exit, #wstate.channel, Error} + end, + done. + +mainloop1(ReaderPid, State) -> receive - Message -> ?MODULE:mainloop(handle_message(Message, State)) + Message -> ?MODULE:mainloop1(ReaderPid, handle_message(Message, State)) after ?HIBERNATE_AFTER -> - erlang:hibernate(?MODULE, mainloop, [State]) + erlang:hibernate(?MODULE, mainloop, [ReaderPid, State]) end. -handle_message({send_command, MethodRecord}, - State = #wstate{sock = Sock, channel = Channel}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord), +handle_message({send_command, MethodRecord}, State) -> + ok = internal_send_command_async(MethodRecord, State), State; -handle_message({send_command, MethodRecord, Content}, - State = #wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), +handle_message({send_command, MethodRecord, Content}, State) -> + ok = internal_send_command_async(MethodRecord, Content, State), State; -handle_message({send_command_and_signal_back, MethodRecord, Parent}, - State = #wstate{sock = Sock, channel = Channel}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord), - Parent ! 
rabbit_writer_send_command_signal, +handle_message({'$gen_call', From, {send_command_sync, MethodRecord}}, State) -> + ok = internal_send_command_async(MethodRecord, State), + gen_server:reply(From, ok), State; -handle_message({send_command_and_signal_back, MethodRecord, Content, Parent}, - State = #wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), - Parent ! rabbit_writer_send_command_signal, +handle_message({'$gen_call', From, {send_command_sync, MethodRecord, Content}}, + State) -> + ok = internal_send_command_async(MethodRecord, Content, State), + gen_server:reply(From, ok), State; handle_message({send_command_and_notify, QPid, ChPid, MethodRecord, Content}, - State = #wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), + State) -> + ok = internal_send_command_async(MethodRecord, Content, State), rabbit_amqqueue:notify_sent(QPid, ChPid), State; handle_message({inet_reply, _, ok}, State) -> State; handle_message({inet_reply, _, Status}, _State) -> exit({writer, send_failed, Status}); -handle_message(shutdown, _State) -> - exit(normal); handle_message(Message, _State) -> exit({writer, message_not_understood, Message}). @@ -135,48 +151,50 @@ send_command(W, MethodRecord, Content) -> W ! {send_command, MethodRecord, Content}, ok. -send_command_and_signal_back(W, MethodRecord, Parent) -> - W ! {send_command_and_signal_back, MethodRecord, Parent}, - ok. +send_command_sync(W, MethodRecord) -> + call(W, {send_command_sync, MethodRecord}). -send_command_and_signal_back(W, MethodRecord, Content, Parent) -> - W ! {send_command_and_signal_back, MethodRecord, Content, Parent}, - ok. +send_command_sync(W, MethodRecord, Content) -> + call(W, {send_command_sync, MethodRecord, Content}). send_command_and_notify(W, Q, ChPid, MethodRecord, Content) -> W ! 
{send_command_and_notify, Q, ChPid, MethodRecord, Content}, ok. -shutdown(W) -> - W ! shutdown, - ok. +%--------------------------------------------------------------------------- + +call(Pid, Msg) -> + {ok, Res} = gen:call(Pid, '$gen_call', Msg, infinity), + Res. %--------------------------------------------------------------------------- -assemble_frames(Channel, MethodRecord) -> +assemble_frames(Channel, MethodRecord, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, none), - rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord). + rabbit_binary_generator:build_simple_method_frame( + Channel, MethodRecord, Protocol). -assemble_frames(Channel, MethodRecord, Content, FrameMax) -> +assemble_frames(Channel, MethodRecord, Content, FrameMax, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, Content), MethodName = rabbit_misc:method_record_type(MethodRecord), - true = rabbit_framing:method_has_content(MethodName), % assertion + true = Protocol:method_has_content(MethodName), % assertion MethodFrame = rabbit_binary_generator:build_simple_method_frame( - Channel, MethodRecord), + Channel, MethodRecord, Protocol), ContentFrames = rabbit_binary_generator:build_simple_content_frames( - Channel, Content, FrameMax), + Channel, Content, FrameMax, Protocol), [MethodFrame | ContentFrames]. tcp_send(Sock, Data) -> rabbit_misc:throw_on_error(inet_error, fun () -> rabbit_net:send(Sock, Data) end). -internal_send_command(Sock, Channel, MethodRecord) -> - ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord)). +internal_send_command(Sock, Channel, MethodRecord, Protocol) -> + ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, Protocol)). -internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) -> +internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax, + Protocol) -> ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, - Content, FrameMax)). + Content, FrameMax, Protocol)). 
%% gen_tcp:send/2 does a selective receive of {inet_reply, Sock, %% Status} to obtain the result. That is bad when it is called from @@ -196,13 +214,20 @@ internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) -> %% Also note that the port has bounded buffers and port_command blocks %% when these are full. So the fact that we process the result %% asynchronously does not impact flow control. -internal_send_command_async(Sock, Channel, MethodRecord) -> - true = port_cmd(Sock, assemble_frames(Channel, MethodRecord)), +internal_send_command_async(MethodRecord, + #wstate{sock = Sock, + channel = Channel, + protocol = Protocol}) -> + true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, Protocol)), ok. -internal_send_command_async(Sock, Channel, MethodRecord, Content, FrameMax) -> +internal_send_command_async(MethodRecord, Content, + #wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol}) -> true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, - Content, FrameMax)), + Content, FrameMax, Protocol)), ok. port_cmd(Sock, Data) -> diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 0b1d7265..93adfcb1 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -4,27 +4,57 @@ %% 1) the module name is supervisor2 %% %% 2) there is a new strategy called -%% simple_one_for_one_terminate. This is exactly the same as for -%% simple_one_for_one, except that children *are* explicitly -%% terminated as per the shutdown component of the child_spec. +%% simple_one_for_one_terminate. This is exactly the same as for +%% simple_one_for_one, except that children *are* explicitly +%% terminated as per the shutdown component of the child_spec. %% -%% All modifications are (C) 2010 LShift Ltd. +%% 3) child specifications can contain, as the restart type, a tuple +%% {permanent, Delay} | {transient, Delay} where Delay >= 0. 
The +%% delay, in seconds, indicates what should happen if a child, upon +%% being restarted, exceeds the MaxT and MaxR parameters. Thus, if +%% a child exits, it is restarted as normal. If it exits +%% sufficiently quickly and often to exceed the boundaries set by +%% the MaxT and MaxR parameters, and a Delay is specified, then +%% rather than stopping the supervisor, the supervisor instead +%% continues and tries to start up the child again, Delay seconds +%% later. +%% +%% Note that you can never restart more frequently than the MaxT +%% and MaxR parameters allow: i.e. you must wait until *both* the +%% Delay has passed *and* the MaxT and MaxR parameters allow the +%% child to be restarted. +%% +%% Also note that the Delay is a *minimum*. There is no guarantee +%% that the child will be restarted within that time, especially if +%% other processes are dying and being restarted at the same time - +%% essentially we have to wait for the delay to have passed and for +%% the MaxT and MaxR parameters to permit the child to be +%% restarted. This may require waiting for longer than Delay. +%% +%% 4) Added an 'intrinsic' restart type. Like the transient type, this +%% type means the child should only be restarted if the child exits +%% abnormally. Unlike the transient type, if the child exits +%% normally, the supervisor itself also exits normally. If the +%% child is a supervisor and it exits normally (i.e. with reason of +%% 'shutdown') then the child's parent also exits normally. +%% +%% All modifications are (C) 2010 Rabbit Technologies Ltd. %% %% %CopyrightBegin% -%% +%% %% Copyright Ericsson AB 1996-2009. All Rights Reserved. -%% +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. 
-%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% %CopyrightEnd% %% -module(supervisor2). @@ -35,7 +65,7 @@ -export([start_link/2,start_link/3, start_child/2, restart_child/2, delete_child/2, terminate_child/2, - which_children/1, + which_children/1, find_child/2, check_childspecs/1]). -export([behaviour_info/1]). @@ -43,6 +73,7 @@ %% Internal exports -export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). -export([handle_cast/2]). +-export([delayed_restart/2]). -define(DICT, dict). @@ -109,6 +140,10 @@ terminate_child(Supervisor, Name) -> which_children(Supervisor) -> call(Supervisor, which_children). +find_child(Supervisor, Name) -> + [Pid || {Name1, Pid, _Type, _Modules} <- which_children(Supervisor), + Name1 =:= Name]. + call(Supervisor, Req) -> gen_server:call(Supervisor, Req, infinity). @@ -119,6 +154,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> end; check_childspecs(X) -> {error, {badarg, X}}. +delayed_restart(Supervisor, RestartDetails) -> + gen_server:cast(Supervisor, {delayed_restart, RestartDetails}). + %%% --------------------------------------------------- %%% %%% Initialize the supervisor. @@ -315,6 +353,20 @@ handle_call(which_children, _From, State) -> {reply, Resp, State}. 
+handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when ?is_simple(State) -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when not (?is_simple(State)) -> + case get_child(Child#child.name, State) of + {value, Child} -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; + _ -> + {noreply, State} + end; + %%% Hopefully cause a function-clause as there is no API function %%% that utilizes cast. handle_cast(null, State) -> @@ -480,16 +532,32 @@ restart_child(Pid, Reason, State) -> {ok, State} end. +do_restart({RestartType, Delay}, Reason, Child, State) -> + case restart1(Child, State) of + {ok, NState} -> + {ok, NState}; + {terminate, NState} -> + {ok, _TRef} = timer:apply_after( + trunc(Delay*1000), ?MODULE, delayed_restart, + [self(), {{RestartType, Delay}, Reason, Child}]), + {ok, NState} + end; do_restart(permanent, Reason, Child, State) -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); +do_restart(intrinsic, normal, Child, State) -> + {shutdown, state_del_child(Child, State)}; +do_restart(intrinsic, shutdown, Child = #child{child_type = supervisor}, + State) -> + {shutdown, state_del_child(Child, State)}; do_restart(_, normal, Child, State) -> NState = state_del_child(Child, State), {ok, NState}; do_restart(_, shutdown, Child, State) -> NState = state_del_child(Child, State), {ok, NState}; -do_restart(transient, Reason, Child, State) -> +do_restart(Type, Reason, Child, State) when Type =:= transient orelse + Type =:= intrinsic -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); do_restart(temporary, Reason, Child, State) -> @@ -500,14 +568,27 @@ do_restart(temporary, Reason, Child, State) -> restart(Child, State) -> case add_restart(State) of {ok, NState} -> - restart(NState#state.strategy, Child, NState); + 
restart(NState#state.strategy, Child, NState, fun restart/2); {terminate, NState} -> report_error(shutdown, reached_max_restart_intensity, Child, State#state.name), - {shutdown, remove_child(Child, NState)} + {shutdown, state_del_child(Child, NState)} + end. + +restart1(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState, fun restart1/2); + {terminate, _NState} -> + %% we've reached the max restart intensity, but the + %% add_restart will have added to the restarts + %% field. Given we don't want to die here, we need to go + %% back to the old restarts field otherwise we'll never + %% attempt to restart later. + {terminate, State} end. -restart(Strategy, Child, State) +restart(Strategy, Child, State, Restart) when Strategy =:= simple_one_for_one orelse Strategy =:= simple_one_for_one_terminate -> #child{mfa = {M, F, A}} = Child, @@ -521,9 +602,9 @@ restart(Strategy, Child, State) {ok, NState}; {error, Error} -> report_error(start_error, Error, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(one_for_one, Child, State) -> +restart(one_for_one, Child, State, Restart) -> case do_start_child(State#state.name, Child) of {ok, Pid} -> NState = replace_child(Child#child{pid = Pid}, State), @@ -533,25 +614,25 @@ restart(one_for_one, Child, State) -> {ok, NState}; {error, Reason} -> report_error(start_error, Reason, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(rest_for_one, Child, State) -> +restart(rest_for_one, Child, State, Restart) -> {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), ChAfter2 = terminate_children(ChAfter, State#state.name), case start_children(ChAfter2, State#state.name) of {ok, ChAfter3} -> {ok, State#state{children = ChAfter3 ++ ChBefore}}; {error, ChAfter3} -> - restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + Restart(Child, State#state{children = ChAfter3 ++ ChBefore}) end; 
-restart(one_for_all, Child, State) -> +restart(one_for_all, Child, State, Restart) -> Children1 = del_child(Child#child.pid, State#state.children), Children2 = terminate_children(Children1, State#state.name), case start_children(Children2, State#state.name) of {ok, NChs} -> {ok, State#state{children = NChs}}; {error, NChs} -> - restart(Child, State#state{children = NChs}) + Restart(Child, State#state{children = NChs}) end. %%----------------------------------------------------------------- @@ -577,14 +658,22 @@ terminate_simple_children(Child, Dynamics, SupName) -> ok. do_terminate(Child, SupName) when Child#child.pid =/= undefined -> - case shutdown(Child#child.pid, - Child#child.shutdown) of - ok -> - Child#child{pid = undefined}; - {error, OtherReason} -> - report_error(shutdown_error, OtherReason, Child, SupName), - Child#child{pid = undefined} - end; + ReportError = fun (Reason) -> + report_error(shutdown_error, Reason, Child, SupName) + end, + case shutdown(Child#child.pid, Child#child.shutdown) of + ok -> + ok; + {error, normal} -> + case Child#child.restart_type of + permanent -> ReportError(normal); + {permanent, _Delay} -> ReportError(normal); + _ -> ok + end; + {error, OtherReason} -> + ReportError(OtherReason) + end, + Child#child{pid = undefined}; do_terminate(Child, _SupName) -> Child. @@ -769,7 +858,9 @@ supname(N,_) -> N. %%% {Name, Func, RestartType, Shutdown, ChildType, Modules} %%% where Name is an atom %%% Func is {Mod, Fun, Args} == {atom, atom, list} -%%% RestartType is permanent | temporary | transient +%%% RestartType is permanent | temporary | transient | +%%% intrinsic | {permanent, Delay} | +%%% {transient, Delay} where Delay >= 0 %%% Shutdown = integer() | infinity | brutal_kill %%% ChildType = supervisor | worker %%% Modules = [atom()] | dynamic @@ -815,10 +906,18 @@ validFunc({M, F, A}) when is_atom(M), is_list(A) -> true; validFunc(Func) -> throw({invalid_mfa, Func}). 
-validRestartType(permanent) -> true; -validRestartType(temporary) -> true; -validRestartType(transient) -> true; -validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}). +validRestartType(permanent) -> true; +validRestartType(temporary) -> true; +validRestartType(transient) -> true; +validRestartType(intrinsic) -> true; +validRestartType({permanent, Delay}) -> validDelay(Delay); +validRestartType({transient, Delay}) -> validDelay(Delay); +validRestartType(RestartType) -> throw({invalid_restart_type, + RestartType}). + +validDelay(Delay) when is_number(Delay), + Delay >= 0 -> true; +validDelay(What) -> throw({invalid_delay, What}). validShutdown(Shutdown, _) when is_integer(Shutdown), Shutdown > 0 -> true; diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl index cc4982c9..c9809ace 100644 --- a/src/tcp_acceptor.erl +++ b/src/tcp_acceptor.erl @@ -55,6 +55,7 @@ handle_call(_Request, _From, State) -> {noreply, State}. handle_cast(accept, State) -> + ok = file_handle_cache:obtain(), accept(State); handle_cast(_Msg, State) -> @@ -83,7 +84,8 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}}, %% is drained. gen_event:which_handlers(error_logger), %% handle - file_handle_cache:release_on_death(apply(M, F, A ++ [Sock])) + file_handle_cache:transfer(apply(M, F, A ++ [Sock])), + ok = file_handle_cache:obtain() catch {inet_error, Reason} -> gen_tcp:close(Sock), error_logger:error_msg("unable to accept TCP connection: ~p~n", @@ -92,11 +94,13 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}}, %% accept more accept(State); + handle_info({inet_async, LSock, Ref, {error, closed}}, State=#state{sock=LSock, ref=Ref}) -> %% It would be wrong to attempt to restart the acceptor when we %% know this will fail. {stop, normal, State}; + handle_info(_Info, State) -> {noreply, State}. @@ -111,7 +115,6 @@ code_change(_OldVsn, State, _Extra) -> inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F). 
accept(State = #state{sock=LSock}) -> - ok = file_handle_cache:obtain(), case prim_inet:async_accept(LSock, -1) of {ok, Ref} -> {noreply, State#state{ref=Ref}}; Error -> {stop, {cannot_accept, Error}, State} diff --git a/src/tcp_client_sup.erl b/src/tcp_client_sup.erl index 1b785843..02d7e0e4 100644 --- a/src/tcp_client_sup.erl +++ b/src/tcp_client_sup.erl @@ -31,19 +31,19 @@ -module(tcp_client_sup). --behaviour(supervisor). +-behaviour(supervisor2). -export([start_link/1, start_link/2]). -export([init/1]). start_link(Callback) -> - supervisor:start_link(?MODULE, Callback). + supervisor2:start_link(?MODULE, Callback). start_link(SupName, Callback) -> - supervisor:start_link(SupName, ?MODULE, Callback). + supervisor2:start_link(SupName, ?MODULE, Callback). init({M,F,A}) -> - {ok, {{simple_one_for_one, 10, 10}, + {ok, {{simple_one_for_one_terminate, 10, 10}, [{tcp_client, {M,F,A}, - temporary, brutal_kill, worker, [M]}]}}. + temporary, infinity, supervisor, [M]}]}}. diff --git a/src/test_sup.erl b/src/test_sup.erl new file mode 100644 index 00000000..f41793bc --- /dev/null +++ b/src/test_sup.erl @@ -0,0 +1,94 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(test_sup). + +-behaviour(supervisor2). + +-export([test_supervisor_delayed_restart/0, + init/1, start_child/0]). + +test_supervisor_delayed_restart() -> + passed = with_sup(simple_one_for_one_terminate, + fun (SupPid) -> + {ok, _ChildPid} = + supervisor2:start_child(SupPid, []), + test_supervisor_delayed_restart(SupPid) + end), + passed = with_sup(one_for_one, fun test_supervisor_delayed_restart/1). + +test_supervisor_delayed_restart(SupPid) -> + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + timeout = ping_child(SupPid), + timer:sleep(1010), + ok = ping_child(SupPid), + passed. + +with_sup(RestartStrategy, Fun) -> + {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]), + Res = Fun(SupPid), + exit(SupPid, shutdown), + rabbit_misc:unlink_and_capture_exit(SupPid), + Res. + +init([RestartStrategy]) -> + {ok, {{RestartStrategy, 1, 1}, + [{test, {test_sup, start_child, []}, {permanent, 1}, + 16#ffffffff, worker, [test_sup]}]}}. + +start_child() -> + {ok, proc_lib:spawn_link(fun run_child/0)}. + +ping_child(SupPid) -> + Ref = make_ref(), + get_child_pid(SupPid) ! {ping, Ref, self()}, + receive {pong, Ref} -> ok + after 1000 -> timeout + end. 
+ +exit_child(SupPid) -> + true = exit(get_child_pid(SupPid), abnormal), + ok. + +get_child_pid(SupPid) -> + [{_Id, ChildPid, worker, [test_sup]}] = + supervisor2:which_children(SupPid), + ChildPid. + +run_child() -> + receive {ping, Ref, Pid} -> Pid ! {pong, Ref}, + run_child() + end. diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index cd03fcc6..e658f005 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -72,11 +72,10 @@ -ifdef(use_specs). --spec(start_link/1 :: (float()) -> - ('ignore' | {'error', any()} | {'ok', pid()})). +-spec(start_link/1 :: (float()) -> {'ok', pid()} | {'error', any()}). -spec(update/0 :: () -> 'ok'). -spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')). --spec(get_vm_limit/0 :: () -> (non_neg_integer() | 'unknown')). +-spec(get_vm_limit/0 :: () -> non_neg_integer()). -spec(get_memory_limit/0 :: () -> (non_neg_integer() | 'undefined')). -spec(get_check_interval/0 :: () -> non_neg_integer()). -spec(set_check_interval/1 :: (non_neg_integer()) -> 'ok'). diff --git a/src/worker_pool.erl b/src/worker_pool.erl index 97e07545..595884e0 100644 --- a/src/worker_pool.erl +++ b/src/worker_pool.erl @@ -52,7 +52,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). -spec(submit/1 :: (fun (() -> A) | {atom(), atom(), [any()]}) -> A). -spec(submit_async/1 :: (fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok'). diff --git a/src/worker_pool_sup.erl b/src/worker_pool_sup.erl index 4ded63a8..177a1453 100644 --- a/src/worker_pool_sup.erl +++ b/src/worker_pool_sup.erl @@ -41,9 +41,8 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). --spec(start_link/1 :: - (non_neg_integer()) -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). 
+-spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()} | {'error', any()}). -endif. diff --git a/src/worker_pool_worker.erl b/src/worker_pool_worker.erl index 57901fd5..f461a539 100644 --- a/src/worker_pool_worker.erl +++ b/src/worker_pool_worker.erl @@ -38,13 +38,13 @@ -export([set_maximum_since_use/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). + terminate/2, code_change/3, prioritise_cast/2]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(start_link/1 :: (any()) -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/1 :: (any()) -> {'ok', pid()} | {'error', any()}). -spec(submit/2 :: (pid(), fun (() -> A) | {atom(), atom(), [any()]}) -> A). -spec(submit_async/2 :: (pid(), fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok'). @@ -71,7 +71,7 @@ submit_async(Pid, Fun) -> gen_server2:cast(Pid, {submit_async, Fun}). set_maximum_since_use(Pid, Age) -> - gen_server2:pcast(Pid, 8, {set_maximum_since_use, Age}). + gen_server2:cast(Pid, {set_maximum_since_use, Age}). run({M, F, A}) -> apply(M, F, A); @@ -88,6 +88,9 @@ init([WId]) -> {ok, WId, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. +prioritise_cast({set_maximum_since_use, _Age}, _State) -> 8; +prioritise_cast(_Msg, _State) -> 0. + handle_call({submit, Fun}, From, WId) -> gen_server2:reply(From, run(Fun)), ok = worker_pool:idle(WId), |