summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Hankins <dhankins@isc.org>2007-12-18 18:05:23 +0000
committerDavid Hankins <dhankins@isc.org>2007-12-18 18:05:23 +0000
commit7351cb7e814ceb35813b6911b9c8c7af46110f17 (patch)
treeb5c8386391790643a336b197ff4e3aeca84d856b
parent19ce0bacecb9fad881400ad8527f71d9b2f2e43c (diff)
downloadisc-dhcp-7351cb7e814ceb35813b6911b9c8c7af46110f17.tar.gz
- A bug in failover pool rebalancing that caused POOLREQ message ping-pongs
was repaired. [ISC-Bugs #17228] - A flaw in failover pool rebalancing that could cause POOLREQ messages to be sent outside of the min-balance/max-balance scheduled intervals has been repaired. [ISC-Bugs #17228]
-rw-r--r--RELNOTES7
-rw-r--r--server/failover.c87
2 files changed, 70 insertions, 24 deletions
diff --git a/RELNOTES b/RELNOTES
index 09b2db65..f7451c30 100644
--- a/RELNOTES
+++ b/RELNOTES
@@ -94,6 +94,13 @@ the README file.
'conflict-done' state message to be logged as 'unknown-state' has been
repaired. It is now logged correctly.
+- A bug in failover pool rebalancing that caused POOLREQ message ping-pongs
+ was repaired.
+
+- A flaw in failover pool rebalancing that could cause POOLREQ messages to
+ be sent outside of the min-balance/max-balance scheduled intervals has
+ been repaired.
+
Changes since 3.1.0rc1
- The parse warning that 'deny dyanmic bootp;' must be configured for
diff --git a/server/failover.c b/server/failover.c
index 5c4ce382..6a4ae1ca 100644
--- a/server/failover.c
+++ b/server/failover.c
@@ -34,7 +34,7 @@
#ifndef lint
static char copyright[] =
-"$Id: failover.c,v 1.63.56.14 2007/12/13 16:59:59 dhankins Exp $ Copyright (c) 2004-2007 Internet Systems Consortium. All rights reserved.\n";
+"$Id: failover.c,v 1.63.56.15 2007/12/18 18:05:23 dhankins Exp $ Copyright (c) 2004-2007 Internet Systems Consortium. All rights reserved.\n";
#endif /* not lint */
#include "dhcpd.h"
@@ -55,7 +55,8 @@ static isc_result_t failover_message_dereference (failover_message_t **,
static void dhcp_failover_pool_balance(dhcp_failover_state_t *state);
static void dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state);
-static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state);
+static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state,
+ isc_boolean_t *sendreq);
static INLINE int secondary_not_hoarding(dhcp_failover_state_t *state,
struct pool *p);
@@ -2250,7 +2251,11 @@ isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state,
return ISC_R_SUCCESS;
}
-/* Balance operation manual entry. */
+/*
+ * Balance operation manual entry; startup, entrance to normal state. No
+ * sense sending a POOLREQ at this stage; the peer is likely about to schedule
+ * their own rebalance event upon entering normal themselves.
+ */
static void
dhcp_failover_pool_balance(dhcp_failover_state_t *state)
{
@@ -2258,25 +2263,36 @@ dhcp_failover_pool_balance(dhcp_failover_state_t *state)
cancel_timeout(dhcp_failover_pool_rebalance, state);
state->sched_balance = 0;
- dhcp_failover_pool_dobalance(state);
+ dhcp_failover_pool_dobalance(state, NULL);
}
-/* Balance operation entry from timer event. */
+/*
+ * Balance operation entry from timer event. Once per timer interval is
+ * the only time we want to emit POOLREQs (asserting an interrupt in our
+ * peer).
+ */
void
dhcp_failover_pool_rebalance(void *failover_state)
{
dhcp_failover_state_t *state;
+ isc_boolean_t sendreq = ISC_FALSE;
state = (dhcp_failover_state_t *)failover_state;
/* Clear scheduled event indicator. */
state->sched_balance = 0;
- if (dhcp_failover_pool_dobalance(state))
+ if (dhcp_failover_pool_dobalance(state, &sendreq))
dhcp_failover_send_updates(state);
+
+ if (sendreq)
+ dhcp_failover_send_poolreq(state);
}
-/* Balance operation entry from POOLREQ protocol message. */
+/*
+ * Balance operation entry from POOLREQ protocol message. Do not permit a
+ * POOLREQ to send back a POOLREQ. Ping pong.
+ */
static void
dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state)
{
@@ -2286,7 +2302,7 @@ dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state)
cancel_timeout(dhcp_failover_pool_rebalance, state);
state->sched_balance = 0;
- queued = dhcp_failover_pool_dobalance(state);
+ queued = dhcp_failover_pool_dobalance(state, NULL);
dhcp_failover_send_poolresp(state, queued);
@@ -2298,12 +2314,19 @@ dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state)
state->name);
}
-/* Do the meat of the work common to all forms of pool rebalance. */
-static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state)
+/*
+ * Do the meat of the work common to all forms of pool rebalance. If the
+ * caller deems it appropriate to transmit POOLREQ messages, it can use the
+ * sendreq pointer to pass in the address of a FALSE value which this function
+ * will conditionally turn TRUE if a POOLREQ is determined to be necessary.
+ * A NULL value may be passed, in which case no action is taken.
+ */
+static int
+dhcp_failover_pool_dobalance(dhcp_failover_state_t *state,
+ isc_boolean_t *sendreq)
{
- int lts, total, thresh, hold, pass;
+ int lts, total, thresh, hold, panic, pass;
int leases_queued = 0;
- int reqsent = 0;
struct lease *lp = (struct lease *)0;
struct lease *next = (struct lease *)0;
struct shared_network *s;
@@ -2313,7 +2336,7 @@ static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state)
binding_state_t my_lease_state;
struct lease **lq;
int (*log_func)(const char *, ...);
- const char *result;
+ const char *result, *reqlog;
if (state -> me.state != normal)
return 0;
@@ -2348,21 +2371,37 @@ static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state)
thresh = ((total * state->max_lease_misbalance) + 50) / 100;
hold = ((total * state->max_lease_ownership) + 50) / 100;
+ /*
+ * If we need leases (so lts is negative) more than negative
+ * double the thresh%, panic and send poolreq to hopefully wake
+ * up the peer (but more likely the db is inconsistent). But,
+ * if this comes out zero, switch to -1 so that the POOLREQ is
+ * sent on lts == -2 rather than right away at -1.
+ *
+ * Note that we do not subtract -1 from panic all the time
+ * because thresh% and hold% may come out to the same number,
+ * and that is correct operation...where thresh% and hold% are
+ * both -1, we want to send poolreq when lts reaches -3. So,
+ * "-3 < -2", lts < panic.
+ */
+ panic = thresh * -2;
+
+ if (panic == 0)
+ panic = -1;
+
+ if ((sendreq != NULL) && (lts < panic)) {
+ reqlog = " (requesting peer rebalance!)";
+ *sendreq = ISC_TRUE;
+ } else
+ reqlog = "";
+
log_info("balancing pool %lx %s total %d free %d "
- "backup %d lts %d max-own (+/-)%d",
+ "backup %d lts %d max-own (+/-)%d%s",
(unsigned long)p,
(p->shared_network ?
p->shared_network->name : ""), p->lease_count,
- p->free_leases, p->backup_leases, lts, hold);
-
- /* If lts is in the negatives (we need leases) more than
- * negative double the thresh%, panic and send poolreq to
- * hopefully wake up the peer.
- */
- if (!reqsent && (lts < (thresh * -2))) {
- dhcp_failover_send_poolreq(state);
- reqsent = 1;
- }
+ p->free_leases, p->backup_leases, lts, hold,
+ reqlog);
/* In the first pass, try to allocate leases to the
* peer which it would normally be responsible for (if