summaryrefslogtreecommitdiff
path: root/storage/bdb/rep/rep_util.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/bdb/rep/rep_util.c')
-rw-r--r--storage/bdb/rep/rep_util.c867
1 files changed, 867 insertions, 0 deletions
diff --git a/storage/bdb/rep/rep_util.c b/storage/bdb/rep/rep_util.c
new file mode 100644
index 00000000000..9c99d33ed4a
--- /dev/null
+++ b/storage/bdb/rep/rep_util.c
@@ -0,0 +1,867 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2001-2002
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: rep_util.c,v 1.51 2002/09/05 02:30:00 margo Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/fop.h"
+#include "dbinc/hash.h"
+#include "dbinc/log.h"
+#include "dbinc/qam.h"
+#include "dbinc/rep.h"
+#include "dbinc/txn.h"
+
+/*
+ * rep_util.c:
+ * Miscellaneous replication-related utility functions, including
+ * those called by other subsystems.
+ */
+static int __rep_cmp_bylsn __P((const void *, const void *));
+static int __rep_cmp_bypage __P((const void *, const void *));
+
+#ifdef REP_DIAGNOSTIC
+static void __rep_print_logmsg __P((DB_ENV *, const DBT *, DB_LSN *));
+#endif
+
+/*
+ * __rep_check_alloc --
+ *	Ensure the LSN_PAGE array in a TXN_RECS has room for at least n
+ *	more entries beyond the r->npages already accounted for, growing
+ *	the array if necessary.  (This function is called by the
+ *	__*_getpgnos() functions in *.src.)
+ *
+ *	The allocation starts at 20 entries and doubles until it is large
+ *	enough.  Returns 0 on success or the error from __os_realloc; on
+ *	failure r->nalloc still describes the last successful allocation.
+ *
+ * PUBLIC: int __rep_check_alloc __P((DB_ENV *, TXN_RECS *, int));
+ */
+int
+__rep_check_alloc(dbenv, r, n)
+	DB_ENV *dbenv;
+	TXN_RECS *r;
+	int n;
+{
+	int needed, newsize, ret;
+
+	/* Neither npages nor n changes while we grow, so compute once. */
+	needed = r->npages + n;
+
+	/* r->nalloc is only advanced after a successful reallocation. */
+	for (; r->nalloc < needed; r->nalloc = newsize) {
+		if (r->nalloc == 0)
+			newsize = 20;
+		else
+			newsize = r->nalloc * 2;
+
+		ret = __os_realloc(dbenv,
+		    newsize * sizeof(LSN_PAGE), &r->array);
+		if (ret != 0)
+			return (ret);
+	}
+
+	return (0);
+}
+
+/*
+ * __rep_send_message --
+ *	Wrapper for sending a replication message.  Builds the REP_CONTROL
+ *	header (record type, flags, LSN, versions and the current
+ *	generation) and hands it, together with the record, to the send
+ *	function registered by the application.
+ *
+ *	lsnp may be NULL, in which case a zero LSN is sent.  dbtp may be
+ *	NULL, in which case an empty DBT is passed to the send function.
+ *	Returns whatever the application's send function returns.
+ *
+ * PUBLIC: int __rep_send_message __P((DB_ENV *, int,
+ * PUBLIC:     u_int32_t, DB_LSN *, const DBT *, u_int32_t));
+ */
+int
+__rep_send_message(dbenv, eid, rtype, lsnp, dbtp, flags)
+	DB_ENV *dbenv;
+	int eid;
+	u_int32_t rtype;
+	DB_LSN *lsnp;
+	const DBT *dbtp;
+	u_int32_t flags;
+{
+	DBT control_dbt, empty_dbt;
+	DB_REP *db_rep;
+	REP *rep;
+	REP_CONTROL cntrl;
+	u_int32_t send_flags;
+	int ret;
+
+	db_rep = dbenv->rep_handle;
+	rep = db_rep->region;
+
+	/* Fill in the control structure that precedes every message. */
+	memset(&cntrl, 0, sizeof(cntrl));
+	cntrl.rep_version = DB_REPVERSION;
+	cntrl.log_version = DB_LOGVERSION;
+	cntrl.rectype = rtype;
+	cntrl.flags = flags;
+	if (lsnp != NULL)
+		cntrl.lsn = *lsnp;
+	else
+		ZERO_LSN(cntrl.lsn);
+
+	/* The generation lives in the shared region; read it under lock. */
+	MUTEX_LOCK(dbenv, db_rep->mutexp);
+	cntrl.gen = rep->gen;
+	MUTEX_UNLOCK(dbenv, db_rep->mutexp);
+
+	memset(&control_dbt, 0, sizeof(control_dbt));
+	control_dbt.size = sizeof(cntrl);
+	control_dbt.data = &cntrl;
+
+	/* The application's send function may not cope with a NULL record. */
+	if (dbtp == NULL) {
+		memset(&empty_dbt, 0, sizeof(empty_dbt));
+		dbtp = &empty_dbt;
+	}
+
+	/* Translate the internal "permanent" flag into the public one. */
+	send_flags = 0;
+	if (LF_ISSET(DB_PERMANENT))
+		send_flags = DB_REP_PERMANENT;
+
+#if 0
+	__rep_print_message(dbenv, eid, &cntrl, "rep_send_message");
+#endif
+#ifdef REP_DIAGNOSTIC
+	if (rtype == REP_LOG)
+		__rep_print_logmsg(dbenv, dbtp, lsnp);
+#endif
+	ret = db_rep->rep_send(dbenv, &control_dbt, dbtp, eid, send_flags);
+
+	/*
+	 * The statistics are updated without holding the rep mutex, so a
+	 * race can miscount.  The extra accuracy is not considered worth
+	 * the cost of acquiring the mutex.
+	 */
+	if (ret != 0)
+		rep->stat.st_msgs_send_failures++;
+	else
+		rep->stat.st_msgs_sent++;
+
+	return (ret);
+}
+
+#ifdef REP_DIAGNOSTIC
+
+/*
+ * __rep_print_logmsg --
+ *	This is a debugging routine for printing out log records that
+ *	we are about to transmit to a client.
+ *
+ *	The print dispatch table is built on the first call and kept in
+ *	function-static storage so that later calls reuse it.  Errors from
+ *	building the table or from the dispatch are deliberately ignored:
+ *	this is diagnostic output only.
+ */
+
+static void
+__rep_print_logmsg(dbenv, logdbt, lsnp)
+	DB_ENV *dbenv;
+	const DBT *logdbt;
+	DB_LSN *lsnp;
+{
+	/*
+	 * Static structures to hold the printing functions.  Both the
+	 * table pointer and its size must be static: if the size were
+	 * automatic it would be zero on every call and the table would be
+	 * re-initialized each time a log record is printed.
+	 */
+	static int (**ptab)__P((DB_ENV *,
+	    DBT *, DB_LSN *, db_recops, void *)) = NULL;
+	static size_t ptabsize = 0;
+
+	if (ptabsize == 0) {
+		/* Initialize the table. */
+		(void)__bam_init_print(dbenv, &ptab, &ptabsize);
+		(void)__crdel_init_print(dbenv, &ptab, &ptabsize);
+		(void)__db_init_print(dbenv, &ptab, &ptabsize);
+		(void)__dbreg_init_print(dbenv, &ptab, &ptabsize);
+		(void)__fop_init_print(dbenv, &ptab, &ptabsize);
+		(void)__qam_init_print(dbenv, &ptab, &ptabsize);
+		(void)__ham_init_print(dbenv, &ptab, &ptabsize);
+		(void)__txn_init_print(dbenv, &ptab, &ptabsize);
+	}
+
+	/* The dispatch interface takes a non-const DBT; the cast drops const. */
+	(void)__db_dispatch(dbenv,
+	    ptab, ptabsize, (DBT *)logdbt, lsnp, DB_TXN_PRINT, NULL);
+}
+
+#endif
+/*
+ * __rep_new_master --
+ * Called after a master election to sync back up with a new master.
+ * It's possible that we already know of this new master in which case
+ * we don't need to do anything.
+ *
+ * This is written assuming that this message came from the master; we
+ * need to enforce that in __rep_process_record, but right now, we have
+ * no way to identify the master.
+ *
+ * Parameters:
+ * dbenv - environment handle; its rep_handle and lg_handle are used.
+ * cntrl - control structure of the received message; its gen and lsn
+ *         fields are read.
+ * eid   - environment ID recorded as the new master_id.
+ *
+ * Returns 0 if neither the generation nor the master ID changed,
+ * DB_REP_NEWMASTER if the master changed and any follow-up request
+ * was sent successfully, or the error from the log cursor or from
+ * __rep_send_message otherwise.
+ *
+ * PUBLIC: int __rep_new_master __P((DB_ENV *, REP_CONTROL *, int));
+ */
+int
+__rep_new_master(dbenv, cntrl, eid)
+ DB_ENV *dbenv;
+ REP_CONTROL *cntrl;
+ int eid;
+{
+ DB_LOG *dblp;
+ DB_LOGC *logc;
+ DB_LSN last_lsn, lsn;
+ DB_REP *db_rep;
+ DBT dbt;
+ LOG *lp;
+ REP *rep;
+ int change, ret, t_ret;
+
+ db_rep = dbenv->rep_handle;
+ rep = db_rep->region;
+ /*
+  * Under the rep mutex: note the election as finished (ELECTION_DONE
+  * is defined elsewhere -- presumably it clears election state; TODO
+  * confirm) and record the new generation/master if either differs
+  * from what we already have.
+  */
+ MUTEX_LOCK(dbenv, db_rep->mutexp);
+ ELECTION_DONE(rep);
+ change = rep->gen != cntrl->gen || rep->master_id != eid;
+ if (change) {
+ rep->gen = cntrl->gen;
+ rep->master_id = eid;
+ F_SET(rep, REP_F_RECOVER);
+ rep->stat.st_master_changes++;
+ }
+ MUTEX_UNLOCK(dbenv, db_rep->mutexp);
+
+ /* We already knew about this master; nothing more to do. */
+ if (!change)
+ return (0);
+
+ /*
+ * If the master changed, we need to start the process of
+ * figuring out what our last valid log record is. However,
+ * if both the master and we agree that the max LSN is 0,0,
+ * then there is no recovery to be done. If we are at 0 and
+ * the master is not, then we just need to request all the log
+ * records from the master.
+ */
+ dblp = dbenv->lg_handle;
+ lp = dblp->reginfo.primary;
+ /*
+  * Snapshot the log region's current LSN under the region lock.
+  * lsn keeps the raw value; last_lsn is backed up by lp->len when
+  * it lies beyond the log file header (presumably lp->len is the
+  * length of the last record written -- TODO confirm).
+  */
+ R_LOCK(dbenv, &dblp->reginfo);
+ last_lsn = lsn = lp->lsn;
+ if (last_lsn.offset > sizeof(LOGP))
+ last_lsn.offset -= lp->len;
+ R_UNLOCK(dbenv, &dblp->reginfo);
+ if (IS_INIT_LSN(lsn) || IS_ZERO_LSN(lsn)) {
+ /*
+  * Our log is empty: no recovery is needed, so clear the recover
+  * flag.  This label is also the target of the "goto empty" below,
+  * taken when the log cursor finds no records.
+  */
+empty: MUTEX_LOCK(dbenv, db_rep->mutexp);
+ F_CLR(rep, REP_F_RECOVER);
+ MUTEX_UNLOCK(dbenv, db_rep->mutexp);
+
+ /*
+  * If the master's LSN is also the initial LSN there is nothing to
+  * fetch; otherwise request all of the master's log records.
+  */
+ if (IS_INIT_LSN(cntrl->lsn))
+ ret = 0;
+ else
+ ret = __rep_send_message(dbenv, rep->master_id,
+ REP_ALL_REQ, &lsn, NULL, 0);
+
+ if (ret == 0)
+ ret = DB_REP_NEWMASTER;
+ return (ret);
+ } else if (last_lsn.offset <= sizeof(LOGP)) {
+ /*
+ * We have just changed log files and need to set lastlsn
+ * to the last record in the previous log files.
+ */
+ if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
+ return (ret);
+ memset(&dbt, 0, sizeof(dbt));
+ ret = logc->get(logc, &last_lsn, &dbt, DB_LAST);
+ /* Always close the cursor; keep the first error encountered. */
+ if ((t_ret = logc->close(logc, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ /* No log records at all: treat as the empty-log case above. */
+ if (ret == DB_NOTFOUND)
+ goto empty;
+ if (ret != 0)
+ return (ret);
+ }
+
+ /*
+  * Record the LSN we are about to verify in the log region, then ask
+  * the new master to verify that record.
+  */
+ R_LOCK(dbenv, &dblp->reginfo);
+ lp->verify_lsn = last_lsn;
+ R_UNLOCK(dbenv, &dblp->reginfo);
+ if ((ret = __rep_send_message(dbenv,
+ eid, REP_VERIFY_REQ, &last_lsn, NULL, 0)) != 0)
+ return (ret);
+
+ return (DB_REP_NEWMASTER);
+}
+
+/*
+ * __rep_lockpgno_init
+ *	Build the dispatch table used to find the pages touched by each
+ *	log record, so that locks can be acquired on them.
+ *
+ *	On entry the table is reset to empty (NULL, size 0); each access
+ *	method and subsystem then registers its "getpgnos" functions in
+ *	turn.  Registration stops at the first failure and that error is
+ *	returned; 0 is returned when every subsystem registered.
+ *
+ * PUBLIC: int __rep_lockpgno_init __P((DB_ENV *,
+ * PUBLIC:     int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *),
+ * PUBLIC:     size_t *));
+ */
+int
+__rep_lockpgno_init(dbenv, dtabp, dtabsizep)
+	DB_ENV *dbenv;
+	int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+	size_t *dtabsizep;
+{
+	int ret;
+
+	/* Start from an empty dispatch table. */
+	*dtabp = NULL;
+	*dtabsizep = 0;
+
+	/* Register each subsystem in order, stopping at the first error. */
+	ret = __bam_init_getpgnos(dbenv, dtabp, dtabsizep);
+	if (ret == 0)
+		ret = __crdel_init_getpgnos(dbenv, dtabp, dtabsizep);
+	if (ret == 0)
+		ret = __db_init_getpgnos(dbenv, dtabp, dtabsizep);
+	if (ret == 0)
+		ret = __dbreg_init_getpgnos(dbenv, dtabp, dtabsizep);
+	if (ret == 0)
+		ret = __fop_init_getpgnos(dbenv, dtabp, dtabsizep);
+	if (ret == 0)
+		ret = __qam_init_getpgnos(dbenv, dtabp, dtabsizep);
+	if (ret == 0)
+		ret = __ham_init_getpgnos(dbenv, dtabp, dtabsizep);
+	if (ret == 0)
+		ret = __txn_init_getpgnos(dbenv, dtabp, dtabsizep);
+
+	return (ret);
+}
+
+/*
+ * __rep_unlockpages --
+ *	Release the page locks taken by __rep_lockpages by issuing a
+ *	single DB_LOCK_PUT_ALL request for locker ID lid.  Returns the
+ *	result of the lock_vec call.
+ *
+ * PUBLIC: int __rep_unlockpages __P((DB_ENV *, u_int32_t));
+ */
+int
+__rep_unlockpages(dbenv, lid)
+	DB_ENV *dbenv;
+	u_int32_t lid;
+{
+	DB_LOCKREQ put_all, *failed;
+
+	/* Only the op field is set for this request. */
+	put_all.op = DB_LOCK_PUT_ALL;
+
+	return (dbenv->lock_vec(dbenv, lid, 0, &put_all, 1, &failed));
+}
+
+/*
+ * __rep_lockpages --
+ *	Called to gather and lock pages in preparation for both
+ *	single transaction apply as well as client synchronization
+ *	with a new master.  A non-NULL key_lsn means that we're locking
+ *	in order to apply a single log record during client recovery
+ *	to the joint LSN.  A non-NULL max_lsn means that we are applying
+ *	a transaction whose commit is at max_lsn.
+ *
+ *	Exactly one of key_lsn and max_lsn is expected to be non-NULL.
+ *	The max_lsn (single transaction apply) mode is not implemented
+ *	here and asserts.
+ *
+ *	If recs is non-NULL it is reset and, on success, filled with the
+ *	pages touched by the record, sorted by LSN; the caller then owns
+ *	recs->array.  If recs is NULL, or on any error, the page array is
+ *	freed before returning.  If the multi-lock request fails, all
+ *	locks held by locker lid are released and the original error is
+ *	returned.
+ *
+ * PUBLIC: int __rep_lockpages __P((DB_ENV *,
+ * PUBLIC:     int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *),
+ * PUBLIC:     size_t, DB_LSN *, DB_LSN *, TXN_RECS *, u_int32_t));
+ */
+int
+__rep_lockpages(dbenv, dtab, dtabsize, key_lsn, max_lsn, recs, lid)
+	DB_ENV *dbenv;
+	int (**dtab)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+	size_t dtabsize;
+	DB_LSN *key_lsn, *max_lsn;
+	TXN_RECS *recs;
+	u_int32_t lid;
+{
+	DBT data_dbt, lo;
+	DB_LOCK l;
+	DB_LOCKREQ *lvp;
+	DB_LOGC *logc;
+	DB_LSN tmp_lsn;
+	TXN_RECS tmp, *t;
+	db_pgno_t cur_pgno;
+	linfo_t locks;
+	int first, i, ret, t_ret, unique;
+	u_int32_t cur_fid;
+
+	/*
+	 * There are two phases: First, we have to traverse backwards through
+	 * the log records gathering the list of all the pages accessed.  Once
+	 * we have this information we can acquire all the locks we need.
+	 */
+
+	/* Initialization */
+	memset(&locks, 0, sizeof(locks));
+	ret = 0;
+	first = 0;
+
+	/* Use a scratch TXN_RECS when the caller doesn't want the list. */
+	t = recs != NULL ? recs : &tmp;
+	t->npages = t->nalloc = 0;
+	t->array = NULL;
+
+	/*
+	 * We've got to be in one mode or the other; else life will either
+	 * be excessively boring or overly exciting.
+	 */
+	DB_ASSERT(key_lsn != NULL || max_lsn != NULL);
+	DB_ASSERT(key_lsn == NULL || max_lsn == NULL);
+
+	/*
+	 * Phase 1:  Fill in the pgno array.
+	 */
+	memset(&data_dbt, 0, sizeof(data_dbt));
+	if (F_ISSET(dbenv, DB_ENV_THREAD))
+		F_SET(&data_dbt, DB_DBT_REALLOC);
+
+	/* Single transaction apply. */
+	if (max_lsn != NULL) {
+		DB_ASSERT(0); /* XXX */
+		/*
+		tmp_lsn = *max_lsn;
+		if ((ret = __rep_apply_thread(dbenv, dtab, dtabsize,
+		    &data_dbt, &tmp_lsn, t)) != 0)
+			goto err;
+			*/
+	}
+
+	/* In recovery. */
+	if (key_lsn != NULL) {
+		if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
+			goto err;
+
+		/*
+		 * Only dispatch the record if we actually managed to read
+		 * it; otherwise data_dbt does not hold a log record and the
+		 * read error must be reported rather than overwritten.
+		 */
+		if ((ret =
+		    logc->get(logc, key_lsn, &data_dbt, DB_SET)) == 0) {
+			/*
+			 * Save lsn values, since dispatch functions can
+			 * change them.
+			 */
+			tmp_lsn = *key_lsn;
+			ret = __db_dispatch(dbenv, dtab, dtabsize,
+			    &data_dbt, &tmp_lsn, DB_TXN_GETPGNOS, t);
+		}
+
+		/* Always close the cursor; keep the first error. */
+		if ((t_ret = logc->close(logc, 0)) != 0 && ret == 0)
+			ret = t_ret;
+
+		/*
+		 * If ret == DB_DELETED, this record refers to a temporary
+		 * file and there's nothing to apply.
+		 */
+		if (ret == DB_DELETED) {
+			ret = 0;
+			goto out;
+		} else if (ret != 0)
+			goto err;
+	}
+
+	if (t->npages == 0)
+		goto out;
+
+	/* Phase 2: Write lock all the pages. */
+
+	/* Sort the entries in the array by page number. */
+	qsort(t->array, t->npages, sizeof(LSN_PAGE), __rep_cmp_bypage);
+
+	/*
+	 * Count the number of unique pages, remembering the index of the
+	 * first lockable one for the single-lock case below.
+	 */
+	cur_fid = DB_LOGFILEID_INVALID;
+	cur_pgno = PGNO_INVALID;
+	unique = 0;
+	for (i = 0; i < t->npages; i++) {
+		if (F_ISSET(&t->array[i], LSN_PAGE_NOLOCK))
+			continue;
+		if (t->array[i].pgdesc.pgno != cur_pgno ||
+		    t->array[i].fid != cur_fid) {
+			if (unique == 0)
+				first = i;
+			cur_pgno = t->array[i].pgdesc.pgno;
+			cur_fid = t->array[i].fid;
+			unique++;
+		}
+	}
+
+	if (unique == 0)
+		goto out;
+
+	/* Handle single lock case specially, else allocate space for locks. */
+	if (unique == 1) {
+		/*
+		 * Lock the one page that needs locking.  That is not
+		 * necessarily array[0]: entries flagged LSN_PAGE_NOLOCK may
+		 * sort ahead of it.  Fall through to the common exit path so
+		 * the array is still re-sorted by LSN for the caller.
+		 */
+		memset(&lo, 0, sizeof(lo));
+		lo.data = &t->array[first].pgdesc;
+		lo.size = sizeof(t->array[first].pgdesc);
+		ret = dbenv->lock_get(dbenv, lid, 0, &lo, DB_LOCK_WRITE, &l);
+		goto out;
+	}
+
+	/* Multi-lock case. */
+	locks.n = unique;
+	if ((ret = __os_calloc(dbenv,
+	    unique, sizeof(DB_LOCKREQ), &locks.reqs)) != 0)
+		goto err;
+	if ((ret = __os_calloc(dbenv, unique, sizeof(DBT), &locks.objs)) != 0)
+		goto err;
+
+	/* Build one write-lock request per unique (file, page) pair. */
+	unique = 0;
+	cur_fid = DB_LOGFILEID_INVALID;
+	cur_pgno = PGNO_INVALID;
+	for (i = 0; i < t->npages; i++) {
+		if (F_ISSET(&t->array[i], LSN_PAGE_NOLOCK))
+			continue;
+		if (t->array[i].pgdesc.pgno != cur_pgno ||
+		    t->array[i].fid != cur_fid) {
+			cur_pgno = t->array[i].pgdesc.pgno;
+			cur_fid = t->array[i].fid;
+			locks.reqs[unique].op = DB_LOCK_GET;
+			locks.reqs[unique].mode = DB_LOCK_WRITE;
+			locks.reqs[unique].obj = &locks.objs[unique];
+			locks.objs[unique].data = &t->array[i].pgdesc;
+			locks.objs[unique].size = sizeof(t->array[i].pgdesc);
+			unique++;
+		}
+	}
+
+	/* Finally, get the locks. */
+	if ((ret =
+	    dbenv->lock_vec(dbenv, lid, 0, locks.reqs, unique, &lvp)) != 0) {
+		/*
+		 * If we were unsuccessful, unlock any locks we acquired before
+		 * the error and return the original error value.
+		 */
+		(void)__rep_unlockpages(dbenv, lid);
+	}
+
+err:
+out:	if (locks.objs != NULL)
+		__os_free(dbenv, locks.objs);
+	if (locks.reqs != NULL)
+		__os_free(dbenv, locks.reqs);
+
+	/*
+	 * Before we return, sort by LSN so that we apply records in the
+	 * right order.  Nothing to do for zero or one entries (and the
+	 * array pointer may be NULL when there are none).
+	 */
+	if (t->npages > 1)
+		qsort(t->array, t->npages, sizeof(LSN_PAGE), __rep_cmp_bylsn);
+
+	/* Discard the page list on error or if the caller didn't want it. */
+	if ((ret != 0 || recs == NULL) && t->nalloc != 0) {
+		__os_free(dbenv, t->array);
+		t->array = NULL;
+		t->npages = t->nalloc = 0;
+	}
+
+	if (F_ISSET(&data_dbt, DB_DBT_REALLOC) && data_dbt.data != NULL)
+		__os_ufree(dbenv, data_dbt.data);
+
+	return (ret);
+}
+
+/*
+ * __rep_cmp_bypage and __rep_cmp_bylsn --
+ *	Comparison functions for qsort over arrays of LSN_PAGE.  "bypage"
+ *	orders by file ID, then page number, then LSN (file, offset).
+ *	"bylsn" orders by LSN (file, offset), then file ID, then page
+ *	number.  Each returns -1, 0 or 1.
+ */
+static int
+__rep_cmp_bypage(a, b)
+	const void *a, *b;
+{
+	const LSN_PAGE *lhs, *rhs;
+
+	lhs = (const LSN_PAGE *)a;
+	rhs = (const LSN_PAGE *)b;
+
+	/* Primary key: the file the page belongs to. */
+	if (lhs->fid != rhs->fid)
+		return (lhs->fid < rhs->fid ? -1 : 1);
+
+	/* Secondary key: the page number within that file. */
+	if (lhs->pgdesc.pgno != rhs->pgdesc.pgno)
+		return (lhs->pgdesc.pgno < rhs->pgdesc.pgno ? -1 : 1);
+
+	/* Tie-break on the LSN: log file first, then offset. */
+	if (lhs->lsn.file != rhs->lsn.file)
+		return (lhs->lsn.file < rhs->lsn.file ? -1 : 1);
+
+	if (lhs->lsn.offset != rhs->lsn.offset)
+		return (lhs->lsn.offset < rhs->lsn.offset ? -1 : 1);
+
+	return (0);
+}
+
+/*
+ * __rep_cmp_bylsn --
+ *	qsort comparison for LSN_PAGE entries: orders by LSN (file, then
+ *	offset), breaking ties by file ID and then page number.  Returns
+ *	-1, 0 or 1.
+ */
+static int
+__rep_cmp_bylsn(a, b)
+	const void *a, *b;
+{
+	const LSN_PAGE *lhs, *rhs;
+
+	lhs = (const LSN_PAGE *)a;
+	rhs = (const LSN_PAGE *)b;
+
+	/* Primary key: the LSN, log file first and then offset. */
+	if (lhs->lsn.file != rhs->lsn.file)
+		return (lhs->lsn.file < rhs->lsn.file ? -1 : 1);
+
+	if (lhs->lsn.offset != rhs->lsn.offset)
+		return (lhs->lsn.offset < rhs->lsn.offset ? -1 : 1);
+
+	/* Tie-break on the file ID, then the page number. */
+	if (lhs->fid != rhs->fid)
+		return (lhs->fid < rhs->fid ? -1 : 1);
+
+	if (lhs->pgdesc.pgno != rhs->pgdesc.pgno)
+		return (lhs->pgdesc.pgno < rhs->pgdesc.pgno ? -1 : 1);
+
+	return (0);
+}
+
+/*
+ * __rep_is_client
+ *	Used by other subsystems to figure out if this is a replication
+ *	client site.
+ *
+ *	Returns 0 when replication is not configured (no rep handle);
+ *	otherwise returns the REP_F_UPGRADE / REP_F_LOGSONLY bits that are
+ *	set in the replication region, which is non-zero for a client.
+ *
+ * PUBLIC: int __rep_is_client __P((DB_ENV *));
+ */
+int
+__rep_is_client(dbenv)
+	DB_ENV *dbenv;
+{
+	DB_REP *db_rep;
+	REP *rep;
+	int is_client;
+
+	db_rep = dbenv->rep_handle;
+	if (db_rep == NULL)
+		return (0);
+
+	rep = db_rep->region;
+
+	/* The flags live in the shared region; sample them under the mutex. */
+	MUTEX_LOCK(dbenv, db_rep->mutexp);
+	is_client = F_ISSET(rep, REP_F_UPGRADE | REP_F_LOGSONLY);
+	MUTEX_UNLOCK(dbenv, db_rep->mutexp);
+
+	return (is_client);
+}
+
+/*
+ * __rep_send_vote
+ *	Send this site's vote for the election.  The site count, priority
+ *	and tiebreaker are packed into a REP_VOTE_INFO and broadcast as a
+ *	REP_VOTE1 message carrying the LSN pointed to by lsnp.  Returns
+ *	the result of __rep_send_message.
+ *
+ * PUBLIC: int __rep_send_vote __P((DB_ENV *, DB_LSN *, int, int, int));
+ */
+int
+__rep_send_vote(dbenv, lsnp, nsites, pri, tiebreaker)
+	DB_ENV *dbenv;
+	DB_LSN *lsnp;
+	int nsites, pri, tiebreaker;
+{
+	REP_VOTE_INFO vote;
+	DBT rec;
+
+	/* Zero first so any fields we don't set are sent as zero. */
+	memset(&vote, 0, sizeof(vote));
+	vote.nsites = nsites;
+	vote.priority = pri;
+	vote.tiebreaker = tiebreaker;
+
+	/* Wrap the vote in a DBT to use as the message's record. */
+	memset(&rec, 0, sizeof(rec));
+	rec.size = sizeof(vote);
+	rec.data = &vote;
+
+	return (__rep_send_message(dbenv,
+	    DB_EID_BROADCAST, REP_VOTE1, lsnp, &rec, 0));
+}
+
+/*
+ * __rep_grow_sites --
+ *	Called to allocate more space in the election tally information.
+ *	Called with the rep mutex held.  We need to call the region mutex, so
+ *	we need to make sure that we *never* acquire those mutexes in the
+ *	opposite order.
+ *
+ *	The new tally array holds max(2 * rep->asites, nsites) ints and is
+ *	allocated from the environment's shared region.  On success any
+ *	previous tally array is freed (its contents are not copied) and
+ *	rep->asites, rep->nsites and rep->tally_off are updated.  On
+ *	failure the region state is left unchanged and the allocation
+ *	error is returned.
+ *
+ * PUBLIC: int __rep_grow_sites __P((DB_ENV *dbenv, int nsites));
+ */
+int
+__rep_grow_sites(dbenv, nsites)
+	DB_ENV *dbenv;
+	int nsites;
+{
+	REGENV *renv;
+	REGINFO *infop;
+	REP *rep;
+	int nalloc, ret, *tally;
+
+	rep = ((DB_REP *)dbenv->rep_handle)->region;
+
+	/*
+	 * Allocate either twice the current allocation or nsites,
+	 * whichever is more.
+	 */
+	nalloc = 2 * rep->asites;
+	if (nalloc < nsites)
+		nalloc = nsites;
+
+	infop = dbenv->reginfo;
+	renv = infop->primary;
+	MUTEX_LOCK(dbenv, &renv->mutex);
+
+	/*
+	 * The size must be the number of bytes for nalloc ints.  (Taking
+	 * sizeof of the product would yield only sizeof(size_t) bytes and
+	 * leave the tally array far too small.)
+	 */
+	if ((ret = __db_shalloc(infop->addr,
+	    (size_t)nalloc * sizeof(int), sizeof(int), &tally)) == 0) {
+		/* Release the old array, if any, before installing the new. */
+		if (rep->tally_off != INVALID_ROFF)
+			__db_shalloc_free(infop->addr,
+			    R_ADDR(infop, rep->tally_off));
+		rep->asites = nalloc;
+		rep->nsites = nsites;
+		rep->tally_off = R_OFFSET(infop, tally);
+	}
+	MUTEX_UNLOCK(dbenv, &renv->mutex);
+	return (ret);
+}
+
+#ifdef NOTYET
+static int __rep_send_file __P((DB_ENV *, DBT *, u_int32_t));
+/*
+ * __rep_send_file --
+ *	Send an entire file, one block at a time.
+ *
+ *	rec->data is passed to DB->open as the name of the file.  Every
+ *	page from 0 through the last page number found on the metadata
+ *	page is read-locked, fetched from the file's memory pool, and sent
+ *	to eid as a REP_FILE message; the message's flags argument is 1
+ *	for the final page and 0 otherwise.  Returns 0 on success or the
+ *	first error encountered; all resources acquired here are released
+ *	on every path.
+ */
+static int
+__rep_send_file(dbenv, rec, eid)
+	DB_ENV *dbenv;
+	DBT *rec;
+	u_int32_t eid;
+{
+	DB *dbp;
+	DB_LOCK lk;
+	DB_MPOOLFILE *mpf;
+	DBC *dbc;
+	DBT rec_dbt;
+	PAGE *pagep;
+	db_pgno_t last_pgno, pgno;
+	int ret, t_ret;
+
+	dbp = NULL;
+	dbc = NULL;
+	pagep = NULL;
+	mpf = NULL;
+	LOCK_INIT(lk);
+
+	if ((ret = db_create(&dbp, dbenv, 0)) != 0)
+		goto err;
+
+	if ((ret = dbp->open(dbp, rec->data, NULL, DB_UNKNOWN, 0, 0)) != 0)
+		goto err;
+
+	/*
+	 * Pages are fetched through the open database's memory pool file;
+	 * without this assignment mpf would still be NULL when it is
+	 * dereferenced in the loop below.
+	 */
+	mpf = dbp->mpf;
+
+	if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
+		goto err;
+	/*
+	 * Force last_pgno to some value that will let us read the meta-data
+	 * page in the following loop.
+	 */
+	memset(&rec_dbt, 0, sizeof(rec_dbt));
+	last_pgno = 1;
+	for (pgno = 0; pgno <= last_pgno; pgno++) {
+		if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lk)) != 0)
+			goto err;
+
+		if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) != 0)
+			goto err;
+
+		/* The metadata page tells us how many pages there really are. */
+		if (pgno == 0)
+			last_pgno = ((DBMETA *)pagep)->last_pgno;
+
+		rec_dbt.data = pagep;
+		rec_dbt.size = dbp->pgsize;
+		if ((ret = __rep_send_message(dbenv, eid,
+		    REP_FILE, NULL, &rec_dbt, pgno == last_pgno)) != 0)
+			goto err;
+
+		/*
+		 * Clear pagep and lk before checking the results so the
+		 * error path doesn't release them a second time.
+		 */
+		ret = mpf->put(mpf, pagep, 0);
+		pagep = NULL;
+		if (ret != 0)
+			goto err;
+		ret = __LPUT(dbc, lk);
+		LOCK_INIT(lk);
+		if (ret != 0)
+			goto err;
+	}
+
+	/* Common cleanup: release whatever is still held, keep first error. */
+err:	if (LOCK_ISSET(lk) && (t_ret = __LPUT(dbc, lk)) != 0 && ret == 0)
+		ret = t_ret;
+	if (dbc != NULL && (t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+		ret = t_ret;
+	if (pagep != NULL && (t_ret = mpf->put(mpf, pagep, 0)) != 0 && ret == 0)
+		ret = t_ret;
+	if (dbp != NULL && (t_ret = dbp->close(dbp, 0)) != 0 && ret == 0)
+		ret = t_ret;
+	return (ret);
+}
+#endif
+
+#if 0
+/*
+ * __rep_print_message --
+ *	Debugging aid: print a one-line summary of a replication control
+ *	message to stdout, giving the environment home, the caller's tag
+ *	string, the generation, the environment ID, the message type name
+ *	and the LSN.  Unrecognized record types are shown as "NOTYPE".
+ *
+ * PUBLIC: void __rep_print_message __P((DB_ENV *, int, REP_CONTROL *, char *));
+ */
+void
+__rep_print_message(dbenv, eid, rp, str)
+	DB_ENV *dbenv;
+	int eid;
+	REP_CONTROL *rp;
+	char *str;
+{
+	char *name;
+
+	/* Assume an unknown type until the switch finds a match. */
+	name = "NOTYPE";
+	switch (rp->rectype) {
+	case REP_ALIVE:		name = "alive";		break;
+	case REP_ALIVE_REQ:	name = "alive_req";	break;
+	case REP_ALL_REQ:	name = "all_req";	break;
+	case REP_ELECT:		name = "elect";		break;
+	case REP_FILE:		name = "file";		break;
+	case REP_FILE_REQ:	name = "file_req";	break;
+	case REP_LOG:		name = "log";		break;
+	case REP_LOG_MORE:	name = "log_more";	break;
+	case REP_LOG_REQ:	name = "log_req";	break;
+	case REP_MASTER_REQ:	name = "master_req";	break;
+	case REP_NEWCLIENT:	name = "newclient";	break;
+	case REP_NEWFILE:	name = "newfile";	break;
+	case REP_NEWMASTER:	name = "newmaster";	break;
+	case REP_NEWSITE:	name = "newsite";	break;
+	case REP_PAGE:		name = "page";		break;
+	case REP_PAGE_REQ:	name = "page_req";	break;
+	case REP_PLIST:		name = "plist";		break;
+	case REP_PLIST_REQ:	name = "plist_req";	break;
+	case REP_VERIFY:	name = "verify";	break;
+	case REP_VERIFY_FAIL:	name = "verify_fail";	break;
+	case REP_VERIFY_REQ:	name = "verify_req";	break;
+	case REP_VOTE1:		name = "vote1";		break;
+	case REP_VOTE2:		name = "vote2";		break;
+	default:
+		break;
+	}
+
+	printf("%s %s: gen = %d eid %d, type %s, LSN [%u][%u]\n",
+	    dbenv->db_home, str, rp->gen, eid, name, rp->lsn.file,
+	    rp->lsn.offset);
+}
+#endif