diff options
Diffstat (limited to 'storage/bdb/qam/qam_files.c')
-rw-r--r-- | storage/bdb/qam/qam_files.c | 642 |
1 files changed, 642 insertions, 0 deletions
diff --git a/storage/bdb/qam/qam_files.c b/storage/bdb/qam/qam_files.c new file mode 100644 index 00000000000..f15a88d546d --- /dev/null +++ b/storage/bdb/qam/qam_files.c @@ -0,0 +1,642 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999-2002 + * Sleepycat Software. All rights reserved. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: qam_files.c,v 1.52 2002/08/26 17:52:18 margo Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#include <stdlib.h> + +#include <string.h> +#endif + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/qam.h" +#include "dbinc/db_am.h" + +/* + * __qam_fprobe -- calculate and open extent + * + * Calculate which extent the page is in, open and create if necessary. + * + * PUBLIC: int __qam_fprobe + * PUBLIC: __P((DB *, db_pgno_t, void *, qam_probe_mode, u_int32_t)); + */ +int +__qam_fprobe(dbp, pgno, addrp, mode, flags) + DB *dbp; + db_pgno_t pgno; + void *addrp; + qam_probe_mode mode; + u_int32_t flags; +{ + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + MPFARRAY *array; + QUEUE *qp; + u_int8_t fid[DB_FILE_ID_LEN]; + u_int32_t extid, maxext, openflags; + char buf[MAXPATHLEN]; + int numext, offset, oldext, ret; + + dbenv = dbp->dbenv; + qp = (QUEUE *)dbp->q_internal; + ret = 0; + + if (qp->page_ext == 0) { + mpf = dbp->mpf; + return (mode == QAM_PROBE_GET ? + mpf->get(mpf, &pgno, flags, addrp) : + mpf->put(mpf, addrp, flags)); + } + + mpf = NULL; + + /* + * Need to lock long enough to find the mpf or create the file. + * The file cannot go away because we must have a record locked + * in that file. + */ + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + extid = (pgno - 1) / qp->page_ext; + + /* Array1 will always be in use if array2 is in use. */ + array = &qp->array1; + if (array->n_extent == 0) { + /* Start with 4 extents */ + oldext = 0; + array->n_extent = 4; + array->low_extent = extid; + offset = 0; + numext = 0; + goto alloc; + } + + offset = extid - qp->array1.low_extent; + if (qp->array2.n_extent != 0 && + abs(offset) > abs(extid - qp->array2.low_extent)) { + array = &qp->array2; + offset = extid - array->low_extent; + } + + /* + * Check to see if the requested extent is outside the range of + * extents in the array. This is true by default if there are + * no extents here yet. + */ + if (offset < 0 || (unsigned) offset >= array->n_extent) { + oldext = array->n_extent; + numext = array->hi_extent - array->low_extent + 1; + if (offset < 0 && + (unsigned) -offset + numext <= array->n_extent) { + /* + * If we can fit this one into the existing array by + * shifting the existing entries then we do not have + * to allocate. + */ + memmove(&array->mpfarray[-offset], + array->mpfarray, numext + * sizeof(array->mpfarray[0])); + memset(array->mpfarray, 0, -offset + * sizeof(array->mpfarray[0])); + offset = 0; + } else if ((u_int32_t)offset == array->n_extent && + mode != QAM_PROBE_MPF && array->mpfarray[0].pinref == 0) { + /* + * If this is at the end of the array and the file at + * the begining has a zero pin count we can close + * the bottom extent and put this one at the end. + */ + mpf = array->mpfarray[0].mpf; + if (mpf != NULL && (ret = mpf->close(mpf, 0)) != 0) + goto err; + memmove(&array->mpfarray[0], &array->mpfarray[1], + (array->n_extent - 1) * sizeof(array->mpfarray[0])); + array->low_extent++; + array->hi_extent++; + offset--; + array->mpfarray[offset].mpf = NULL; + array->mpfarray[offset].pinref = 0; + } else { + /* + * See if we have wrapped around the queue. + * If it has then allocate the second array. + * Otherwise just expand the one we are using. + */ + maxext = (u_int32_t) UINT32_T_MAX + / (qp->page_ext * qp->rec_page); + if ((u_int32_t) abs(offset) >= maxext/2) { + array = &qp->array2; + DB_ASSERT(array->n_extent == 0); + oldext = 0; + array->n_extent = 4; + array->low_extent = extid; + offset = 0; + numext = 0; + } else { + /* + * Increase the size to at least include + * the new one and double it. + */ + array->n_extent += abs(offset); + array->n_extent <<= 2; + } + alloc: + if ((ret = __os_realloc(dbenv, + array->n_extent * sizeof(struct __qmpf), + &array->mpfarray)) != 0) + goto err; + + if (offset < 0) { + /* + * Move the array up and put the new one + * in the first slot. + */ + offset = -offset; + memmove(&array->mpfarray[offset], + array->mpfarray, + numext * sizeof(array->mpfarray[0])); + memset(array->mpfarray, 0, + offset * sizeof(array->mpfarray[0])); + memset(&array->mpfarray[numext + offset], 0, + (array->n_extent - (numext + offset)) + * sizeof(array->mpfarray[0])); + offset = 0; + } + else + /* Clear the new part of the array. */ + memset(&array->mpfarray[oldext], 0, + (array->n_extent - oldext) * + sizeof(array->mpfarray[0])); + } + } + + /* Update the low and hi range of saved extents. */ + if (extid < array->low_extent) + array->low_extent = extid; + if (extid > array->hi_extent) + array->hi_extent = extid; + + /* If the extent file is not yet open, open it. */ + if (array->mpfarray[offset].mpf == NULL) { + snprintf(buf, sizeof(buf), + QUEUE_EXTENT, qp->dir, PATH_SEPARATOR[0], qp->name, extid); + if ((ret = dbenv->memp_fcreate( + dbenv, &array->mpfarray[offset].mpf, 0)) != 0) + goto err; + mpf = array->mpfarray[offset].mpf; + (void)mpf->set_lsn_offset(mpf, 0); + (void)mpf->set_pgcookie(mpf, &qp->pgcookie); + + /* Set up the fileid for this extent. */ + __qam_exid(dbp, fid, extid); + (void)mpf->set_fileid(mpf, fid); + openflags = DB_EXTENT; + if (LF_ISSET(DB_MPOOL_CREATE)) + openflags |= DB_CREATE; + if (F_ISSET(dbp, DB_AM_RDONLY)) + openflags |= DB_RDONLY; + if (F_ISSET(dbenv, DB_ENV_DIRECT_DB)) + openflags |= DB_DIRECT; + if ((ret = mpf->open( + mpf, buf, openflags, qp->mode, dbp->pgsize)) != 0) { + array->mpfarray[offset].mpf = NULL; + (void)mpf->close(mpf, 0); + goto err; + } + } + + mpf = array->mpfarray[offset].mpf; + if (mode == QAM_PROBE_GET) + array->mpfarray[offset].pinref++; + if (LF_ISSET(DB_MPOOL_CREATE)) + mpf->set_unlink(mpf, 0); + +err: + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + + if (ret == 0) { + if (mode == QAM_PROBE_MPF) { + *(DB_MPOOLFILE **)addrp = mpf; + return (0); + } + pgno--; + pgno %= qp->page_ext; + if (mode == QAM_PROBE_GET) + return (mpf->get(mpf, &pgno, flags, addrp)); + ret = mpf->put(mpf, addrp, flags); + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + array->mpfarray[offset].pinref--; + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + } + return (ret); +} + +/* + * __qam_fclose -- close an extent. + * + * Calculate which extent the page is in and close it. + * We assume the mpf entry is present. + * + * PUBLIC: int __qam_fclose __P((DB *, db_pgno_t)); + */ +int +__qam_fclose(dbp, pgnoaddr) + DB *dbp; + db_pgno_t pgnoaddr; +{ + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + MPFARRAY *array; + QUEUE *qp; + u_int32_t extid; + int offset, ret; + + ret = 0; + dbenv = dbp->dbenv; + qp = (QUEUE *)dbp->q_internal; + + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + + extid = (pgnoaddr - 1) / qp->page_ext; + array = &qp->array1; + if (array->low_extent > extid || array->hi_extent < extid) + array = &qp->array2; + offset = extid - array->low_extent; + + DB_ASSERT(offset >= 0 && (unsigned) offset < array->n_extent); + + /* If other threads are still using this file, leave it. */ + if (array->mpfarray[offset].pinref != 0) + goto done; + + mpf = array->mpfarray[offset].mpf; + array->mpfarray[offset].mpf = NULL; + ret = mpf->close(mpf, 0); + +done: + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + return (ret); +} + +/* + * __qam_fremove -- remove an extent. + * + * Calculate which extent the page is in and remove it. There is no way + * to remove an extent without probing it first and seeing that is is empty + * so we assume the mpf entry is present. + * + * PUBLIC: int __qam_fremove __P((DB *, db_pgno_t)); + */ +int +__qam_fremove(dbp, pgnoaddr) + DB *dbp; + db_pgno_t pgnoaddr; +{ + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + MPFARRAY *array; + QUEUE *qp; + u_int32_t extid; +#if CONFIG_TEST + char buf[MAXPATHLEN], *real_name; +#endif + int offset, ret; + + qp = (QUEUE *)dbp->q_internal; + dbenv = dbp->dbenv; + ret = 0; + + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + + extid = (pgnoaddr - 1) / qp->page_ext; + array = &qp->array1; + if (array->low_extent > extid || array->hi_extent < extid) + array = &qp->array2; + offset = extid - array->low_extent; + + DB_ASSERT(offset >= 0 && (unsigned) offset < array->n_extent); + +#if CONFIG_TEST + real_name = NULL; + /* Find the real name of the file. */ + snprintf(buf, sizeof(buf), + QUEUE_EXTENT, qp->dir, PATH_SEPARATOR[0], qp->name, extid); + if ((ret = __db_appname(dbenv, + DB_APP_DATA, buf, 0, NULL, &real_name)) != 0) + goto err; +#endif + /* + * The log must be flushed before the file is deleted. We depend on + * the log record of the last delete to recreate the file if we crash. + */ + if (LOGGING_ON(dbenv) && (ret = dbenv->log_flush(dbenv, NULL)) != 0) + goto err; + + mpf = array->mpfarray[offset].mpf; + array->mpfarray[offset].mpf = NULL; + mpf->set_unlink(mpf, 1); + if ((ret = mpf->close(mpf, 0)) != 0) + goto err; + + /* + * If the file is at the bottom of the array + * shift things down and adjust the end points. + */ + if (offset == 0) { + memmove(array->mpfarray, &array->mpfarray[1], + (array->hi_extent - array->low_extent) + * sizeof(array->mpfarray[0])); + array->mpfarray[ + array->hi_extent - array->low_extent].mpf = NULL; + if (array->low_extent != array->hi_extent) + array->low_extent++; + } else { + if (extid == array->hi_extent) + array->hi_extent--; + } + +err: + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); +#if CONFIG_TEST + if (real_name != NULL) + __os_free(dbenv, real_name); +#endif + return (ret); +} + +/* + * __qam_sync -- + * Flush the database cache. + * + * PUBLIC: int __qam_sync __P((DB *, u_int32_t)); + */ +int +__qam_sync(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + MPFARRAY *array; + QUEUE *qp; + QUEUE_FILELIST *filelist; + struct __qmpf *mpfp; + u_int32_t i; + int done, ret; + + dbenv = dbp->dbenv; + mpf = dbp->mpf; + + PANIC_CHECK(dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync"); + + if ((ret = __db_syncchk(dbp, flags)) != 0) + return (ret); + + /* Read-only trees never need to be sync'd. */ + if (F_ISSET(dbp, DB_AM_RDONLY)) + return (0); + + /* If the tree was never backed by a database file, we're done. */ + if (F_ISSET(dbp, DB_AM_INMEM)) + return (0); + + /* Flush any dirty pages from the cache to the backing file. */ + if ((ret = mpf->sync(dbp->mpf)) != 0) + return (ret); + + qp = (QUEUE *)dbp->q_internal; + if (qp->page_ext == 0) + return (0); + + /* We do this for the side effect of opening all active extents. */ + if ((ret = __qam_gen_filelist(dbp, &filelist)) != 0) + return (ret); + + if (filelist == NULL) + return (0); + + __os_free(dbp->dbenv, filelist); + + done = 0; + qp = (QUEUE *)dbp->q_internal; + array = &qp->array1; + + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); +again: + mpfp = array->mpfarray; + for (i = array->low_extent; i <= array->hi_extent; i++, mpfp++) + if ((mpf = mpfp->mpf) != NULL) { + if ((ret = mpf->sync(mpf)) != 0) + goto err; + /* + * If we are the only ones with this file open + * then close it so it might be removed. + */ + if (mpfp->pinref == 0) { + mpfp->mpf = NULL; + if ((ret = mpf->close(mpf, 0)) != 0) + goto err; + } + } + + if (done == 0 && qp->array2.n_extent != 0) { + array = &qp->array2; + done = 1; + goto again; + } + +err: + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + return (ret); +} + +/* + * __qam_gen_filelist -- generate a list of extent files. + * Another thread may close the handle so this should only + * be used single threaded or with care. + * + * PUBLIC: int __qam_gen_filelist __P(( DB *, QUEUE_FILELIST **)); + */ +int +__qam_gen_filelist(dbp, filelistp) + DB *dbp; + QUEUE_FILELIST **filelistp; +{ + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + QUEUE *qp; + QMETA *meta; + db_pgno_t i, last, start; + db_recno_t current, first; + QUEUE_FILELIST *fp; + int ret; + + dbenv = dbp->dbenv; + mpf = dbp->mpf; + qp = (QUEUE *)dbp->q_internal; + *filelistp = NULL; + + if (qp->page_ext == 0) + return (0); + + /* This may happen during metapage recovery. */ + if (qp->name == NULL) + return (0); + + /* Find out the page number of the last page in the database. */ + i = PGNO_BASE_MD; + if ((ret = mpf->get(mpf, &i, 0, &meta)) != 0) + return (ret); + + current = meta->cur_recno; + first = meta->first_recno; + + if ((ret = mpf->put(mpf, meta, 0)) != 0) + return (ret); + + last = QAM_RECNO_PAGE(dbp, current); + start = QAM_RECNO_PAGE(dbp, first); + + /* Allocate the worst case plus 1 for null termination. */ + if (last >= start) + ret = last - start + 2; + else + ret = last + (QAM_RECNO_PAGE(dbp, UINT32_T_MAX) - start) + 1; + if ((ret = __os_calloc(dbenv, + ret, sizeof(QUEUE_FILELIST), filelistp)) != 0) + return (ret); + fp = *filelistp; + i = start; + +again: for (; i <= last; i += qp->page_ext) { + if ((ret = + __qam_fprobe(dbp, i, &fp->mpf, QAM_PROBE_MPF, 0)) != 0) { + if (ret == ENOENT) + continue; + return (ret); + } + fp->id = (i - 1) / qp->page_ext; + fp++; + } + + if (last < start) { + i = 1; + start = 0; + goto again; + } + + return (0); +} + +/* + * __qam_extent_names -- generate a list of extent files names. + * + * PUBLIC: int __qam_extent_names __P((DB_ENV *, char *, char ***)); + */ +int +__qam_extent_names(dbenv, name, namelistp) + DB_ENV *dbenv; + char *name; + char ***namelistp; +{ + DB *dbp; + QUEUE *qp; + QUEUE_FILELIST *filelist, *fp; + char buf[MAXPATHLEN], *dir, **cp, *freep; + int cnt, len, ret; + + *namelistp = NULL; + filelist = NULL; + if ((ret = db_create(&dbp, dbenv, 0)) != 0) + return (ret); + if ((ret = + __db_open(dbp, NULL, name, NULL, DB_QUEUE, DB_RDONLY, 0)) != 0) + return (ret); + qp = dbp->q_internal; + if (qp->page_ext == 0) + goto done; + + if ((ret = __qam_gen_filelist(dbp, &filelist)) != 0) + goto done; + + if (filelist == NULL) + goto done; + + cnt = 0; + for (fp = filelist; fp->mpf != NULL; fp++) + cnt++; + dir = ((QUEUE *)dbp->q_internal)->dir; + name = ((QUEUE *)dbp->q_internal)->name; + + /* QUEUE_EXTENT contains extra chars, but add 6 anyway for the int. */ + len = (u_int32_t)(cnt * (sizeof(**namelistp) + + strlen(QUEUE_EXTENT) + strlen(dir) + strlen(name) + 6)); + + if ((ret = + __os_malloc(dbp->dbenv, len, namelistp)) != 0) + goto done; + cp = *namelistp; + freep = (char *)(cp + cnt + 1); + for (fp = filelist; fp->mpf != NULL; fp++) { + snprintf(buf, sizeof(buf), + QUEUE_EXTENT, dir, PATH_SEPARATOR[0], name, fp->id); + len = (u_int32_t)strlen(buf); + *cp++ = freep; + strcpy(freep, buf); + freep += len + 1; + } + *cp = NULL; + +done: + if (filelist != NULL) + __os_free(dbp->dbenv, filelist); + (void)dbp->close(dbp, DB_NOSYNC); + + return (ret); +} + +/* + * __qam_exid -- + * Generate a fileid for an extent based on the fileid of the main + * file. Since we do not log schema creates/deletes explicitly, the log + * never captures the fileid of an extent file. In order that masters and + * replicas have the same fileids (so they can explicitly delete them), we + * use computed fileids for the extent files of Queue files. + * + * An extent file id retains the low order 12 bytes of the file id and + * overwrites the dev/inode fields, placing a 0 in the inode field, and + * the extent number in the dev field. + * + * PUBLIC: void __qam_exid __P((DB *, u_int8_t *, u_int32_t)); + */ +void +__qam_exid(dbp, fidp, exnum) + DB *dbp; + u_int8_t *fidp; + u_int32_t exnum; +{ + int i; + u_int8_t *p; + + /* Copy the fileid from the master. */ + memcpy(fidp, dbp->fileid, DB_FILE_ID_LEN); + + /* The first four bytes are the inode or the FileIndexLow; 0 it. */ + for (i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = 0; + + /* The next four bytes are the dev/FileIndexHigh; insert the exnum . */ + for (p = (u_int8_t *)&exnum, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; +} |