/*------------------------------------------------------------------------- * * dbcommands.c * Database management commands (create/drop database). * * Note: database creation/destruction commands use exclusive locks on * the database objects (as expressed by LockSharedObject()) to avoid * stepping on each others' toes. Formerly we used table-level locks * on pg_database, but that's too coarse-grained. * * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/commands/dbcommands.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include #include #include #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" #include "access/multixact.h" #include "access/tableam.h" #include "access/xact.h" #include "access/xloginsert.h" #include "access/xlogrecovery.h" #include "access/xlogutils.h" #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" #include "catalog/pg_authid.h" #include "catalog/pg_collation.h" #include "catalog/pg_database.h" #include "catalog/pg_db_role_setting.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "commands/comment.h" #include "commands/dbcommands.h" #include "commands/dbcommands_xlog.h" #include "commands/defrem.h" #include "commands/seclabel.h" #include "commands/tablespace.h" #include "common/file_perm.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "pgstat.h" #include "postmaster/bgwriter.h" #include "replication/slot.h" #include "storage/copydir.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/md.h" #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/guc.h" #include 
"utils/pg_locale.h" #include "utils/relmapper.h" #include "utils/snapmgr.h" #include "utils/syscache.h" /* * Create database strategy. * * CREATEDB_WAL_LOG will copy the database at the block level and WAL log each * copied block. * * CREATEDB_FILE_COPY will simply perform a file system level copy of the * database and log a single record for each tablespace copied. To make this * safe, it also triggers checkpoints before and after the operation. */ typedef enum CreateDBStrategy { CREATEDB_WAL_LOG, CREATEDB_FILE_COPY } CreateDBStrategy; typedef struct { Oid src_dboid; /* source (template) DB */ Oid dest_dboid; /* DB we are trying to create */ CreateDBStrategy strategy; /* create db strategy */ } createdb_failure_params; typedef struct { Oid dest_dboid; /* DB we are trying to move */ Oid dest_tsoid; /* tablespace we are trying to move to */ } movedb_failure_params; /* * Information about a relation to be copied when creating a database. */ typedef struct CreateDBRelInfo { RelFileLocator rlocator; /* physical relation identifier */ Oid reloid; /* relation oid */ bool permanent; /* relation is permanent or unlogged */ } CreateDBRelInfo; /* non-export function prototypes */ static void createdb_failure_callback(int code, Datum arg); static void movedb(const char *dbname, const char *tblspcname); static void movedb_failure_callback(int code, Datum arg); static bool get_db_info(const char *name, LOCKMODE lockmode, Oid *dbIdP, Oid *ownerIdP, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale, char **dbIcurules, char *dbLocProvider, char **dbCollversion); static void remove_dbtablespaces(Oid db_id); static bool check_db_file_conflict(Oid db_id); static int errdetail_busy_db(int notherbackends, int npreparedxacts); static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, Oid src_tsid, Oid dst_tsid); static List 
*ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath); static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, char *srcpath, List *rlocatorlist, Snapshot snapshot); static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid, char *srcpath); static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo); static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid, Oid dst_tsid); static void recovery_create_dbdir(char *path, bool only_tblspc); /* * Create a new database using the WAL_LOG strategy. * * Each copied block is separately written to the write-ahead log. */ static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, Oid src_tsid, Oid dst_tsid) { char *srcpath; char *dstpath; List *rlocatorlist = NULL; ListCell *cell; LockRelId srcrelid; LockRelId dstrelid; RelFileLocator srcrlocator; RelFileLocator dstrlocator; CreateDBRelInfo *relinfo; /* Get source and destination database paths. */ srcpath = GetDatabasePath(src_dboid, src_tsid); dstpath = GetDatabasePath(dst_dboid, dst_tsid); /* Create database directory and write PG_VERSION file. */ CreateDirAndVersionFile(dstpath, dst_dboid, dst_tsid, false); /* Copy relmap file from source database to the destination database. */ RelationMapCopy(dst_dboid, dst_tsid, srcpath, dstpath); /* Get list of relfilelocators to copy from the source database. */ rlocatorlist = ScanSourceDatabasePgClass(src_tsid, src_dboid, srcpath); Assert(rlocatorlist != NIL); /* * Database IDs will be the same for all relations so set them before * entering the loop. */ srcrelid.dbId = src_dboid; dstrelid.dbId = dst_dboid; /* Loop over our list of relfilelocators and copy each one. */ foreach(cell, rlocatorlist) { relinfo = lfirst(cell); srcrlocator = relinfo->rlocator; /* * If the relation is from the source db's default tablespace then we * need to create it in the destination db's default tablespace. 
* Otherwise, we need to create in the same tablespace as it is in the * source database. */ if (srcrlocator.spcOid == src_tsid) dstrlocator.spcOid = dst_tsid; else dstrlocator.spcOid = srcrlocator.spcOid; dstrlocator.dbOid = dst_dboid; dstrlocator.relNumber = srcrlocator.relNumber; /* * Acquire locks on source and target relations before copying. * * We typically do not read relation data into shared_buffers without * holding a relation lock. It's unclear what could go wrong if we * skipped it in this case, because nobody can be modifying either the * source or destination database at this point, and we have locks on * both databases, too, but let's take the conservative route. */ dstrelid.relId = srcrelid.relId = relinfo->reloid; LockRelationId(&srcrelid, AccessShareLock); LockRelationId(&dstrelid, AccessShareLock); /* Copy relation storage from source to the destination. */ CreateAndCopyRelationData(srcrlocator, dstrlocator, relinfo->permanent); /* Release the relation locks. */ UnlockRelationId(&srcrelid, AccessShareLock); UnlockRelationId(&dstrelid, AccessShareLock); } pfree(srcpath); pfree(dstpath); list_free_deep(rlocatorlist); } /* * Scan the pg_class table in the source database to identify the relations * that need to be copied to the destination database. * * This is an exception to the usual rule that cross-database access is * not possible. We can make it work here because we know that there are no * connections to the source database and (since there can't be prepared * transactions touching that database) no in-doubt tuples either. This * means that we don't need to worry about pruning removing anything from * under us, and we don't need to be too picky about our snapshot either. * As long as it sees all previously-committed XIDs as committed and all * aborted XIDs as aborted, we should be fine: nothing else is possible * here. 
* * We can't rely on the relcache for anything here, because that only knows * about the database to which we are connected, and can't handle access to * other databases. That also means we can't rely on the heap scan * infrastructure, which would be a bad idea anyway since it might try * to do things like HOT pruning which we definitely can't do safely in * a database to which we're not even connected. */ static List * ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath) { RelFileLocator rlocator; BlockNumber nblocks; BlockNumber blkno; Buffer buf; RelFileNumber relfilenumber; Page page; List *rlocatorlist = NIL; LockRelId relid; Snapshot snapshot; SMgrRelation smgr; BufferAccessStrategy bstrategy; /* Get pg_class relfilenumber. */ relfilenumber = RelationMapOidToFilenumberForDatabase(srcpath, RelationRelationId); /* Don't read data into shared_buffers without holding a relation lock. */ relid.dbId = dbid; relid.relId = RelationRelationId; LockRelationId(&relid, AccessShareLock); /* Prepare a RelFileLocator for the pg_class relation. */ rlocator.spcOid = tbid; rlocator.dbOid = dbid; rlocator.relNumber = relfilenumber; smgr = smgropen(rlocator, InvalidBackendId); nblocks = smgrnblocks(smgr, MAIN_FORKNUM); smgrclose(smgr); /* Use a buffer access strategy since this is a bulk read operation. */ bstrategy = GetAccessStrategy(BAS_BULKREAD); /* * As explained in the function header comments, we need a snapshot that * will see all committed transactions as committed, and our transaction * snapshot - or the active snapshot - might not be new enough for that, * but the return value of GetLatestSnapshot() should work fine. */ snapshot = GetLatestSnapshot(); /* Process the relation block by block. 
*/ for (blkno = 0; blkno < nblocks; blkno++) { CHECK_FOR_INTERRUPTS(); buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy, true); LockBuffer(buf, BUFFER_LOCK_SHARE); page = BufferGetPage(buf); if (PageIsNew(page) || PageIsEmpty(page)) { UnlockReleaseBuffer(buf); continue; } /* Append relevant pg_class tuples for current page to rlocatorlist. */ rlocatorlist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid, srcpath, rlocatorlist, snapshot); UnlockReleaseBuffer(buf); } /* Release relation lock. */ UnlockRelationId(&relid, AccessShareLock); return rlocatorlist; } /* * Scan one page of the source database's pg_class relation and add relevant * entries to rlocatorlist. The return value is the updated list. */ static List * ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, char *srcpath, List *rlocatorlist, Snapshot snapshot) { BlockNumber blkno = BufferGetBlockNumber(buf); OffsetNumber offnum; OffsetNumber maxoff; HeapTupleData tuple; maxoff = PageGetMaxOffsetNumber(page); /* Loop over offsets. */ for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { ItemId itemid; itemid = PageGetItemId(page, offnum); /* Nothing to do if slot is empty or already dead. */ if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid) || ItemIdIsRedirected(itemid)) continue; Assert(ItemIdIsNormal(itemid)); ItemPointerSet(&(tuple.t_self), blkno, offnum); /* Initialize a HeapTupleData structure. */ tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationRelationId; /* Skip tuples that are not visible to this snapshot. */ if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf)) { CreateDBRelInfo *relinfo; /* * ScanSourceDatabasePgClassTuple is in charge of constructing a * CreateDBRelInfo object for this tuple, but can also decide that * this tuple isn't something we need to copy. If we do need to * copy the relation, add it to the list. 
*/ relinfo = ScanSourceDatabasePgClassTuple(&tuple, tbid, dbid, srcpath); if (relinfo != NULL) rlocatorlist = lappend(rlocatorlist, relinfo); } } return rlocatorlist; } /* * Decide whether a certain pg_class tuple represents something that * needs to be copied from the source database to the destination database, * and if so, construct a CreateDBRelInfo for it. * * Visibility checks are handled by the caller, so our job here is just * to assess the data stored in the tuple. */ CreateDBRelInfo * ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid, char *srcpath) { CreateDBRelInfo *relinfo; Form_pg_class classForm; RelFileNumber relfilenumber = InvalidRelFileNumber; classForm = (Form_pg_class) GETSTRUCT(tuple); /* * Return NULL if this object does not need to be copied. * * Shared objects don't need to be copied, because they are shared. * Objects without storage can't be copied, because there's nothing to * copy. Temporary relations don't need to be copied either, because they * are inaccessible outside of the session that created them, which must * be gone already, and couldn't connect to a different database if it * still existed. autovacuum will eventually remove the pg_class entries * as well. */ if (classForm->reltablespace == GLOBALTABLESPACE_OID || !RELKIND_HAS_STORAGE(classForm->relkind) || classForm->relpersistence == RELPERSISTENCE_TEMP) return NULL; /* * If relfilenumber is valid then directly use it. Otherwise, consult the * relmap. */ if (RelFileNumberIsValid(classForm->relfilenode)) relfilenumber = classForm->relfilenode; else relfilenumber = RelationMapOidToFilenumberForDatabase(srcpath, classForm->oid); /* We must have a valid relfilenumber. */ if (!RelFileNumberIsValid(relfilenumber)) elog(ERROR, "relation with OID %u does not have a valid relfilenumber", classForm->oid); /* Prepare a rel info element and add it to the list. 
*/ relinfo = (CreateDBRelInfo *) palloc(sizeof(CreateDBRelInfo)); if (OidIsValid(classForm->reltablespace)) relinfo->rlocator.spcOid = classForm->reltablespace; else relinfo->rlocator.spcOid = tbid; relinfo->rlocator.dbOid = dbid; relinfo->rlocator.relNumber = relfilenumber; relinfo->reloid = classForm->oid; /* Temporary relations were rejected above. */ Assert(classForm->relpersistence != RELPERSISTENCE_TEMP); relinfo->permanent = (classForm->relpersistence == RELPERSISTENCE_PERMANENT) ? true : false; return relinfo; } /* * Create database directory and write out the PG_VERSION file in the database * path. If isRedo is true, it's okay for the database directory to exist * already. */ static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo) { int fd; int nbytes; char versionfile[MAXPGPATH]; char buf[16]; /* * Prepare version data before starting a critical section. * * Note that we don't have to copy this from the source database; there's * only one legal value. */ sprintf(buf, "%s\n", PG_MAJORVERSION); nbytes = strlen(PG_MAJORVERSION) + 1; /* If we are not in WAL replay then write the WAL. */ if (!isRedo) { xl_dbase_create_wal_log_rec xlrec; XLogRecPtr lsn; START_CRIT_SECTION(); xlrec.db_id = dbid; xlrec.tablespace_id = tsid; XLogBeginInsert(); XLogRegisterData((char *) (&xlrec), sizeof(xl_dbase_create_wal_log_rec)); lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG); /* As always, WAL must hit the disk before the data update does. */ XLogFlush(lsn); } /* Create database directory. */ if (MakePGDirectory(dbpath) < 0) { /* Failure other than already exists or not in WAL replay? */ if (errno != EEXIST || !isRedo) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", dbpath))); } /* * Create PG_VERSION file in the database path. If the file already * exists and we are in WAL replay then try again to open it in write * mode. 
*/ snprintf(versionfile, sizeof(versionfile), "%s/%s", dbpath, "PG_VERSION"); fd = OpenTransientFile(versionfile, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY); if (fd < 0 && errno == EEXIST && isRedo) fd = OpenTransientFile(versionfile, O_WRONLY | O_TRUNC | PG_BINARY); if (fd < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", versionfile))); /* Write PG_MAJORVERSION in the PG_VERSION file. */ pgstat_report_wait_start(WAIT_EVENT_VERSION_FILE_WRITE); errno = 0; if ((int) write(fd, buf, nbytes) != nbytes) { /* If write didn't set errno, assume problem is no disk space. */ if (errno == 0) errno = ENOSPC; ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", versionfile))); } pgstat_report_wait_end(); /* Close the version file. */ CloseTransientFile(fd); /* Critical section done. */ if (!isRedo) END_CRIT_SECTION(); } /* * Create a new database using the FILE_COPY strategy. * * Copy each tablespace at the filesystem level, and log a single WAL record * for each tablespace copied. This requires a checkpoint before and after the * copy, which may be expensive, but it does greatly reduce WAL generation * if the copied database is large. */ static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid, Oid dst_tsid) { TableScanDesc scan; Relation rel; HeapTuple tuple; /* * Force a checkpoint before starting the copy. This will force all dirty * buffers, including those of unlogged tables, out to disk, to ensure * source database is up-to-date on disk for the copy. * FlushDatabaseBuffers() would suffice for that, but we also want to * process any pending unlink requests. Otherwise, if a checkpoint * happened while we're copying files, a file might be deleted just when * we're about to copy it, causing the lstat() call in copydir() to fail * with ENOENT. 
*/ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL); /* * Iterate through all tablespaces of the template database, and copy each * one to the new database. */ rel = table_open(TableSpaceRelationId, AccessShareLock); scan = table_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple); Oid srctablespace = spaceform->oid; Oid dsttablespace; char *srcpath; char *dstpath; struct stat st; /* No need to copy global tablespace */ if (srctablespace == GLOBALTABLESPACE_OID) continue; srcpath = GetDatabasePath(src_dboid, srctablespace); if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) || directory_is_empty(srcpath)) { /* Assume we can ignore it */ pfree(srcpath); continue; } if (srctablespace == src_tsid) dsttablespace = dst_tsid; else dsttablespace = srctablespace; dstpath = GetDatabasePath(dst_dboid, dsttablespace); /* * Copy this subdirectory to the new location * * We don't need to copy subdirectories */ copydir(srcpath, dstpath, false); /* Record the filesystem change in XLOG */ { xl_dbase_create_file_copy_rec xlrec; xlrec.db_id = dst_dboid; xlrec.tablespace_id = dsttablespace; xlrec.src_db_id = src_dboid; xlrec.src_tablespace_id = srctablespace; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_file_copy_rec)); (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE); } pfree(srcpath); pfree(dstpath); } table_endscan(scan); table_close(rel, AccessShareLock); /* * We force a checkpoint before committing. This effectively means that * committed XLOG_DBASE_CREATE_FILE_COPY operations will never need to be * replayed (at least not in ordinary crash recovery; we still have to * make the XLOG entry for the benefit of PITR operations). 
This avoids * two nasty scenarios: * * #1: When PITR is off, we don't XLOG the contents of newly created * indexes; therefore the drop-and-recreate-whole-directory behavior of * DBASE_CREATE replay would lose such indexes. * * #2: Since we have to recopy the source database during DBASE_CREATE * replay, we run the risk of copying changes in it that were committed * after the original CREATE DATABASE command but before the system crash * that led to the replay. This is at least unexpected and at worst could * lead to inconsistencies, eg duplicate table names. * * (Both of these were real bugs in releases 8.0 through 8.0.3.) * * In PITR replay, the first of these isn't an issue, and the second is * only a risk if the CREATE DATABASE and subsequent template database * change both occur while a base backup is being taken. There doesn't * seem to be much we can do about that except document it as a * limitation. * * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE * strategy that avoids these problems. 
*/ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); } /* * CREATE DATABASE */ Oid createdb(ParseState *pstate, const CreatedbStmt *stmt) { Oid src_dboid; Oid src_owner; int src_encoding = -1; char *src_collate = NULL; char *src_ctype = NULL; char *src_iculocale = NULL; char *src_icurules = NULL; char src_locprovider = '\0'; char *src_collversion = NULL; bool src_istemplate; bool src_allowconn; TransactionId src_frozenxid = InvalidTransactionId; MultiXactId src_minmxid = InvalidMultiXactId; Oid src_deftablespace; volatile Oid dst_deftablespace; Relation pg_database_rel; HeapTuple tuple; Datum new_record[Natts_pg_database] = {0}; bool new_record_nulls[Natts_pg_database] = {0}; Oid dboid = InvalidOid; Oid datdba; ListCell *option; DefElem *dtablespacename = NULL; DefElem *downer = NULL; DefElem *dtemplate = NULL; DefElem *dencoding = NULL; DefElem *dlocale = NULL; DefElem *dcollate = NULL; DefElem *dctype = NULL; DefElem *diculocale = NULL; DefElem *dicurules = NULL; DefElem *dlocprovider = NULL; DefElem *distemplate = NULL; DefElem *dallowconnections = NULL; DefElem *dconnlimit = NULL; DefElem *dcollversion = NULL; DefElem *dstrategy = NULL; char *dbname = stmt->dbname; char *dbowner = NULL; const char *dbtemplate = NULL; char *dbcollate = NULL; char *dbctype = NULL; char *dbiculocale = NULL; char *dbicurules = NULL; char dblocprovider = '\0'; char *canonname; int encoding = -1; bool dbistemplate = false; bool dballowconnections = true; int dbconnlimit = -1; char *dbcollversion = NULL; int notherbackends; int npreparedxacts; CreateDBStrategy dbstrategy = CREATEDB_WAL_LOG; createdb_failure_params fparms; /* Extract options from the statement node tree */ foreach(option, stmt->options) { DefElem *defel = (DefElem *) lfirst(option); if (strcmp(defel->defname, "tablespace") == 0) { if (dtablespacename) errorConflictingDefElem(defel, pstate); dtablespacename = defel; } else if (strcmp(defel->defname, "owner") == 0) { if (downer) 
errorConflictingDefElem(defel, pstate); downer = defel; } else if (strcmp(defel->defname, "template") == 0) { if (dtemplate) errorConflictingDefElem(defel, pstate); dtemplate = defel; } else if (strcmp(defel->defname, "encoding") == 0) { if (dencoding) errorConflictingDefElem(defel, pstate); dencoding = defel; } else if (strcmp(defel->defname, "locale") == 0) { if (dlocale) errorConflictingDefElem(defel, pstate); dlocale = defel; } else if (strcmp(defel->defname, "lc_collate") == 0) { if (dcollate) errorConflictingDefElem(defel, pstate); dcollate = defel; } else if (strcmp(defel->defname, "lc_ctype") == 0) { if (dctype) errorConflictingDefElem(defel, pstate); dctype = defel; } else if (strcmp(defel->defname, "icu_locale") == 0) { if (diculocale) errorConflictingDefElem(defel, pstate); diculocale = defel; } else if (strcmp(defel->defname, "icu_rules") == 0) { if (dicurules) errorConflictingDefElem(defel, pstate); dicurules = defel; } else if (strcmp(defel->defname, "locale_provider") == 0) { if (dlocprovider) errorConflictingDefElem(defel, pstate); dlocprovider = defel; } else if (strcmp(defel->defname, "is_template") == 0) { if (distemplate) errorConflictingDefElem(defel, pstate); distemplate = defel; } else if (strcmp(defel->defname, "allow_connections") == 0) { if (dallowconnections) errorConflictingDefElem(defel, pstate); dallowconnections = defel; } else if (strcmp(defel->defname, "connection_limit") == 0) { if (dconnlimit) errorConflictingDefElem(defel, pstate); dconnlimit = defel; } else if (strcmp(defel->defname, "collation_version") == 0) { if (dcollversion) errorConflictingDefElem(defel, pstate); dcollversion = defel; } else if (strcmp(defel->defname, "location") == 0) { ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("LOCATION is not supported anymore"), errhint("Consider using tablespaces instead."), parser_errposition(pstate, defel->location))); } else if (strcmp(defel->defname, "oid") == 0) { dboid = defGetObjectId(defel); /* * We 
don't normally permit new databases to be created with * system-assigned OIDs. pg_upgrade tries to preserve database * OIDs, so we can't allow any database to be created with an OID * that might be in use in a freshly-initialized cluster created * by some future version. We assume all such OIDs will be from * the system-managed OID range. * * As an exception, however, we permit any OID to be assigned when * allow_system_table_mods=on (so that initdb can assign system * OIDs to template0 and postgres) or when performing a binary * upgrade (so that pg_upgrade can preserve whatever OIDs it finds * in the source cluster). */ if (dboid < FirstNormalObjectId && !allowSystemTableMods && !IsBinaryUpgrade) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), errmsg("OIDs less than %u are reserved for system objects", FirstNormalObjectId)); } else if (strcmp(defel->defname, "strategy") == 0) { if (dstrategy) errorConflictingDefElem(defel, pstate); dstrategy = defel; } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("option \"%s\" not recognized", defel->defname), parser_errposition(pstate, defel->location))); } if (downer && downer->arg) dbowner = defGetString(downer); if (dtemplate && dtemplate->arg) dbtemplate = defGetString(dtemplate); if (dencoding && dencoding->arg) { const char *encoding_name; if (IsA(dencoding->arg, Integer)) { encoding = defGetInt32(dencoding); encoding_name = pg_encoding_to_char(encoding); if (strcmp(encoding_name, "") == 0 || pg_valid_server_encoding(encoding_name) < 0) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("%d is not a valid encoding code", encoding), parser_errposition(pstate, dencoding->location))); } else { encoding_name = defGetString(dencoding); encoding = pg_valid_server_encoding(encoding_name); if (encoding < 0) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("%s is not a valid encoding name", encoding_name), parser_errposition(pstate, dencoding->location))); } } if (dlocale && dlocale->arg) { 
dbcollate = defGetString(dlocale); dbctype = defGetString(dlocale); } if (dcollate && dcollate->arg) dbcollate = defGetString(dcollate); if (dctype && dctype->arg) dbctype = defGetString(dctype); if (diculocale && diculocale->arg) dbiculocale = defGetString(diculocale); if (dicurules && dicurules->arg) dbicurules = defGetString(dicurules); if (dlocprovider && dlocprovider->arg) { char *locproviderstr = defGetString(dlocprovider); if (pg_strcasecmp(locproviderstr, "icu") == 0) dblocprovider = COLLPROVIDER_ICU; else if (pg_strcasecmp(locproviderstr, "libc") == 0) dblocprovider = COLLPROVIDER_LIBC; else ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("unrecognized locale provider: %s", locproviderstr))); } if (distemplate && distemplate->arg) dbistemplate = defGetBoolean(distemplate); if (dallowconnections && dallowconnections->arg) dballowconnections = defGetBoolean(dallowconnections); if (dconnlimit && dconnlimit->arg) { dbconnlimit = defGetInt32(dconnlimit); if (dbconnlimit < -1) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid connection limit: %d", dbconnlimit))); } if (dcollversion) dbcollversion = defGetString(dcollversion); /* obtain OID of proposed owner */ if (dbowner) datdba = get_role_oid(dbowner, false); else datdba = GetUserId(); /* * To create a database, must have createdb privilege and must be able to * become the target role (this does not imply that the target role itself * must have createdb privilege). The latter provision guards against * "giveaway" attacks. Note that a superuser will always have both of * these privileges a fortiori. */ if (!have_createdb_privilege()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied to create database"))); check_can_set_role(GetUserId(), datdba); /* * Lookup database (template) to be cloned, and obtain share lock on it. 
* ShareLock allows two CREATE DATABASEs to work from the same template * concurrently, while ensuring no one is busy dropping it in parallel * (which would be Very Bad since we'd likely get an incomplete copy * without knowing it). This also prevents any new connections from being * made to the source until we finish copying it, so we can be sure it * won't change underneath us. */ if (!dbtemplate) dbtemplate = "template1"; /* Default template database name */ if (!get_db_info(dbtemplate, ShareLock, &src_dboid, &src_owner, &src_encoding, &src_istemplate, &src_allowconn, &src_frozenxid, &src_minmxid, &src_deftablespace, &src_collate, &src_ctype, &src_iculocale, &src_icurules, &src_locprovider, &src_collversion)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("template database \"%s\" does not exist", dbtemplate))); /* * Permission check: to copy a DB that's not marked datistemplate, you * must be superuser or the owner thereof. */ if (!src_istemplate) { if (!object_ownercheck(DatabaseRelationId, src_dboid, GetUserId())) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied to copy database \"%s\"", dbtemplate))); } /* Validate the database creation strategy. 
*/ if (dstrategy && dstrategy->arg) { char *strategy; strategy = defGetString(dstrategy); if (strcmp(strategy, "wal_log") == 0) dbstrategy = CREATEDB_WAL_LOG; else if (strcmp(strategy, "file_copy") == 0) dbstrategy = CREATEDB_FILE_COPY; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid create database strategy \"%s\"", strategy), errhint("Valid strategies are \"wal_log\", and \"file_copy\"."))); } /* If encoding or locales are defaulted, use source's setting */ if (encoding < 0) encoding = src_encoding; if (dbcollate == NULL) dbcollate = src_collate; if (dbctype == NULL) dbctype = src_ctype; if (dblocprovider == '\0') dblocprovider = src_locprovider; if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU) dbiculocale = src_iculocale; if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU) dbicurules = src_icurules; /* Some encodings are client only */ if (!PG_VALID_BE_ENCODING(encoding)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("invalid server encoding %d", encoding))); /* Check that the chosen locales are valid, and get canonical spellings */ if (!check_locale(LC_COLLATE, dbcollate, &canonname)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("invalid locale name: \"%s\"", dbcollate))); dbcollate = canonname; if (!check_locale(LC_CTYPE, dbctype, &canonname)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("invalid locale name: \"%s\"", dbctype))); dbctype = canonname; check_encoding_locale_matches(encoding, dbcollate, dbctype); if (dblocprovider == COLLPROVIDER_ICU) { if (!(is_encoding_supported_by_icu(encoding))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("encoding \"%s\" is not supported with ICU provider", pg_encoding_to_char(encoding)))); /* * This would happen if template0 uses the libc provider but the new * database uses icu. 
*/ if (!dbiculocale) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("ICU locale must be specified"))); /* * During binary upgrade, or when the locale came from the template * database, preserve locale string. Otherwise, canonicalize to a * language tag. */ if (!IsBinaryUpgrade && dbiculocale != src_iculocale) { char *langtag = icu_language_tag(dbiculocale, icu_validation_level); if (langtag && strcmp(dbiculocale, langtag) != 0) { ereport(NOTICE, (errmsg("using standard form \"%s\" for locale \"%s\"", langtag, dbiculocale))); dbiculocale = langtag; } } icu_validate_locale(dbiculocale); } else { if (dbiculocale) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("ICU locale cannot be specified unless locale provider is ICU"))); if (dbicurules) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("ICU rules cannot be specified unless locale provider is ICU"))); } /* * Check that the new encoding and locale settings match the source * database. We insist on this because we simply copy the source data --- * any non-ASCII data would be wrongly encoded, and any indexes sorted * according to the source locale would be wrong. * * However, we assume that template0 doesn't contain any non-ASCII data * nor any indexes that depend on collation or ctype, so template0 can be * used as template for creating a database with any encoding or locale. 
*/ if (strcmp(dbtemplate, "template0") != 0) { if (encoding != src_encoding) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("new encoding (%s) is incompatible with the encoding of the template database (%s)", pg_encoding_to_char(encoding), pg_encoding_to_char(src_encoding)), errhint("Use the same encoding as in the template database, or use template0 as template."))); if (strcmp(dbcollate, src_collate) != 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("new collation (%s) is incompatible with the collation of the template database (%s)", dbcollate, src_collate), errhint("Use the same collation as in the template database, or use template0 as template."))); if (strcmp(dbctype, src_ctype) != 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("new LC_CTYPE (%s) is incompatible with the LC_CTYPE of the template database (%s)", dbctype, src_ctype), errhint("Use the same LC_CTYPE as in the template database, or use template0 as template."))); if (dblocprovider != src_locprovider) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("new locale provider (%s) does not match locale provider of the template database (%s)", collprovider_name(dblocprovider), collprovider_name(src_locprovider)), errhint("Use the same locale provider as in the template database, or use template0 as template."))); if (dblocprovider == COLLPROVIDER_ICU) { char *val1; char *val2; Assert(dbiculocale); Assert(src_iculocale); if (strcmp(dbiculocale, src_iculocale) != 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)", dbiculocale, src_iculocale), errhint("Use the same ICU locale as in the template database, or use template0 as template."))); val1 = dbicurules; if (!val1) val1 = ""; val2 = src_icurules; if (!val2) val2 = ""; if (strcmp(val1, val2) != 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("new ICU 
collation rules (%s) are incompatible with the ICU collation rules of the template database (%s)", val1, val2), errhint("Use the same ICU collation rules as in the template database, or use template0 as template."))); } } /* * If we got a collation version for the template database, check that it * matches the actual OS collation version. Otherwise error; the user * needs to fix the template database first. Don't complain if a * collation version was specified explicitly as a statement option; that * is used by pg_upgrade to reproduce the old state exactly. * * (If the template database has no collation version, then either the * platform/provider does not support collation versioning, or it's * template0, for which we stipulate that it does not contain * collation-using objects.) */ if (src_collversion && !dcollversion) { char *actual_versionstr; actual_versionstr = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate); if (!actual_versionstr) ereport(ERROR, (errmsg("template database \"%s\" has a collation version, but no actual collation version could be determined", dbtemplate))); if (strcmp(actual_versionstr, src_collversion) != 0) ereport(ERROR, (errmsg("template database \"%s\" has a collation version mismatch", dbtemplate), errdetail("The template database was created using collation version %s, " "but the operating system provides version %s.", src_collversion, actual_versionstr), errhint("Rebuild all objects in the template database that use the default collation and run " "ALTER DATABASE %s REFRESH COLLATION VERSION, " "or build PostgreSQL with the right library version.", quote_identifier(dbtemplate)))); } if (dbcollversion == NULL) dbcollversion = src_collversion; /* * Normally, we copy the collation version from the template database. * This last resort only applies if the template database does not have a * collation version, which is normally only the case for template0. 
*/ if (dbcollversion == NULL) dbcollversion = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate); /* Resolve default tablespace for new database */ if (dtablespacename && dtablespacename->arg) { char *tablespacename; AclResult aclresult; tablespacename = defGetString(dtablespacename); dst_deftablespace = get_tablespace_oid(tablespacename, false); /* check permissions */ aclresult = object_aclcheck(TableSpaceRelationId, dst_deftablespace, GetUserId(), ACL_CREATE); if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, OBJECT_TABLESPACE, tablespacename); /* pg_global must never be the default tablespace */ if (dst_deftablespace == GLOBALTABLESPACE_OID) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("pg_global cannot be used as default tablespace"))); /* * If we are trying to change the default tablespace of the template, * we require that the template not have any files in the new default * tablespace. This is necessary because otherwise the copied * database would contain pg_class rows that refer to its default * tablespace both explicitly (by OID) and implicitly (as zero), which * would cause problems. For example another CREATE DATABASE using * the copied database as template, and trying to change its default * tablespace again, would yield outright incorrect results (it would * improperly move tables to the new default tablespace that should * stay in the same tablespace). 
*/ if (dst_deftablespace != src_deftablespace) { char *srcpath; struct stat st; srcpath = GetDatabasePath(src_dboid, dst_deftablespace); if (stat(srcpath, &st) == 0 && S_ISDIR(st.st_mode) && !directory_is_empty(srcpath)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot assign new default tablespace \"%s\"", tablespacename), errdetail("There is a conflict because database \"%s\" already has some tables in this tablespace.", dbtemplate))); pfree(srcpath); } } else { /* Use template database's default tablespace */ dst_deftablespace = src_deftablespace; /* Note there is no additional permission check in this path */ } /* * If built with appropriate switch, whine when regression-testing * conventions for database names are violated. But don't complain during * initdb. */ #ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS if (IsUnderPostmaster && strstr(dbname, "regression") == NULL) elog(WARNING, "databases created by regression test cases should have names including \"regression\""); #endif /* * Check for db name conflict. This is just to give a more friendly error * message than "unique index violation". There's a race condition but * we're willing to accept the less friendly message in that case. */ if (OidIsValid(get_database_oid(dbname, true))) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_DATABASE), errmsg("database \"%s\" already exists", dbname))); /* * The source DB can't have any active backends, except this one * (exception is to allow CREATE DB while connected to template1). * Otherwise we might copy inconsistent data. * * This should be last among the basic error checks, because it involves * potential waiting; we may as well throw an error first if we're gonna * throw one. 
*/ if (CountOtherDBBackends(src_dboid, ¬herbackends, &npreparedxacts)) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("source database \"%s\" is being accessed by other users", dbtemplate), errdetail_busy_db(notherbackends, npreparedxacts))); /* * Select an OID for the new database, checking that it doesn't have a * filename conflict with anything already existing in the tablespace * directories. */ pg_database_rel = table_open(DatabaseRelationId, RowExclusiveLock); /* * If database OID is configured, check if the OID is already in use or * data directory already exists. */ if (OidIsValid(dboid)) { char *existing_dbname = get_database_name(dboid); if (existing_dbname != NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), errmsg("database OID %u is already in use by database \"%s\"", dboid, existing_dbname)); if (check_db_file_conflict(dboid)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), errmsg("data directory with the specified OID %u already exists", dboid)); } else { /* Select an OID for the new database if is not explicitly configured. */ do { dboid = GetNewOidWithIndex(pg_database_rel, DatabaseOidIndexId, Anum_pg_database_oid); } while (check_db_file_conflict(dboid)); } /* * Insert a new tuple into pg_database. This establishes our ownership of * the new database name (anyone else trying to insert the same name will * block on the unique index, and fail after we commit). 
*/ Assert((dblocprovider == COLLPROVIDER_ICU && dbiculocale) || (dblocprovider != COLLPROVIDER_ICU && !dbiculocale)); /* Form tuple */ new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid); new_record[Anum_pg_database_datname - 1] = DirectFunctionCall1(namein, CStringGetDatum(dbname)); new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba); new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding); new_record[Anum_pg_database_datlocprovider - 1] = CharGetDatum(dblocprovider); new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate); new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections); new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit); new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid); new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid); new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace); new_record[Anum_pg_database_datcollate - 1] = CStringGetTextDatum(dbcollate); new_record[Anum_pg_database_datctype - 1] = CStringGetTextDatum(dbctype); if (dbiculocale) new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale); else new_record_nulls[Anum_pg_database_daticulocale - 1] = true; if (dbicurules) new_record[Anum_pg_database_daticurules - 1] = CStringGetTextDatum(dbicurules); else new_record_nulls[Anum_pg_database_daticurules - 1] = true; if (dbcollversion) new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion); else new_record_nulls[Anum_pg_database_datcollversion - 1] = true; /* * We deliberately set datacl to default (NULL), rather than copying it * from the template database. Copying it would be a bad idea when the * owner is not the same as the template's owner. 
*/ new_record_nulls[Anum_pg_database_datacl - 1] = true; tuple = heap_form_tuple(RelationGetDescr(pg_database_rel), new_record, new_record_nulls); CatalogTupleInsert(pg_database_rel, tuple); /* * Now generate additional catalog entries associated with the new DB */ /* Register owner dependency */ recordDependencyOnOwner(DatabaseRelationId, dboid, datdba); /* Create pg_shdepend entries for objects within database */ copyTemplateDependencies(src_dboid, dboid); /* Post creation hook for new database */ InvokeObjectPostCreateHook(DatabaseRelationId, dboid, 0); /* * If we're going to be reading data for the to-be-created database into * shared_buffers, take a lock on it. Nobody should know that this * database exists yet, but it's good to maintain the invariant that an * AccessExclusiveLock on the database is sufficient to drop all * of its buffers without worrying about more being read later. * * Note that we need to do this before entering the * PG_ENSURE_ERROR_CLEANUP block below, because createdb_failure_callback * expects this lock to be held already. */ if (dbstrategy == CREATEDB_WAL_LOG) LockSharedObject(DatabaseRelationId, dboid, 0, AccessShareLock); /* * Once we start copying subdirectories, we need to be able to clean 'em * up if we fail. Use an ENSURE block to make sure this happens. (This * is not a 100% solution, because of the possibility of failure during * transaction commit after we leave this routine, but it should handle * most scenarios.) */ fparms.src_dboid = src_dboid; fparms.dest_dboid = dboid; fparms.strategy = dbstrategy; PG_ENSURE_ERROR_CLEANUP(createdb_failure_callback, PointerGetDatum(&fparms)); { /* * If the user has asked to create a database with WAL_LOG strategy * then call CreateDatabaseUsingWalLog, which will copy the database * at the block level and it will WAL log each copied block. * Otherwise, call CreateDatabaseUsingFileCopy that will copy the * database file by file. 
*/ if (dbstrategy == CREATEDB_WAL_LOG) CreateDatabaseUsingWalLog(src_dboid, dboid, src_deftablespace, dst_deftablespace); else CreateDatabaseUsingFileCopy(src_dboid, dboid, src_deftablespace, dst_deftablespace); /* * Close pg_database, but keep lock till commit. */ table_close(pg_database_rel, NoLock); /* * Force synchronous commit, thus minimizing the window between * creation of the database files and committal of the transaction. If * we crash before committing, we'll have a DB that's taking up disk * space but is not in pg_database, which is not good. */ ForceSyncCommit(); } PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback, PointerGetDatum(&fparms)); return dboid; } /* * Check whether chosen encoding matches chosen locale settings. This * restriction is necessary because libc's locale-specific code usually * fails when presented with data in an encoding it's not expecting. We * allow mismatch in four cases: * * 1. locale encoding = SQL_ASCII, which means that the locale is C/POSIX * which works with any encoding. * * 2. locale encoding = -1, which means that we couldn't determine the * locale's encoding and have to trust the user to get it right. * * 3. selected encoding is UTF8 and platform is win32. This is because * UTF8 is a pseudo codepage that is supported in all locales since it's * converted to UTF16 before being used. * * 4. selected encoding is SQL_ASCII, but only if you're a superuser. This * is risky but we have historically allowed it --- notably, the * regression tests require it. * * Note: if you change this policy, fix initdb to match. 
*/ void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype) { int ctype_encoding = pg_get_encoding_from_locale(ctype, true); int collate_encoding = pg_get_encoding_from_locale(collate, true); if (!(ctype_encoding == encoding || ctype_encoding == PG_SQL_ASCII || ctype_encoding == -1 || #ifdef WIN32 encoding == PG_UTF8 || #endif (encoding == PG_SQL_ASCII && superuser()))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("encoding \"%s\" does not match locale \"%s\"", pg_encoding_to_char(encoding), ctype), errdetail("The chosen LC_CTYPE setting requires encoding \"%s\".", pg_encoding_to_char(ctype_encoding)))); if (!(collate_encoding == encoding || collate_encoding == PG_SQL_ASCII || collate_encoding == -1 || #ifdef WIN32 encoding == PG_UTF8 || #endif (encoding == PG_SQL_ASCII && superuser()))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("encoding \"%s\" does not match locale \"%s\"", pg_encoding_to_char(encoding), collate), errdetail("The chosen LC_COLLATE setting requires encoding \"%s\".", pg_encoding_to_char(collate_encoding)))); } /* Error cleanup callback for createdb */ static void createdb_failure_callback(int code, Datum arg) { createdb_failure_params *fparms = (createdb_failure_params *) DatumGetPointer(arg); /* * If we were copying database at block levels then drop pages for the * destination database that are in the shared buffer cache. And tell * checkpointer to forget any pending fsync and unlink requests for files * in the database. The reasoning behind doing this is same as explained * in dropdb function. But unlike dropdb we don't need to call * pgstat_drop_database because this database is still not created so * there should not be any stat for this. */ if (fparms->strategy == CREATEDB_WAL_LOG) { DropDatabaseBuffers(fparms->dest_dboid); ForgetDatabaseSyncRequests(fparms->dest_dboid); /* Release lock on the target database. 
*/ UnlockSharedObject(DatabaseRelationId, fparms->dest_dboid, 0, AccessShareLock); } /* * Release lock on source database before doing recursive remove. This is * not essential but it seems desirable to release the lock as soon as * possible. */ UnlockSharedObject(DatabaseRelationId, fparms->src_dboid, 0, ShareLock); /* Throw away any successfully copied subdirectories */ remove_dbtablespaces(fparms->dest_dboid); } /* * DROP DATABASE */ void dropdb(const char *dbname, bool missing_ok, bool force) { Oid db_id; bool db_istemplate; Relation pgdbrel; HeapTuple tup; int notherbackends; int npreparedxacts; int nslots, nslots_active; int nsubscriptions; /* * Look up the target database's OID, and get exclusive lock on it. We * need this to ensure that no new backend starts up in the target * database while we are deleting it (see postinit.c), and that no one is * using it as a CREATE DATABASE template or trying to delete it for * themselves. */ pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) { if (!missing_ok) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname))); } else { /* Close pg_database, release the lock, since we changed nothing */ table_close(pgdbrel, RowExclusiveLock); ereport(NOTICE, (errmsg("database \"%s\" does not exist, skipping", dbname))); return; } } /* * Permission checks */ if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, dbname); /* DROP hook for the database being removed */ InvokeObjectDropHook(DatabaseRelationId, db_id, 0); /* * Disallow dropping a DB that is marked istemplate. This is just to * prevent people from accidentally dropping template0 or template1; they * can do so if they're really determined ... 
*/ if (db_istemplate) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot drop a template database"))); /* Obviously can't drop my own database */ if (db_id == MyDatabaseId) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("cannot drop the currently open database"))); /* * Check whether there are active logical slots that refer to the * to-be-dropped database. The database lock we are holding prevents the * creation of new slots using the database or existing slots becoming * active. */ (void) ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active); if (nslots_active) { ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("database \"%s\" is used by an active logical replication slot", dbname), errdetail_plural("There is %d active slot.", "There are %d active slots.", nslots_active, nslots_active))); } /* * Check if there are subscriptions defined in the target database. * * We can't drop them automatically because they might be holding * resources in other databases/instances. */ if ((nsubscriptions = CountDBSubscriptions(db_id)) > 0) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("database \"%s\" is being used by logical replication subscription", dbname), errdetail_plural("There is %d subscription.", "There are %d subscriptions.", nsubscriptions, nsubscriptions))); /* * Attempt to terminate all existing connections to the target database if * the user has requested to do so. */ if (force) TerminateOtherDBBackends(db_id); /* * Check for other backends in the target database. (Because we hold the * database lock, no new ones can start after this.) * * As in CREATE DATABASE, check this after other error conditions. */ if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("database \"%s\" is being accessed by other users", dbname), errdetail_busy_db(notherbackends, npreparedxacts))); /* * Remove the database's tuple from pg_database. 
*/ tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_id)); if (!HeapTupleIsValid(tup)) elog(ERROR, "cache lookup failed for database %u", db_id); CatalogTupleDelete(pgdbrel, &tup->t_self); ReleaseSysCache(tup); /* * Delete any comments or security labels associated with the database. */ DeleteSharedComments(db_id, DatabaseRelationId); DeleteSharedSecurityLabel(db_id, DatabaseRelationId); /* * Remove settings associated with this database */ DropSetting(db_id, InvalidOid); /* * Remove shared dependency references for the database. */ dropDatabaseDependencies(db_id); /* * Drop db-specific replication slots. */ ReplicationSlotsDropDBSlots(db_id); /* * Drop pages for this database that are in the shared buffer cache. This * is important to ensure that no remaining backend tries to write out a * dirty buffer to the dead database later... */ DropDatabaseBuffers(db_id); /* * Tell the cumulative stats system to forget it immediately, too. */ pgstat_drop_database(db_id); /* * Tell checkpointer to forget any pending fsync and unlink requests for * files in the database; else the fsyncs will fail at next checkpoint, or * worse, it will delete files that belong to a newly created database * with the same OID. */ ForgetDatabaseSyncRequests(db_id); /* * Force a checkpoint to make sure the checkpointer has received the * message sent by ForgetDatabaseSyncRequests. */ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); /* Close all smgr fds in all backends. */ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); /* * Remove all tablespace subdirs belonging to the database. */ remove_dbtablespaces(db_id); /* * Close pg_database, but keep lock till commit. */ table_close(pgdbrel, NoLock); /* * Force synchronous commit, thus minimizing the window between removal of * the database files and committal of the transaction. 
If we crash before * committing, we'll have a DB that's gone on disk but still there * according to pg_database, which is not good. */ ForceSyncCommit(); } /* * Rename database */ ObjectAddress RenameDatabase(const char *oldname, const char *newname) { Oid db_id; HeapTuple newtup; Relation rel; int notherbackends; int npreparedxacts; ObjectAddress address; /* * Look up the target database's OID, and get exclusive lock on it. We * need this for the same reasons as DROP DATABASE. */ rel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", oldname))); /* must be owner */ if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, oldname); /* must have createdb rights */ if (!have_createdb_privilege()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied to rename database"))); /* * If built with appropriate switch, whine when regression-testing * conventions for database names are violated. */ #ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS if (strstr(newname, "regression") == NULL) elog(WARNING, "databases created by regression test cases should have names including \"regression\""); #endif /* * Make sure the new name doesn't exist. See notes for same error in * CREATE DATABASE. */ if (OidIsValid(get_database_oid(newname, true))) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_DATABASE), errmsg("database \"%s\" already exists", newname))); /* * XXX Client applications probably store the current database somewhere, * so renaming it could cause confusion. On the other hand, there may not * be an actual problem besides a little confusion, so think about this * and decide. 
*/ if (db_id == MyDatabaseId) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("current database cannot be renamed"))); /* * Make sure the database does not have active sessions. This is the same * concern as above, but applied to other sessions. * * As in CREATE DATABASE, check this after other error conditions. */ if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("database \"%s\" is being accessed by other users", oldname), errdetail_busy_db(notherbackends, npreparedxacts))); /* rename */ newtup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id)); if (!HeapTupleIsValid(newtup)) elog(ERROR, "cache lookup failed for database %u", db_id); namestrcpy(&(((Form_pg_database) GETSTRUCT(newtup))->datname), newname); CatalogTupleUpdate(rel, &newtup->t_self, newtup); InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); ObjectAddressSet(address, DatabaseRelationId, db_id); /* * Close pg_database, but keep lock till commit. */ table_close(rel, NoLock); return address; } /* * ALTER DATABASE SET TABLESPACE */ static void movedb(const char *dbname, const char *tblspcname) { Oid db_id; Relation pgdbrel; int notherbackends; int npreparedxacts; HeapTuple oldtuple, newtuple; Oid src_tblspcoid, dst_tblspcoid; ScanKeyData scankey; SysScanDesc sysscan; AclResult aclresult; char *src_dbpath; char *dst_dbpath; DIR *dstdir; struct dirent *xlde; movedb_failure_params fparms; /* * Look up the target database's OID, and get exclusive lock on it. We * need this to ensure that no new backend starts up in the database while * we are moving it, and that no one is using it as a CREATE DATABASE * template or trying to delete it. 
*/ pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL, NULL)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname))); /* * We actually need a session lock, so that the lock will persist across * the commit/restart below. (We could almost get away with letting the * lock be released at commit, except that someone could try to move * relations of the DB back into the old directory while we rmtree() it.) */ LockSharedObjectForSession(DatabaseRelationId, db_id, 0, AccessExclusiveLock); /* * Permission checks */ if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, dbname); /* * Obviously can't move the tables of my own database */ if (db_id == MyDatabaseId) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("cannot change the tablespace of the currently open database"))); /* * Get tablespace's oid */ dst_tblspcoid = get_tablespace_oid(tblspcname, false); /* * Permission checks */ aclresult = object_aclcheck(TableSpaceRelationId, dst_tblspcoid, GetUserId(), ACL_CREATE); if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, OBJECT_TABLESPACE, tblspcname); /* * pg_global must never be the default tablespace */ if (dst_tblspcoid == GLOBALTABLESPACE_OID) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("pg_global cannot be used as default tablespace"))); /* * No-op if same tablespace */ if (src_tblspcoid == dst_tblspcoid) { table_close(pgdbrel, NoLock); UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0, AccessExclusiveLock); return; } /* * Check for other backends in the target database. (Because we hold the * database lock, no new ones can start after this.) * * As in CREATE DATABASE, check this after other error conditions. 
*/ if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("database \"%s\" is being accessed by other users", dbname), errdetail_busy_db(notherbackends, npreparedxacts))); /* * Get old and new database paths */ src_dbpath = GetDatabasePath(db_id, src_tblspcoid); dst_dbpath = GetDatabasePath(db_id, dst_tblspcoid); /* * Force a checkpoint before proceeding. This will force all dirty * buffers, including those of unlogged tables, out to disk, to ensure * source database is up-to-date on disk for the copy. * FlushDatabaseBuffers() would suffice for that, but we also want to * process any pending unlink requests. Otherwise, the check for existing * files in the target directory might fail unnecessarily, not to mention * that the copy might fail due to source files getting deleted under it. * On Windows, this also ensures that background procs don't hold any open * files, which would cause rmdir() to fail. */ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL); /* Close all smgr fds in all backends. */ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); /* * Now drop all buffers holding data of the target database; they should * no longer be dirty so DropDatabaseBuffers is safe. * * It might seem that we could just let these buffers age out of shared * buffers naturally, since they should not get referenced anymore. The * problem with that is that if the user later moves the database back to * its original tablespace, any still-surviving buffers would appear to * contain valid data again --- but they'd be missing any changes made in * the database while it was in the new tablespace. In any case, freeing * buffers that should never be used again seems worth the cycles. * * Note: it'd be sufficient to get rid of buffers matching db_id and * src_tblspcoid, but bufmgr.c presently provides no API for that. 
*/ DropDatabaseBuffers(db_id); /* * Check for existence of files in the target directory, i.e., objects of * this database that are already in the target tablespace. We can't * allow the move in such a case, because we would need to change those * relations' pg_class.reltablespace entries to zero, and we don't have * access to the DB's pg_class to do so. */ dstdir = AllocateDir(dst_dbpath); if (dstdir != NULL) { while ((xlde = ReadDir(dstdir, dst_dbpath)) != NULL) { if (strcmp(xlde->d_name, ".") == 0 || strcmp(xlde->d_name, "..") == 0) continue; ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("some relations of database \"%s\" are already in tablespace \"%s\"", dbname, tblspcname), errhint("You must move them back to the database's default tablespace before using this command."))); } FreeDir(dstdir); /* * The directory exists but is empty. We must remove it before using * the copydir function. */ if (rmdir(dst_dbpath) != 0) elog(ERROR, "could not remove directory \"%s\": %m", dst_dbpath); } /* * Use an ENSURE block to make sure we remove the debris if the copy fails * (eg, due to out-of-disk-space). This is not a 100% solution, because * of the possibility of failure during transaction commit, but it should * handle most scenarios. 
*/ fparms.dest_dboid = db_id; fparms.dest_tsoid = dst_tblspcoid; PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback, PointerGetDatum(&fparms)); { Datum new_record[Natts_pg_database] = {0}; bool new_record_nulls[Natts_pg_database] = {0}; bool new_record_repl[Natts_pg_database] = {0}; /* * Copy files from the old tablespace to the new one */ copydir(src_dbpath, dst_dbpath, false); /* * Record the filesystem change in XLOG */ { xl_dbase_create_file_copy_rec xlrec; xlrec.db_id = db_id; xlrec.tablespace_id = dst_tblspcoid; xlrec.src_db_id = db_id; xlrec.src_tablespace_id = src_tblspcoid; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_file_copy_rec)); (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE); } /* * Update the database's pg_database tuple */ ScanKeyInit(&scankey, Anum_pg_database_datname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(dbname)); sysscan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true, NULL, 1, &scankey); oldtuple = systable_getnext(sysscan); if (!HeapTupleIsValid(oldtuple)) /* shouldn't happen... */ ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname))); new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_tblspcoid); new_record_repl[Anum_pg_database_dattablespace - 1] = true; newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(pgdbrel), new_record, new_record_nulls, new_record_repl); CatalogTupleUpdate(pgdbrel, &oldtuple->t_self, newtuple); InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); systable_endscan(sysscan); /* * Force another checkpoint here. As in CREATE DATABASE, this is to * ensure that we don't have to replay a committed * XLOG_DBASE_CREATE_FILE_COPY operation, which would cause us to lose * any unlogged operations done in the new DB tablespace before the * next checkpoint. 
*/
		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);

		/*
		 * Force synchronous commit, thus minimizing the window between
		 * copying the database files and committal of the transaction. If we
		 * crash before committing, we'll leave an orphaned set of files on
		 * disk, which is not fatal but not good either.
		 */
		ForceSyncCommit();

		/*
		 * Close pg_database, but keep lock till commit.
		 */
		table_close(pgdbrel, NoLock);
	}
	PG_END_ENSURE_ERROR_CLEANUP(movedb_failure_callback,
								PointerGetDatum(&fparms));

	/*
	 * Commit the transaction so that the pg_database update is committed. If
	 * we crash while removing files, the database won't be corrupt, we'll
	 * just leave some orphaned files in the old directory.
	 *
	 * (This is OK because we know we aren't inside a transaction block.)
	 *
	 * XXX would it be safe/better to do this inside the ensure block?  Not
	 * convinced it's a good idea; consider elog just after the transaction
	 * really commits.
	 */
	PopActiveSnapshot();
	CommitTransactionCommand();

	/* Start new transaction for the remaining work; don't need a snapshot */
	StartTransactionCommand();

	/*
	 * Remove files from the old tablespace
	 */
	if (!rmtree(src_dbpath, true))
		ereport(WARNING,
				(errmsg("some useless files may be left behind in old database directory \"%s\"",
						src_dbpath)));

	/*
	 * Record the filesystem change in XLOG
	 */
	{
		xl_dbase_drop_rec xlrec;

		xlrec.db_id = db_id;
		xlrec.ntablespaces = 1;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
		XLogRegisterData((char *) &src_tblspcoid, sizeof(Oid));

		(void) XLogInsert(RM_DBASE_ID,
						  XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
	}

	/* Now it's safe to release the database lock */
	UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0,
								 AccessExclusiveLock);

	pfree(src_dbpath);
	pfree(dst_dbpath);
}

/*
 * Error cleanup callback for movedb
 *
 * "arg" is a Datum wrapping a pointer to a movedb_failure_params struct.
 * The callback removes the directory of database dest_dboid inside
 * tablespace dest_tsoid.  "code" is not used.
 */
static void
movedb_failure_callback(int code, Datum arg)
{
	movedb_failure_params *fparms = (movedb_failure_params *) DatumGetPointer(arg);
	char	   *dstpath;

	/*
	 * Get rid of anything we managed to copy to the target directory.  The
	 * result of rmtree() is deliberately ignored here.
	 */
	dstpath = GetDatabasePath(fparms->dest_dboid, fparms->dest_tsoid);

	(void) rmtree(dstpath, true);

	pfree(dstpath);
}

/*
 * Process options and call dropdb function.
 *
 * The only option name accepted is "force"; any other name is reported as
 * a syntax error positioned at the offending option.  The database name and
 * missing_ok flag are passed through to dropdb() unchanged.
 */
void
DropDatabase(ParseState *pstate, DropdbStmt *stmt)
{
	bool		force = false;
	ListCell   *lc;

	foreach(lc, stmt->options)
	{
		DefElem    *opt = (DefElem *) lfirst(lc);

		if (strcmp(opt->defname, "force") == 0)
			force = true;
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unrecognized DROP DATABASE option \"%s\"", opt->defname),
					 parser_errposition(pstate, opt->location)));
	}

	dropdb(stmt->dbname, stmt->missing_ok, force);
}

/*
 * ALTER DATABASE name ...
 *
 * Recognized option names are "is_template", "allow_connections",
 * "connection_limit" and "tablespace"; naming any of them twice is reported
 * through errorConflictingDefElem(), and any other name is a syntax error.
 *
 * When "tablespace" is given it must be the only option; the work is then
 * handed to movedb() and InvalidOid is returned.  Otherwise the database's
 * pg_database row is updated with the requested settings and the database
 * OID is returned.  The current user must own the database.
 */
Oid
AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
{
	Relation	rel;
	Oid			dboid;
	HeapTuple	tuple,
				newtuple;
	Form_pg_database datform;
	ScanKeyData scankey;
	SysScanDesc scan;
	ListCell   *option;

	/*
	 * Values used when an option is named but carries no argument; see the
	 * "->arg" tests below.
	 */
	bool		dbistemplate = false;
	bool		dballowconnections = true;
	int			dbconnlimit = -1;
	DefElem    *distemplate = NULL;
	DefElem    *dallowconnections = NULL;
	DefElem    *dconnlimit = NULL;
	DefElem    *dtablespace = NULL;
	Datum		new_record[Natts_pg_database] = {0};
	bool		new_record_nulls[Natts_pg_database] = {0};
	bool		new_record_repl[Natts_pg_database] = {0};

	/* Extract options from the statement node tree */
	foreach(option, stmt->options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "is_template") == 0)
		{
			if (distemplate)
				errorConflictingDefElem(defel, pstate);
			distemplate = defel;
		}
		else if (strcmp(defel->defname, "allow_connections") == 0)
		{
			if (dallowconnections)
				errorConflictingDefElem(defel, pstate);
			dallowconnections = defel;
		}
		else if (strcmp(defel->defname, "connection_limit") == 0)
		{
			if (dconnlimit)
				errorConflictingDefElem(defel, pstate);
			dconnlimit = defel;
		}
		else if (strcmp(defel->defname, "tablespace") == 0)
		{
			if (dtablespace)
				errorConflictingDefElem(defel, pstate);
			dtablespace = defel;
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("option \"%s\" not recognized", defel->defname),
					 parser_errposition(pstate, defel->location)));
	}

	if (dtablespace)
	{
		/*
		 * While the SET TABLESPACE syntax doesn't allow any other options,
		 * somebody could write "WITH TABLESPACE ...".  Forbid any other
		 * options from being specified in that case.
		 */
		if (list_length(stmt->options) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("option \"%s\" cannot be specified with other options",
							dtablespace->defname),
					 parser_errposition(pstate, dtablespace->location)));
		/* this case isn't allowed within a transaction block */
		PreventInTransactionBlock(isTopLevel, "ALTER DATABASE SET TABLESPACE");
		movedb(stmt->dbname, defGetString(dtablespace));
		return InvalidOid;
	}

	/* Decode the supplied arguments; options without one keep the defaults */
	if (distemplate && distemplate->arg)
		dbistemplate = defGetBoolean(distemplate);
	if (dallowconnections && dallowconnections->arg)
		dballowconnections = defGetBoolean(dallowconnections);
	if (dconnlimit && dconnlimit->arg)
	{
		dbconnlimit = defGetInt32(dconnlimit);
		if (dbconnlimit < -1)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid connection limit: %d", dbconnlimit)));
	}

	/*
	 * Get the old tuple.  We don't need a lock on the database per se,
	 * because we're not going to do anything that would mess up incoming
	 * connections.
	 */
	rel = table_open(DatabaseRelationId, RowExclusiveLock);
	ScanKeyInit(&scankey,
				Anum_pg_database_datname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->dbname));
	scan = systable_beginscan(rel, DatabaseNameIndexId, true,
							  NULL, 1, &scankey);
	tuple = systable_getnext(scan);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("database \"%s\" does not exist", stmt->dbname)));

	datform = (Form_pg_database) GETSTRUCT(tuple);
	dboid = datform->oid;

	/* Only the owner of the database may alter it */
	if (!object_ownercheck(DatabaseRelationId, dboid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
					   stmt->dbname);

	/*
	 * In order to avoid getting locked out and having to go through
	 * standalone mode, we refuse to disallow connections to the database
	 * we're currently connected to.  Lockout can still happen with concurrent
	 * sessions but the likeliness of that is not high enough to worry about.
	 */
	if (!dballowconnections && dboid == MyDatabaseId)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("cannot disallow connections for current database")));

	/*
	 * Build an updated tuple, perusing the information just obtained.  Only
	 * columns whose option was named in the statement are marked for
	 * replacement.
	 */
	if (distemplate)
	{
		new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
		new_record_repl[Anum_pg_database_datistemplate - 1] = true;
	}
	if (dallowconnections)
	{
		new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
		new_record_repl[Anum_pg_database_datallowconn - 1] = true;
	}
	if (dconnlimit)
	{
		new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
		new_record_repl[Anum_pg_database_datconnlimit - 1] = true;
	}

	newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), new_record,
								 new_record_nulls, new_record_repl);
	CatalogTupleUpdate(rel, &tuple->t_self, newtuple);

	InvokeObjectPostAlterHook(DatabaseRelationId, dboid, 0);

	systable_endscan(scan);

	/* Close pg_database, but keep lock till commit */
	table_close(rel, NoLock);

	return dboid;
}


/*
 * ALTER DATABASE name REFRESH
COLLATION VERSION */ ObjectAddress AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt) { Relation rel; ScanKeyData scankey; SysScanDesc scan; Oid db_id; HeapTuple tuple; Form_pg_database datForm; ObjectAddress address; Datum datum; bool isnull; char *oldversion; char *newversion; rel = table_open(DatabaseRelationId, RowExclusiveLock); ScanKeyInit(&scankey, Anum_pg_database_datname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(stmt->dbname)); scan = systable_beginscan(rel, DatabaseNameIndexId, true, NULL, 1, &scankey); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", stmt->dbname))); datForm = (Form_pg_database) GETSTRUCT(tuple); db_id = datForm->oid; if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, stmt->dbname); datum = heap_getattr(tuple, Anum_pg_database_datcollversion, RelationGetDescr(rel), &isnull); oldversion = isnull ? NULL : TextDatumGetCString(datum); datum = heap_getattr(tuple, datForm->datlocprovider == COLLPROVIDER_ICU ? 
Anum_pg_database_daticulocale : Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull); if (isnull) elog(ERROR, "unexpected null in pg_database"); newversion = get_collation_actual_version(datForm->datlocprovider, TextDatumGetCString(datum)); /* cannot change from NULL to non-NULL or vice versa */ if ((!oldversion && newversion) || (oldversion && !newversion)) elog(ERROR, "invalid collation version change"); else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) { bool nulls[Natts_pg_database] = {0}; bool replaces[Natts_pg_database] = {0}; Datum values[Natts_pg_database] = {0}; ereport(NOTICE, (errmsg("changing version from %s to %s", oldversion, newversion))); values[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(newversion); replaces[Anum_pg_database_datcollversion - 1] = true; tuple = heap_modify_tuple(tuple, RelationGetDescr(rel), values, nulls, replaces); CatalogTupleUpdate(rel, &tuple->t_self, tuple); heap_freetuple(tuple); } else ereport(NOTICE, (errmsg("version has not changed"))); InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); ObjectAddressSet(address, DatabaseRelationId, db_id); systable_endscan(scan); table_close(rel, NoLock); return address; } /* * ALTER DATABASE name SET ... */ Oid AlterDatabaseSet(AlterDatabaseSetStmt *stmt) { Oid datid = get_database_oid(stmt->dbname, false); /* * Obtain a lock on the database and make sure it didn't go away in the * meantime. 
*/ shdepLockAndCheckObject(DatabaseRelationId, datid); if (!object_ownercheck(DatabaseRelationId, datid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, stmt->dbname); AlterSetting(datid, InvalidOid, stmt->setstmt); UnlockSharedObject(DatabaseRelationId, datid, 0, AccessShareLock); return datid; } /* * ALTER DATABASE name OWNER TO newowner */ ObjectAddress AlterDatabaseOwner(const char *dbname, Oid newOwnerId) { Oid db_id; HeapTuple tuple; Relation rel; ScanKeyData scankey; SysScanDesc scan; Form_pg_database datForm; ObjectAddress address; /* * Get the old tuple. We don't need a lock on the database per se, * because we're not going to do anything that would mess up incoming * connections. */ rel = table_open(DatabaseRelationId, RowExclusiveLock); ScanKeyInit(&scankey, Anum_pg_database_datname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(dbname)); scan = systable_beginscan(rel, DatabaseNameIndexId, true, NULL, 1, &scankey); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname))); datForm = (Form_pg_database) GETSTRUCT(tuple); db_id = datForm->oid; /* * If the new owner is the same as the existing owner, consider the * command to have succeeded. This is to be consistent with other * objects. 
*/ if (datForm->datdba != newOwnerId) { Datum repl_val[Natts_pg_database]; bool repl_null[Natts_pg_database] = {0}; bool repl_repl[Natts_pg_database] = {0}; Acl *newAcl; Datum aclDatum; bool isNull; HeapTuple newtuple; /* Otherwise, must be owner of the existing object */ if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, dbname); /* Must be able to become new owner */ check_can_set_role(GetUserId(), newOwnerId); /* * must have createdb rights * * NOTE: This is different from other alter-owner checks in that the * current user is checked for createdb privileges instead of the * destination owner. This is consistent with the CREATE case for * databases. Because superusers will always have this right, we need * no special case for them. */ if (!have_createdb_privilege()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied to change owner of database"))); repl_repl[Anum_pg_database_datdba - 1] = true; repl_val[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(newOwnerId); /* * Determine the modified ACL for the new owner. This is only * necessary when the ACL is non-null. 
*/ aclDatum = heap_getattr(tuple, Anum_pg_database_datacl, RelationGetDescr(rel), &isNull); if (!isNull) { newAcl = aclnewowner(DatumGetAclP(aclDatum), datForm->datdba, newOwnerId); repl_repl[Anum_pg_database_datacl - 1] = true; repl_val[Anum_pg_database_datacl - 1] = PointerGetDatum(newAcl); } newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), repl_val, repl_null, repl_repl); CatalogTupleUpdate(rel, &newtuple->t_self, newtuple); heap_freetuple(newtuple); /* Update owner dependency reference */ changeDependencyOnOwner(DatabaseRelationId, db_id, newOwnerId); } InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); ObjectAddressSet(address, DatabaseRelationId, db_id); systable_endscan(scan); /* Close pg_database, but keep lock till commit */ table_close(rel, NoLock); return address; } Datum pg_database_collation_actual_version(PG_FUNCTION_ARGS) { Oid dbid = PG_GETARG_OID(0); HeapTuple tp; char datlocprovider; Datum datum; char *version; tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid)); if (!HeapTupleIsValid(tp)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("database with OID %u does not exist", dbid))); datlocprovider = ((Form_pg_database) GETSTRUCT(tp))->datlocprovider; datum = SysCacheGetAttrNotNull(DATABASEOID, tp, datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_daticulocale : Anum_pg_database_datcollate); version = get_collation_actual_version(datlocprovider, TextDatumGetCString(datum)); ReleaseSysCache(tp); if (version) PG_RETURN_TEXT_P(cstring_to_text(version)); else PG_RETURN_NULL(); } /* * Helper functions */ /* * Look up info about the database named "name". If the database exists, * obtain the specified lock type on it, fill in any of the remaining * parameters that aren't NULL, and return true. If no such database, * return false. 
*/ static bool get_db_info(const char *name, LOCKMODE lockmode, Oid *dbIdP, Oid *ownerIdP, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale, char **dbIcurules, char *dbLocProvider, char **dbCollversion) { bool result = false; Relation relation; Assert(name); /* Caller may wish to grab a better lock on pg_database beforehand... */ relation = table_open(DatabaseRelationId, AccessShareLock); /* * Loop covers the rare case where the database is renamed before we can * lock it. We try again just in case we can find a new one of the same * name. */ for (;;) { ScanKeyData scanKey; SysScanDesc scan; HeapTuple tuple; Oid dbOid; /* * there's no syscache for database-indexed-by-name, so must do it the * hard way */ ScanKeyInit(&scanKey, Anum_pg_database_datname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(name)); scan = systable_beginscan(relation, DatabaseNameIndexId, true, NULL, 1, &scanKey); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) { /* definitely no database of that name */ systable_endscan(scan); break; } dbOid = ((Form_pg_database) GETSTRUCT(tuple))->oid; systable_endscan(scan); /* * Now that we have a database OID, we can try to lock the DB. */ if (lockmode != NoLock) LockSharedObject(DatabaseRelationId, dbOid, 0, lockmode); /* * And now, re-fetch the tuple by OID. If it's still there and still * the same name, we win; else, drop the lock and loop back to try * again. */ tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbOid)); if (HeapTupleIsValid(tuple)) { Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple); if (strcmp(name, NameStr(dbform->datname)) == 0) { Datum datum; bool isnull; /* oid of the database */ if (dbIdP) *dbIdP = dbOid; /* oid of the owner */ if (ownerIdP) *ownerIdP = dbform->datdba; /* character encoding */ if (encodingP) *encodingP = dbform->encoding; /* allowed as template? 
*/ if (dbIsTemplateP) *dbIsTemplateP = dbform->datistemplate; /* allowing connections? */ if (dbAllowConnP) *dbAllowConnP = dbform->datallowconn; /* limit of frozen XIDs */ if (dbFrozenXidP) *dbFrozenXidP = dbform->datfrozenxid; /* minimum MultiXactId */ if (dbMinMultiP) *dbMinMultiP = dbform->datminmxid; /* default tablespace for this database */ if (dbTablespace) *dbTablespace = dbform->dattablespace; /* default locale settings for this database */ if (dbLocProvider) *dbLocProvider = dbform->datlocprovider; if (dbCollate) { datum = SysCacheGetAttrNotNull(DATABASEOID, tuple, Anum_pg_database_datcollate); *dbCollate = TextDatumGetCString(datum); } if (dbCtype) { datum = SysCacheGetAttrNotNull(DATABASEOID, tuple, Anum_pg_database_datctype); *dbCtype = TextDatumGetCString(datum); } if (dbIculocale) { datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticulocale, &isnull); if (isnull) *dbIculocale = NULL; else *dbIculocale = TextDatumGetCString(datum); } if (dbIcurules) { datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticurules, &isnull); if (isnull) *dbIcurules = NULL; else *dbIcurules = TextDatumGetCString(datum); } if (dbCollversion) { datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull); if (isnull) *dbCollversion = NULL; else *dbCollversion = TextDatumGetCString(datum); } ReleaseSysCache(tuple); result = true; break; } /* can only get here if it was just renamed */ ReleaseSysCache(tuple); } if (lockmode != NoLock) UnlockSharedObject(DatabaseRelationId, dbOid, 0, lockmode); } table_close(relation, AccessShareLock); return result; } /* Check if current user has createdb privileges */ bool have_createdb_privilege(void) { bool result = false; HeapTuple utup; /* Superusers can always do everything */ if (superuser()) return true; utup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(GetUserId())); if (HeapTupleIsValid(utup)) { result = ((Form_pg_authid) GETSTRUCT(utup))->rolcreatedb; ReleaseSysCache(utup); } 
return result; } /* * Remove tablespace directories * * We don't know what tablespaces db_id is using, so iterate through all * tablespaces removing /db_id */ static void remove_dbtablespaces(Oid db_id) { Relation rel; TableScanDesc scan; HeapTuple tuple; List *ltblspc = NIL; ListCell *cell; int ntblspc; int i; Oid *tablespace_ids; rel = table_open(TableSpaceRelationId, AccessShareLock); scan = table_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple); Oid dsttablespace = spcform->oid; char *dstpath; struct stat st; /* Don't mess with the global tablespace */ if (dsttablespace == GLOBALTABLESPACE_OID) continue; dstpath = GetDatabasePath(db_id, dsttablespace); if (lstat(dstpath, &st) < 0 || !S_ISDIR(st.st_mode)) { /* Assume we can ignore it */ pfree(dstpath); continue; } if (!rmtree(dstpath, true)) ereport(WARNING, (errmsg("some useless files may be left behind in old database directory \"%s\"", dstpath))); ltblspc = lappend_oid(ltblspc, dsttablespace); pfree(dstpath); } ntblspc = list_length(ltblspc); if (ntblspc == 0) { table_endscan(scan); table_close(rel, AccessShareLock); return; } tablespace_ids = (Oid *) palloc(ntblspc * sizeof(Oid)); i = 0; foreach(cell, ltblspc) tablespace_ids[i++] = lfirst_oid(cell); /* Record the filesystem change in XLOG */ { xl_dbase_drop_rec xlrec; xlrec.db_id = db_id; xlrec.ntablespaces = ntblspc; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, MinSizeOfDbaseDropRec); XLogRegisterData((char *) tablespace_ids, ntblspc * sizeof(Oid)); (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE); } list_free(ltblspc); pfree(tablespace_ids); table_endscan(scan); table_close(rel, AccessShareLock); } /* * Check for existing files that conflict with a proposed new DB OID; * return true if there are any * * If there were a subdirectory in any tablespace matching the proposed new * OID, we'd get a create failure 
due to the duplicate name ... and then we'd * try to remove that already-existing subdirectory during the cleanup in * remove_dbtablespaces. Nuking existing files seems like a bad idea, so * instead we make this extra check before settling on the OID of the new * database. This exactly parallels what GetNewRelFileNumber() does for table * relfilenumber values. */ static bool check_db_file_conflict(Oid db_id) { bool result = false; Relation rel; TableScanDesc scan; HeapTuple tuple; rel = table_open(TableSpaceRelationId, AccessShareLock); scan = table_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple); Oid dsttablespace = spcform->oid; char *dstpath; struct stat st; /* Don't mess with the global tablespace */ if (dsttablespace == GLOBALTABLESPACE_OID) continue; dstpath = GetDatabasePath(db_id, dsttablespace); if (lstat(dstpath, &st) == 0) { /* Found a conflicting file (or directory, whatever) */ pfree(dstpath); result = true; break; } pfree(dstpath); } table_endscan(scan); table_close(rel, AccessShareLock); return result; } /* * Issue a suitable errdetail message for a busy database */ static int errdetail_busy_db(int notherbackends, int npreparedxacts) { if (notherbackends > 0 && npreparedxacts > 0) /* * We don't deal with singular versus plural here, since gettext * doesn't support multiple plurals in one string. 
*/ errdetail("There are %d other session(s) and %d prepared transaction(s) using the database.", notherbackends, npreparedxacts); else if (notherbackends > 0) errdetail_plural("There is %d other session using the database.", "There are %d other sessions using the database.", notherbackends, notherbackends); else errdetail_plural("There is %d prepared transaction using the database.", "There are %d prepared transactions using the database.", npreparedxacts, npreparedxacts); return 0; /* just to keep ereport macro happy */ } /* * get_database_oid - given a database name, look up the OID * * If missing_ok is false, throw an error if database name not found. If * true, just return InvalidOid. */ Oid get_database_oid(const char *dbname, bool missing_ok) { Relation pg_database; ScanKeyData entry[1]; SysScanDesc scan; HeapTuple dbtuple; Oid oid; /* * There's no syscache for pg_database indexed by name, so we must look * the hard way. */ pg_database = table_open(DatabaseRelationId, AccessShareLock); ScanKeyInit(&entry[0], Anum_pg_database_datname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(dbname)); scan = systable_beginscan(pg_database, DatabaseNameIndexId, true, NULL, 1, entry); dbtuple = systable_getnext(scan); /* We assume that there can be at most one matching tuple */ if (HeapTupleIsValid(dbtuple)) oid = ((Form_pg_database) GETSTRUCT(dbtuple))->oid; else oid = InvalidOid; systable_endscan(scan); table_close(pg_database, AccessShareLock); if (!OidIsValid(oid) && !missing_ok) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname))); return oid; } /* * get_database_name - given a database OID, look up the name * * Returns a palloc'd string, or NULL if no such database. 
*/
char *
get_database_name(Oid dbid)
{
	HeapTuple	dbtuple;
	char	   *result;

	dbtuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
	if (HeapTupleIsValid(dbtuple))
	{
		/* copy the name out before the cache entry is released */
		result = pstrdup(NameStr(((Form_pg_database) GETSTRUCT(dbtuple))->datname));
		ReleaseSysCache(dbtuple);
	}
	else
		result = NULL;

	return result;
}

/*
 * recovery_create_dbdir()
 *
 * During recovery, there's a case where we validly need to recover a missing
 * tablespace directory so that recovery can continue.  This happens when
 * recovery wants to create a database but the holding tablespace has been
 * removed before the server stopped.  Since we expect that the directory will
 * be gone before reaching recovery consistency, and we have no knowledge about
 * the tablespace other than its OID here, we create a real directory under
 * pg_tblspc here instead of restoring the symlink.
 *
 * If only_tblspc is true, then the requested directory must be in pg_tblspc/
 *
 * Nothing is done when "path" already exists.  Every failure in here is
 * raised at PANIC level.
 */
static void
recovery_create_dbdir(char *path, bool only_tblspc)
{
	struct stat st;

	Assert(RecoveryInProgress());

	/* Already there: nothing to do */
	if (stat(path, &st) == 0)
		return;

	if (only_tblspc && strstr(path, "pg_tblspc/") == NULL)
		elog(PANIC, "requested to created invalid directory: %s", path);

	/*
	 * Once consistency has been reached a missing directory is an error,
	 * unless allow_in_place_tablespaces is set.
	 */
	if (reachedConsistency && !allow_in_place_tablespaces)
		ereport(PANIC,
				errmsg("missing directory \"%s\"", path));

	elog(reachedConsistency ? WARNING : DEBUG1,
		 "creating missing directory: %s", path);

	if (pg_mkdir_p(path, pg_dir_create_mode) != 0)
		ereport(PANIC,
				errmsg("could not create missing directory \"%s\": %m", path));
}


/*
 * DATABASE resource manager's routines
 */

/*
 * dbase_redo - replay one WAL record of the DATABASE resource manager
 *
 * Handles XLOG_DBASE_CREATE_FILE_COPY, XLOG_DBASE_CREATE_WAL_LOG and
 * XLOG_DBASE_DROP records; any other op code raises PANIC.
 */
void
dbase_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	/* Backup blocks are not used in dbase records */
	Assert(!XLogRecHasAnyBlockRefs(record));

	if (info == XLOG_DBASE_CREATE_FILE_COPY)
	{
		xl_dbase_create_file_copy_rec *xlrec =
			(xl_dbase_create_file_copy_rec *) XLogRecGetData(record);
		char	   *src_path;
		char	   *dst_path;
		char	   *parent_path;
		struct stat st;

		src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
		dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);

		/*
		 * Our theory for replaying a CREATE is to forcibly drop the target
		 * subdirectory if present, then re-copy the source data. This may be
		 * more work than needed, but it is simple to implement.
		 */
		if (stat(dst_path, &st) == 0 && S_ISDIR(st.st_mode))
		{
			if (!rmtree(dst_path, true))
				/* If this failed, copydir() below is going to error. */
				ereport(WARNING,
						(errmsg("some useless files may be left behind in old database directory \"%s\"",
								dst_path)));
		}

		/*
		 * If the parent of the target path doesn't exist, create it now. This
		 * enables us to create the target underneath later.
		 */
		parent_path = pstrdup(dst_path);
		get_parent_directory(parent_path);
		if (stat(parent_path, &st) < 0)
		{
			/* Report the path that was actually stat'ed, i.e. the parent */
			if (errno != ENOENT)
				ereport(FATAL,
						errmsg("could not stat directory \"%s\": %m",
							   parent_path));

			/* create the parent directory if needed and valid */
			recovery_create_dbdir(parent_path, true);
		}
		pfree(parent_path);

		/*
		 * There's a case where the copy source directory is missing for the
		 * same reason above.  Create the empty source directory so that
		 * copydir below doesn't fail.  The directory will be dropped soon by
		 * recovery.
		 */
		if (stat(src_path, &st) < 0 && errno == ENOENT)
			recovery_create_dbdir(src_path, false);

		/*
		 * Force dirty buffers out to disk, to ensure source database is
		 * up-to-date for the copy.
		 */
		FlushDatabaseBuffers(xlrec->src_db_id);

		/* Close all smgr fds in all backends. */
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

		/*
		 * Copy this subdirectory to the new location
		 *
		 * We don't need to copy subdirectories
		 */
		copydir(src_path, dst_path, false);

		pfree(src_path);
		pfree(dst_path);
	}
	else if (info == XLOG_DBASE_CREATE_WAL_LOG)
	{
		xl_dbase_create_wal_log_rec *xlrec =
			(xl_dbase_create_wal_log_rec *) XLogRecGetData(record);
		char	   *dbpath;
		char	   *parent_path;

		dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);

		/* create the parent directory if needed and valid */
		parent_path = pstrdup(dbpath);
		get_parent_directory(parent_path);
		recovery_create_dbdir(parent_path, true);

		/* Release the scratch copy, as the FILE_COPY branch above does */
		pfree(parent_path);

		/* Create the database directory with the version file. */
		CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id,
								true);
		pfree(dbpath);
	}
	else if (info == XLOG_DBASE_DROP)
	{
		xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) XLogRecGetData(record);
		char	   *dst_path;
		int			i;

		if (InHotStandby)
		{
			/*
			 * Lock database while we resolve conflicts to ensure that
			 * InitPostgres() cannot fully re-execute concurrently. This
			 * avoids backends re-connecting automatically to same database,
			 * which can happen in some cases.
			 *
			 * This will lock out walsenders trying to connect to db-specific
			 * slots for logical decoding too, so it's safe for us to drop
			 * slots.
			 */
			LockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock);
			ResolveRecoveryConflictWithDatabase(xlrec->db_id);
		}

		/* Drop any database-specific replication slots */
		ReplicationSlotsDropDBSlots(xlrec->db_id);

		/* Drop pages for this database that are in the shared buffer cache */
		DropDatabaseBuffers(xlrec->db_id);

		/* Also, clean out any fsync requests that might be pending in md.c */
		ForgetDatabaseSyncRequests(xlrec->db_id);

		/* Clean out the xlog relcache too */
		XLogDropDatabase(xlrec->db_id);

		/* Close all smgr fds in all backends. */
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

		/* One directory per tablespace listed in the record */
		for (i = 0; i < xlrec->ntablespaces; i++)
		{
			dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_ids[i]);

			/* And remove the physical files */
			if (!rmtree(dst_path, true))
				ereport(WARNING,
						(errmsg("some useless files may be left behind in old database directory \"%s\"",
								dst_path)));

			pfree(dst_path);
		}

		if (InHotStandby)
		{
			/*
			 * Release locks prior to commit. XXX There is a race condition
			 * here that may allow backends to reconnect, but the window for
			 * this is small because the gap between here and commit is mostly
			 * fairly small and it is unlikely that people will be dropping
			 * databases that we are trying to connect to anyway.
			 */
			UnlockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock);
		}
	}
	else
		elog(PANIC, "dbase_redo: unknown op code %u", info);
}