Diffstat (limited to 'src/backend/access')
39 files changed, 2772 insertions, 2898 deletions
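Note before the hunks: every change below only re-wraps comments and adjusts whitespace; no code behavior changes. Several of the re-wrapped comments in heaptuple.c discuss the deform/re-form pattern, where heap_deform_tuple() extracts all attributes in one O(N) pass and heap_form_tuple() rebuilds a tuple from values/isnull arrays, which beats calling heap_getattr() per column (O(N^2) in the worst case). The following is a minimal sketch of that pattern, not code from this commit. It assumes the backend headers of this era; the function name, "attno", and "newval" are hypothetical caller-supplied inputs.

    /*
     * Sketch only: replace one column of an existing heap tuple by deforming
     * it into values/isnull arrays and re-forming it, the linear-cost approach
     * the heap_modify_tuple() comments below describe.  Header names are as of
     * the 2005-era backend tree.
     */
    #include "postgres.h"
    #include "access/heapam.h"

    static HeapTuple
    replace_one_column(HeapTuple tuple, TupleDesc tupdesc,
                       int attno,           /* 1-based column to overwrite */
                       Datum newval, bool newisnull)
    {
        int         natts = tupdesc->natts;
        Datum      *values = (Datum *) palloc(natts * sizeof(Datum));
        bool       *isnull = (bool *) palloc(natts * sizeof(bool));
        HeapTuple   newtup;

        /* one linear pass extracts every attribute into the arrays */
        heap_deform_tuple(tuple, tupdesc, values, isnull);

        /* overwrite just the requested column */
        values[attno - 1] = newval;
        isnull[attno - 1] = newisnull;

        /* build a fresh palloc'd tuple from the arrays */
        newtup = heap_form_tuple(tupdesc, values, isnull);

        pfree(values);
        pfree(isnull);
        return newtup;
    }

Callers that also need the old tuple's identity fields (t_self, t_ctid, OID) would copy them afterward, as heap_modify_tuple() itself does in the hunks below.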
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 8b9714184c..2ba59ab5e9 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -6,7 +6,7 @@ * * NOTE: there is massive duplication of code in this module to * support both the convention that a null is marked by a bool TRUE, - * and the convention that a null is marked by a char 'n'. The latter + * and the convention that a null is marked by a char 'n'. The latter * convention is deprecated but it'll probably be a long time before * we can get rid of it entirely. * @@ -16,7 +16,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.99 2005/03/21 01:23:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.100 2005/10/15 02:49:08 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -452,8 +452,8 @@ nocachegetattr(HeapTuple tuple, int j; /* - * In for(), we test <= and not < because we want to see if we - * can go past it in initializing offsets. + * In for(), we test <= and not < because we want to see if we can + * go past it in initializing offsets. */ for (j = 0; j <= attnum; j++) { @@ -467,10 +467,9 @@ nocachegetattr(HeapTuple tuple, } /* - * If slow is false, and we got here, we know that we have a tuple - * with no nulls or var-widths before the target attribute. If - * possible, we also want to initialize the remainder of the attribute - * cached offset values. + * If slow is false, and we got here, we know that we have a tuple with no + * nulls or var-widths before the target attribute. If possible, we also + * want to initialize the remainder of the attribute cached offset values. */ if (!slow) { @@ -513,11 +512,11 @@ nocachegetattr(HeapTuple tuple, /* * Now we know that we have to walk the tuple CAREFULLY. * - * Note - This loop is a little tricky. For each non-null attribute, - * we have to first account for alignment padding before the attr, - * then advance over the attr based on its length. Nulls have no - * storage and no alignment padding either. We can use/set attcacheoff - * until we pass either a null or a var-width attribute. + * Note - This loop is a little tricky. For each non-null attribute, we + * have to first account for alignment padding before the attr, then + * advance over the attr based on its length. Nulls have no storage + * and no alignment padding either. We can use/set attcacheoff until + * we pass either a null or a var-width attribute. */ for (i = 0; i < attnum; i++) @@ -597,15 +596,13 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) break; /* - * If the attribute number is 0, then we are supposed to - * return the entire tuple as a row-type Datum. (Using zero - * for this purpose is unclean since it risks confusion with - * "invalid attr" result codes, but it's not worth changing - * now.) + * If the attribute number is 0, then we are supposed to return + * the entire tuple as a row-type Datum. (Using zero for this + * purpose is unclean since it risks confusion with "invalid attr" + * result codes, but it's not worth changing now.) * - * We have to make a copy of the tuple so we can safely insert - * the Datum overhead fields, which are not set in on-disk - * tuples. + * We have to make a copy of the tuple so we can safely insert the + * Datum overhead fields, which are not set in on-disk tuples. 
*/ case InvalidAttrNumber: { @@ -708,15 +705,15 @@ heap_form_tuple(TupleDesc tupleDescriptor, numberOfAttributes, MaxTupleAttributeNumber))); /* - * Check for nulls and embedded tuples; expand any toasted attributes - * in embedded tuples. This preserves the invariant that toasting can - * only go one level deep. + * Check for nulls and embedded tuples; expand any toasted attributes in + * embedded tuples. This preserves the invariant that toasting can only + * go one level deep. * * We can skip calling toast_flatten_tuple_attribute() if the attribute * couldn't possibly be of composite type. All composite datums are - * varlena and have alignment 'd'; furthermore they aren't arrays. - * Also, if an attribute is already toasted, it must have been sent to - * disk already and so cannot contain toasted attributes. + * varlena and have alignment 'd'; furthermore they aren't arrays. Also, + * if an attribute is already toasted, it must have been sent to disk + * already and so cannot contain toasted attributes. */ for (i = 0; i < numberOfAttributes; i++) { @@ -757,8 +754,8 @@ heap_form_tuple(TupleDesc tupleDescriptor, tuple->t_data = td = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); /* - * And fill in the information. Note we fill the Datum fields even - * though this tuple may never become a Datum. + * And fill in the information. Note we fill the Datum fields even though + * this tuple may never become a Datum. */ tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); @@ -816,15 +813,15 @@ heap_formtuple(TupleDesc tupleDescriptor, numberOfAttributes, MaxTupleAttributeNumber))); /* - * Check for nulls and embedded tuples; expand any toasted attributes - * in embedded tuples. This preserves the invariant that toasting can - * only go one level deep. + * Check for nulls and embedded tuples; expand any toasted attributes in + * embedded tuples. This preserves the invariant that toasting can only + * go one level deep. * * We can skip calling toast_flatten_tuple_attribute() if the attribute * couldn't possibly be of composite type. All composite datums are - * varlena and have alignment 'd'; furthermore they aren't arrays. - * Also, if an attribute is already toasted, it must have been sent to - * disk already and so cannot contain toasted attributes. + * varlena and have alignment 'd'; furthermore they aren't arrays. Also, + * if an attribute is already toasted, it must have been sent to disk + * already and so cannot contain toasted attributes. */ for (i = 0; i < numberOfAttributes; i++) { @@ -865,8 +862,8 @@ heap_formtuple(TupleDesc tupleDescriptor, tuple->t_data = td = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); /* - * And fill in the information. Note we fill the Datum fields even - * though this tuple may never become a Datum. + * And fill in the information. Note we fill the Datum fields even though + * this tuple may never become a Datum. */ tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); @@ -917,15 +914,15 @@ heap_modify_tuple(HeapTuple tuple, HeapTuple newTuple; /* - * allocate and fill values and isnull arrays from either the tuple or - * the repl information, as appropriate. + * allocate and fill values and isnull arrays from either the tuple or the + * repl information, as appropriate. * * NOTE: it's debatable whether to use heap_deform_tuple() here or just - * heap_getattr() only the non-replaced colums. The latter could win - * if there are many replaced columns and few non-replaced ones. 
- * However, heap_deform_tuple costs only O(N) while the heap_getattr - * way would cost O(N^2) if there are many non-replaced columns, so it - * seems better to err on the side of linear cost. + * heap_getattr() only the non-replaced colums. The latter could win if + * there are many replaced columns and few non-replaced ones. However, + * heap_deform_tuple costs only O(N) while the heap_getattr way would cost + * O(N^2) if there are many non-replaced columns, so it seems better to + * err on the side of linear cost. */ values = (Datum *) palloc(numberOfAttributes * sizeof(Datum)); isnull = (bool *) palloc(numberOfAttributes * sizeof(bool)); @@ -950,8 +947,8 @@ heap_modify_tuple(HeapTuple tuple, pfree(isnull); /* - * copy the identification info of the old tuple: t_ctid, t_self, and - * OID (if any) + * copy the identification info of the old tuple: t_ctid, t_self, and OID + * (if any) */ newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; @@ -986,15 +983,15 @@ heap_modifytuple(HeapTuple tuple, HeapTuple newTuple; /* - * allocate and fill values and nulls arrays from either the tuple or - * the repl information, as appropriate. + * allocate and fill values and nulls arrays from either the tuple or the + * repl information, as appropriate. * * NOTE: it's debatable whether to use heap_deformtuple() here or just - * heap_getattr() only the non-replaced colums. The latter could win - * if there are many replaced columns and few non-replaced ones. - * However, heap_deformtuple costs only O(N) while the heap_getattr - * way would cost O(N^2) if there are many non-replaced columns, so it - * seems better to err on the side of linear cost. + * heap_getattr() only the non-replaced colums. The latter could win if + * there are many replaced columns and few non-replaced ones. However, + * heap_deformtuple costs only O(N) while the heap_getattr way would cost + * O(N^2) if there are many non-replaced columns, so it seems better to + * err on the side of linear cost. */ values = (Datum *) palloc(numberOfAttributes * sizeof(Datum)); nulls = (char *) palloc(numberOfAttributes * sizeof(char)); @@ -1022,8 +1019,8 @@ heap_modifytuple(HeapTuple tuple, pfree(nulls); /* - * copy the identification info of the old tuple: t_ctid, t_self, and - * OID (if any) + * copy the identification info of the old tuple: t_ctid, t_self, and OID + * (if any) */ newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; @@ -1068,9 +1065,9 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, natts = tup->t_natts; /* - * In inheritance situations, it is possible that the given tuple - * actually has more fields than the caller is expecting. Don't run - * off the end of the caller's arrays. + * In inheritance situations, it is possible that the given tuple actually + * has more fields than the caller is expecting. Don't run off the end of + * the caller's arrays. */ natts = Min(natts, tdesc_natts); @@ -1161,9 +1158,9 @@ heap_deformtuple(HeapTuple tuple, natts = tup->t_natts; /* - * In inheritance situations, it is possible that the given tuple - * actually has more fields than the caller is expecting. Don't run - * off the end of the caller's arrays. + * In inheritance situations, it is possible that the given tuple actually + * has more fields than the caller is expecting. Don't run off the end of + * the caller's arrays. 
*/ natts = Min(natts, tdesc_natts); @@ -1228,22 +1225,22 @@ heap_deformtuple(HeapTuple tuple, static void slot_deform_tuple(TupleTableSlot *slot, int natts) { - HeapTuple tuple = slot->tts_tuple; - TupleDesc tupleDesc = slot->tts_tupleDescriptor; + HeapTuple tuple = slot->tts_tuple; + TupleDesc tupleDesc = slot->tts_tupleDescriptor; Datum *values = slot->tts_values; bool *isnull = slot->tts_isnull; - HeapTupleHeader tup = tuple->t_data; + HeapTupleHeader tup = tuple->t_data; bool hasnulls = HeapTupleHasNulls(tuple); Form_pg_attribute *att = tupleDesc->attrs; int attnum; - char *tp; /* ptr to tuple data */ - long off; /* offset in tuple data */ - bits8 *bp = tup->t_bits; /* ptr to null bitmap in tuple */ - bool slow; /* can we use/set attcacheoff? */ + char *tp; /* ptr to tuple data */ + long off; /* offset in tuple data */ + bits8 *bp = tup->t_bits; /* ptr to null bitmap in tuple */ + bool slow; /* can we use/set attcacheoff? */ /* - * Check whether the first call for this tuple, and initialize or - * restore loop state. + * Check whether the first call for this tuple, and initialize or restore + * loop state. */ attnum = slot->tts_nvalid; if (attnum == 0) @@ -1269,7 +1266,7 @@ slot_deform_tuple(TupleTableSlot *slot, int natts) { values[attnum] = (Datum) 0; isnull[attnum] = true; - slow = true; /* can't use attcacheoff anymore */ + slow = true; /* can't use attcacheoff anymore */ continue; } @@ -1290,7 +1287,7 @@ slot_deform_tuple(TupleTableSlot *slot, int natts) off = att_addlength(off, thisatt->attlen, tp + off); if (thisatt->attlen <= 0) - slow = true; /* can't use attcacheoff anymore */ + slow = true; /* can't use attcacheoff anymore */ } /* @@ -1316,9 +1313,9 @@ slot_deform_tuple(TupleTableSlot *slot, int natts) Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) { - HeapTuple tuple = slot->tts_tuple; - TupleDesc tupleDesc = slot->tts_tupleDescriptor; - HeapTupleHeader tup; + HeapTuple tuple = slot->tts_tuple; + TupleDesc tupleDesc = slot->tts_tupleDescriptor; + HeapTupleHeader tup; /* * system attributes are handled by heap_getsysattr @@ -1349,18 +1346,18 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) } /* - * otherwise we had better have a physical tuple (tts_nvalid should - * equal natts in all virtual-tuple cases) + * otherwise we had better have a physical tuple (tts_nvalid should equal + * natts in all virtual-tuple cases) */ - if (tuple == NULL) /* internal error */ + if (tuple == NULL) /* internal error */ elog(ERROR, "cannot extract attribute from empty tuple slot"); /* * return NULL if attnum is out of range according to the tuple * - * (We have to check this separately because of various inheritance - * and table-alteration scenarios: the tuple could be either longer - * or shorter than the tupdesc.) + * (We have to check this separately because of various inheritance and + * table-alteration scenarios: the tuple could be either longer or shorter + * than the tupdesc.) */ tup = tuple->t_data; if (attnum > tup->t_natts) @@ -1379,10 +1376,9 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) } /* - * If the attribute's column has been dropped, we force a NULL - * result. This case should not happen in normal use, but it could - * happen if we are executing a plan cached before the column was - * dropped. + * If the attribute's column has been dropped, we force a NULL result. + * This case should not happen in normal use, but it could happen if we + * are executing a plan cached before the column was dropped. 
*/ if (tupleDesc->attrs[attnum - 1]->attisdropped) { @@ -1420,11 +1416,11 @@ slot_getallattrs(TupleTableSlot *slot) return; /* - * otherwise we had better have a physical tuple (tts_nvalid should - * equal natts in all virtual-tuple cases) + * otherwise we had better have a physical tuple (tts_nvalid should equal + * natts in all virtual-tuple cases) */ tuple = slot->tts_tuple; - if (tuple == NULL) /* internal error */ + if (tuple == NULL) /* internal error */ elog(ERROR, "cannot extract attribute from empty tuple slot"); /* @@ -1467,11 +1463,11 @@ slot_getsomeattrs(TupleTableSlot *slot, int attnum) elog(ERROR, "invalid attribute number %d", attnum); /* - * otherwise we had better have a physical tuple (tts_nvalid should - * equal natts in all virtual-tuple cases) + * otherwise we had better have a physical tuple (tts_nvalid should equal + * natts in all virtual-tuple cases) */ tuple = slot->tts_tuple; - if (tuple == NULL) /* internal error */ + if (tuple == NULL) /* internal error */ elog(ERROR, "cannot extract attribute from empty tuple slot"); /* @@ -1502,8 +1498,8 @@ slot_getsomeattrs(TupleTableSlot *slot, int attnum) bool slot_attisnull(TupleTableSlot *slot, int attnum) { - HeapTuple tuple = slot->tts_tuple; - TupleDesc tupleDesc = slot->tts_tupleDescriptor; + HeapTuple tuple = slot->tts_tuple; + TupleDesc tupleDesc = slot->tts_tupleDescriptor; /* * system attributes are handled by heap_attisnull @@ -1528,10 +1524,10 @@ slot_attisnull(TupleTableSlot *slot, int attnum) return true; /* - * otherwise we had better have a physical tuple (tts_nvalid should - * equal natts in all virtual-tuple cases) + * otherwise we had better have a physical tuple (tts_nvalid should equal + * natts in all virtual-tuple cases) */ - if (tuple == NULL) /* internal error */ + if (tuple == NULL) /* internal error */ elog(ERROR, "cannot extract attribute from empty tuple slot"); /* and let the tuple tell it */ diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index e5d19765e7..b3520baa2b 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/indextuple.c,v 1.74 2005/03/27 18:38:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/indextuple.c,v 1.75 2005/10/15 02:49:08 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -70,20 +70,20 @@ index_form_tuple(TupleDesc tupleDescriptor, continue; /* - * If value is stored EXTERNAL, must fetch it so we are not - * depending on outside storage. This should be improved someday. + * If value is stored EXTERNAL, must fetch it so we are not depending + * on outside storage. This should be improved someday. */ if (VARATT_IS_EXTERNAL(values[i])) { untoasted_values[i] = PointerGetDatum( - heap_tuple_fetch_attr( - (varattrib *) DatumGetPointer(values[i]))); + heap_tuple_fetch_attr( + (varattrib *) DatumGetPointer(values[i]))); untoasted_free[i] = true; } /* - * If value is above size target, and is of a compressible - * datatype, try to compress it in-line. + * If value is above size target, and is of a compressible datatype, + * try to compress it in-line. 
*/ if (VARATT_SIZE(untoasted_values[i]) > TOAST_INDEX_TARGET && !VARATT_IS_EXTENDED(untoasted_values[i]) && @@ -149,23 +149,23 @@ index_form_tuple(TupleDesc tupleDescriptor, /* * We do this because heap_fill_tuple wants to initialize a "tupmask" - * which is used for HeapTuples, but we want an indextuple infomask. - * The only relevant info is the "has variable attributes" field. - * We have already set the hasnull bit above. + * which is used for HeapTuples, but we want an indextuple infomask. The + * only relevant info is the "has variable attributes" field. We have + * already set the hasnull bit above. */ if (tupmask & HEAP_HASVARWIDTH) infomask |= INDEX_VAR_MASK; /* - * Here we make sure that the size will fit in the field reserved for - * it in t_info. + * Here we make sure that the size will fit in the field reserved for it + * in t_info. */ if ((size & INDEX_SIZE_MASK) != size) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("index row requires %lu bytes, maximum size is %lu", - (unsigned long) size, - (unsigned long) INDEX_SIZE_MASK))); + errmsg("index row requires %lu bytes, maximum size is %lu", + (unsigned long) size, + (unsigned long) INDEX_SIZE_MASK))); infomask |= size; @@ -322,10 +322,9 @@ nocache_index_getattr(IndexTuple tup, } /* - * If slow is false, and we got here, we know that we have a tuple - * with no nulls or var-widths before the target attribute. If - * possible, we also want to initialize the remainder of the attribute - * cached offset values. + * If slow is false, and we got here, we know that we have a tuple with no + * nulls or var-widths before the target attribute. If possible, we also + * want to initialize the remainder of the attribute cached offset values. */ if (!slow) { diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index 9080d047fc..96dfafb7cb 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -9,7 +9,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/printtup.c,v 1.91 2005/06/22 17:45:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/printtup.c,v 1.92 2005/10/15 02:49:08 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -78,9 +78,9 @@ printtup_create_DR(CommandDest dest, Portal portal) else { /* - * In protocol 2.0 the Bind message does not exist, so there is no - * way for the columns to have different print formats; it's - * sufficient to look at the first one. + * In protocol 2.0 the Bind message does not exist, so there is no way + * for the columns to have different print formats; it's sufficient to + * look at the first one. */ if (portal->formats && portal->formats[0] != 0) self->pub.receiveSlot = printtup_internal_20; @@ -113,8 +113,7 @@ printtup_startup(DestReceiver *self, int operation, TupleDesc typeinfo) if (PG_PROTOCOL_MAJOR(FrontendProtocol) < 3) { /* - * Send portal name to frontend (obsolete cruft, gone in proto - * 3.0) + * Send portal name to frontend (obsolete cruft, gone in proto 3.0) * * If portal name not specified, use "blank" portal. */ @@ -127,8 +126,8 @@ printtup_startup(DestReceiver *self, int operation, TupleDesc typeinfo) } /* - * If this is a retrieve, and we are supposed to emit row - * descriptions, then we send back the tuple descriptor of the tuples. 
+ * If this is a retrieve, and we are supposed to emit row descriptions, + * then we send back the tuple descriptor of the tuples. */ if (operation == CMD_SELECT && myState->sendDescrip) SendRowDescriptionMessage(typeinfo, @@ -280,7 +279,7 @@ printtup_prepare_info(DR_printtup *myState, TupleDesc typeinfo, int numAttrs) static void printtup(TupleTableSlot *slot, DestReceiver *self) { - TupleDesc typeinfo = slot->tts_tupleDescriptor; + TupleDesc typeinfo = slot->tts_tupleDescriptor; DR_printtup *myState = (DR_printtup *) self; StringInfoData buf; int natts = typeinfo->natts; @@ -363,7 +362,7 @@ printtup(TupleTableSlot *slot, DestReceiver *self) static void printtup_20(TupleTableSlot *slot, DestReceiver *self) { - TupleDesc typeinfo = slot->tts_tupleDescriptor; + TupleDesc typeinfo = slot->tts_tupleDescriptor; DR_printtup *myState = (DR_printtup *) self; StringInfoData buf; int natts = typeinfo->natts; @@ -566,7 +565,7 @@ debugtup(TupleTableSlot *slot, DestReceiver *self) static void printtup_internal_20(TupleTableSlot *slot, DestReceiver *self) { - TupleDesc typeinfo = slot->tts_tupleDescriptor; + TupleDesc typeinfo = slot->tts_tupleDescriptor; DR_printtup *myState = (DR_printtup *) self; StringInfoData buf; int natts = typeinfo->natts; diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index fedc7ec489..cfa455beec 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/tupdesc.c,v 1.111 2005/04/14 22:34:48 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/tupdesc.c,v 1.112 2005/10/15 02:49:08 momjian Exp $ * * NOTES * some of the executor utility code such as "ExecTypeFromTL" should be @@ -49,10 +49,10 @@ CreateTemplateTupleDesc(int natts, bool hasoid) * Allocate enough memory for the tuple descriptor, including the * attribute rows, and set up the attribute row pointers. * - * Note: we assume that sizeof(struct tupleDesc) is a multiple of - * the struct pointer alignment requirement, and hence we don't need - * to insert alignment padding between the struct and the array of - * attribute row pointers. + * Note: we assume that sizeof(struct tupleDesc) is a multiple of the struct + * pointer alignment requirement, and hence we don't need to insert + * alignment padding between the struct and the array of attribute row + * pointers. */ attroffset = sizeof(struct tupleDesc) + natts * sizeof(Form_pg_attribute); attroffset = MAXALIGN(attroffset); @@ -273,16 +273,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2) Form_pg_attribute attr2 = tupdesc2->attrs[i]; /* - * We do not need to check every single field here: we can - * disregard attrelid and attnum (which were used to place the row - * in the attrs array in the first place). It might look like we - * could dispense with checking attlen/attbyval/attalign, since these - * are derived from atttypid; but in the case of dropped columns - * we must check them (since atttypid will be zero for all dropped - * columns) and in general it seems safer to check them always. + * We do not need to check every single field here: we can disregard + * attrelid and attnum (which were used to place the row in the attrs + * array in the first place). 
It might look like we could dispense + * with checking attlen/attbyval/attalign, since these are derived + * from atttypid; but in the case of dropped columns we must check + * them (since atttypid will be zero for all dropped columns) and in + * general it seems safer to check them always. * - * attcacheoff must NOT be checked since it's possibly not set - * in both copies. + * attcacheoff must NOT be checked since it's possibly not set in both + * copies. */ if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0) return false; @@ -332,9 +332,9 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2) AttrDefault *defval2 = constr2->defval; /* - * We can't assume that the items are always read from the - * system catalogs in the same order; so use the adnum field - * to identify the matching item to compare. + * We can't assume that the items are always read from the system + * catalogs in the same order; so use the adnum field to identify + * the matching item to compare. */ for (j = 0; j < n; defval2++, j++) { @@ -355,9 +355,9 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2) ConstrCheck *check2 = constr2->check; /* - * Similarly, don't assume that the checks are always read in - * the same order; match them up by name and contents. (The - * name *should* be unique, but...) + * Similarly, don't assume that the checks are always read in the + * same order; match them up by name and contents. (The name + * *should* be unique, but...) */ for (j = 0; j < n; check2++, j++) { @@ -407,8 +407,8 @@ TupleDescInitEntry(TupleDesc desc, /* * Note: attributeName can be NULL, because the planner doesn't always - * fill in valid resname values in targetlists, particularly for - * resjunk attributes. + * fill in valid resname values in targetlists, particularly for resjunk + * attributes. */ if (attributeName != NULL) namestrcpy(&(att->attname), attributeName); @@ -482,8 +482,8 @@ BuildDescForRelation(List *schema) ColumnDef *entry = lfirst(l); /* - * for each entry in the list, get the name and type information - * from the list and have TupleDescInitEntry fill in the attribute + * for each entry in the list, get the name and type information from + * the list and have TupleDescInitEntry fill in the attribute * information we need. */ attnum++; @@ -508,8 +508,8 @@ BuildDescForRelation(List *schema) desc->attrs[attnum - 1]->attnotnull = entry->is_not_null; /* - * Note we copy only pre-cooked default expressions. Digestion of - * raw ones is someone else's problem. + * Note we copy only pre-cooked default expressions. Digestion of raw + * ones is someone else's problem. 
*/ if (entry->cooked_default != NULL) { diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c index 5978c8af4c..b9e0469b05 100644 --- a/src/backend/access/gist/gistproc.c +++ b/src/backend/access/gist/gistproc.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.2 2005/09/22 20:44:36 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.3 2005/10/15 02:49:08 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -26,7 +26,7 @@ typedef struct { BOX *key; int pos; -} KBsort; +} KBsort; static int compare_KB(const void *a, const void *b); static bool gist_box_leaf_consistent(BOX *key, BOX *query, diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index f8611ce46a..2cff9509b6 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.80 2005/06/06 17:01:21 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.81 2005/10/15 02:49:08 momjian Exp $ * * NOTES * This file contains only the public interface routines. @@ -55,8 +55,8 @@ hashbuild(PG_FUNCTION_ARGS) HashBuildState buildstate; /* - * We expect to be called exactly once for any index relation. If - * that's not the case, big trouble's what we have. + * We expect to be called exactly once for any index relation. If that's + * not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", @@ -70,7 +70,7 @@ hashbuild(PG_FUNCTION_ARGS) /* do the heap scan */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, - hashbuildCallback, (void *) &buildstate); + hashbuildCallback, (void *) &buildstate); /* since we just counted the # of tuples, may as well update stats */ IndexCloseAndUpdateStats(heap, reltuples, index, buildstate.indtuples); @@ -141,12 +141,12 @@ hashinsert(PG_FUNCTION_ARGS) /* * If the single index key is null, we don't insert it into the index. - * Hash tables support scans on '='. Relational algebra says that A = - * B returns null if either A or B is null. This means that no - * qualification used in an index scan could ever return true on a - * null attribute. It also means that indices can't be used by ISNULL - * or NOTNULL scans, but that's an artifact of the strategy map - * architecture chosen in 1986, not of the way nulls are handled here. + * Hash tables support scans on '='. Relational algebra says that A = B + * returns null if either A or B is null. This means that no + * qualification used in an index scan could ever return true on a null + * attribute. It also means that indices can't be used by ISNULL or + * NOTNULL scans, but that's an artifact of the strategy map architecture + * chosen in 1986, not of the way nulls are handled here. */ if (IndexTupleHasNulls(itup)) { @@ -180,16 +180,16 @@ hashgettuple(PG_FUNCTION_ARGS) bool res; /* - * We hold pin but not lock on current buffer while outside the hash - * AM. Reacquire the read lock here. + * We hold pin but not lock on current buffer while outside the hash AM. + * Reacquire the read lock here. */ if (BufferIsValid(so->hashso_curbuf)) _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ); /* - * If we've already initialized this scan, we can just advance it in - * the appropriate direction. 
If we haven't done so yet, we call a - * routine to get the first item in the scan. + * If we've already initialized this scan, we can just advance it in the + * appropriate direction. If we haven't done so yet, we call a routine to + * get the first item in the scan. */ if (ItemPointerIsValid(&(scan->currentItemData))) { @@ -199,17 +199,16 @@ hashgettuple(PG_FUNCTION_ARGS) if (scan->kill_prior_tuple) { /* - * Yes, so mark it by setting the LP_DELETE bit in the item - * flags. + * Yes, so mark it by setting the LP_DELETE bit in the item flags. */ offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); page = BufferGetPage(so->hashso_curbuf); PageGetItemId(page, offnum)->lp_flags |= LP_DELETE; /* - * Since this can be redone later if needed, it's treated the - * same as a commit-hint-bit status update for heap tuples: we - * mark the buffer dirty but don't make a WAL log entry. + * Since this can be redone later if needed, it's treated the same + * as a commit-hint-bit status update for heap tuples: we mark the + * buffer dirty but don't make a WAL log entry. */ SetBufferCommitInfoNeedsSave(so->hashso_curbuf); } @@ -256,7 +255,7 @@ Datum hashgetmulti(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); + ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); int32 max_tids = PG_GETARG_INT32(2); int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); HashScanOpaque so = (HashScanOpaque) scan->opaque; @@ -265,8 +264,8 @@ hashgetmulti(PG_FUNCTION_ARGS) int32 ntids = 0; /* - * We hold pin but not lock on current buffer while outside the hash - * AM. Reacquire the read lock here. + * We hold pin but not lock on current buffer while outside the hash AM. + * Reacquire the read lock here. */ if (BufferIsValid(so->hashso_curbuf)) _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ); @@ -280,6 +279,7 @@ hashgetmulti(PG_FUNCTION_ARGS) res = _hash_next(scan, ForwardScanDirection); else res = _hash_first(scan, ForwardScanDirection); + /* * Skip killed tuples if asked to. */ @@ -505,12 +505,12 @@ hashbulkdelete(PG_FUNCTION_ARGS) num_index_tuples = 0; /* - * Read the metapage to fetch original bucket and tuple counts. Also, - * we keep a copy of the last-seen metapage so that we can use its - * hashm_spares[] values to compute bucket page addresses. This is a - * bit hokey but perfectly safe, since the interesting entries in the - * spares array cannot change under us; and it beats rereading the - * metapage for each bucket. + * Read the metapage to fetch original bucket and tuple counts. Also, we + * keep a copy of the last-seen metapage so that we can use its + * hashm_spares[] values to compute bucket page addresses. This is a bit + * hokey but perfectly safe, since the interesting entries in the spares + * array cannot change under us; and it beats rereading the metapage for + * each bucket. */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); metap = (HashMetaPage) BufferGetPage(metabuf); @@ -569,7 +569,7 @@ loop_top: ItemPointer htup; hitem = (HashItem) PageGetItem(page, - PageGetItemId(page, offno)); + PageGetItemId(page, offno)); htup = &(hitem->hash_itup.t_tid); if (callback(htup, callback_state)) { @@ -641,8 +641,7 @@ loop_top: { /* * Otherwise, our count is untrustworthy since we may have - * double-scanned tuples in split buckets. Proceed by - * dead-reckoning. + * double-scanned tuples in split buckets. Proceed by dead-reckoning. 
*/ if (metap->hashm_ntuples > tuples_removed) metap->hashm_ntuples -= tuples_removed; diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 05ca3bcdb1..2ffca5efe6 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.44 2005/05/25 21:40:40 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.45 2005/10/15 02:49:08 momjian Exp $ * * NOTES * These functions are stored in pg_amproc. For each operator class @@ -46,11 +46,11 @@ hashint8(PG_FUNCTION_ARGS) { /* * The idea here is to produce a hash value compatible with the values - * produced by hashint4 and hashint2 for logically equivalent inputs; - * this is necessary if we ever hope to support cross-type hash joins - * across these input types. Since all three types are signed, we can - * xor the high half of the int8 value if the sign is positive, or the - * complement of the high half when the sign is negative. + * produced by hashint4 and hashint2 for logically equivalent inputs; this + * is necessary if we ever hope to support cross-type hash joins across + * these input types. Since all three types are signed, we can xor the + * high half of the int8 value if the sign is positive, or the complement + * of the high half when the sign is negative. */ #ifndef INT64_IS_BUSTED int64 val = PG_GETARG_INT64(0); @@ -78,9 +78,9 @@ hashfloat4(PG_FUNCTION_ARGS) float4 key = PG_GETARG_FLOAT4(0); /* - * On IEEE-float machines, minus zero and zero have different bit - * patterns but should compare as equal. We must ensure that they - * have the same hash value, which is most easily done this way: + * On IEEE-float machines, minus zero and zero have different bit patterns + * but should compare as equal. We must ensure that they have the same + * hash value, which is most easily done this way: */ if (key == (float4) 0) PG_RETURN_UINT32(0); @@ -94,9 +94,9 @@ hashfloat8(PG_FUNCTION_ARGS) float8 key = PG_GETARG_FLOAT8(0); /* - * On IEEE-float machines, minus zero and zero have different bit - * patterns but should compare as equal. We must ensure that they - * have the same hash value, which is most easily done this way: + * On IEEE-float machines, minus zero and zero have different bit patterns + * but should compare as equal. We must ensure that they have the same + * hash value, which is most easily done this way: */ if (key == (float8) 0) PG_RETURN_UINT32(0); @@ -126,8 +126,7 @@ hashname(PG_FUNCTION_ARGS) char *key = NameStr(*PG_GETARG_NAME(0)); int keylen = strlen(key); - Assert(keylen < NAMEDATALEN); /* else it's not truncated - * correctly */ + Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */ return hash_any((unsigned char *) key, keylen); } @@ -139,8 +138,8 @@ hashtext(PG_FUNCTION_ARGS) Datum result; /* - * Note: this is currently identical in behavior to hashvarlena, but - * it seems likely that we may need to do something different in non-C + * Note: this is currently identical in behavior to hashvarlena, but it + * seems likely that we may need to do something different in non-C * locales. (See also hashbpchar, if so.) 
*/ result = hash_any((unsigned char *) VARDATA(key), diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index 860376cd48..7637c3566c 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.37 2005/08/10 21:36:45 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.38 2005/10/15 02:49:08 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -50,8 +50,8 @@ _hash_doinsert(Relation rel, HashItem hitem) bool isnull; /* - * Compute the hash key for the item. We do this first so as not to - * need to hold any locks while running the hash function. + * Compute the hash key for the item. We do this first so as not to need + * to hold any locks while running the hash function. */ itup = &(hitem->hash_itup); if (rel->rd_rel->relnatts != 1) @@ -64,12 +64,12 @@ _hash_doinsert(Relation rel, HashItem hitem) itemsz = IndexTupleDSize(hitem->hash_itup) + (sizeof(HashItemData) - sizeof(IndexTupleData)); - itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but - * we need to be consistent */ + itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we + * need to be consistent */ /* - * Acquire shared split lock so we can compute the target bucket - * safely (see README). + * Acquire shared split lock so we can compute the target bucket safely + * (see README). */ _hash_getlock(rel, 0, HASH_SHARE); @@ -79,9 +79,9 @@ _hash_doinsert(Relation rel, HashItem hitem) _hash_checkpage(rel, (Page) metap, LH_META_PAGE); /* - * Check whether the item can fit on a hash page at all. (Eventually, - * we ought to try to apply TOAST methods if not.) Note that at this - * point, itemsz doesn't include the ItemId. + * Check whether the item can fit on a hash page at all. (Eventually, we + * ought to try to apply TOAST methods if not.) Note that at this point, + * itemsz doesn't include the ItemId. */ if (itemsz > HashMaxItemSize((Page) metap)) ereport(ERROR, @@ -89,7 +89,7 @@ _hash_doinsert(Relation rel, HashItem hitem) errmsg("index row size %lu exceeds hash maximum %lu", (unsigned long) itemsz, (unsigned long) HashMaxItemSize((Page) metap)), - errhint("Values larger than a buffer page cannot be indexed."))); + errhint("Values larger than a buffer page cannot be indexed."))); /* * Compute the target bucket number, and convert to block number. @@ -105,8 +105,7 @@ _hash_doinsert(Relation rel, HashItem hitem) _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK); /* - * Acquire share lock on target bucket; then we can release split - * lock. + * Acquire share lock on target bucket; then we can release split lock. */ _hash_getlock(rel, blkno, HASH_SHARE); @@ -130,8 +129,8 @@ _hash_doinsert(Relation rel, HashItem hitem) if (BlockNumberIsValid(nextblkno)) { /* - * ovfl page exists; go get it. if it doesn't have room, - * we'll find out next pass through the loop test above. + * ovfl page exists; go get it. if it doesn't have room, we'll + * find out next pass through the loop test above. 
*/ _hash_relbuf(rel, buf); buf = _hash_getbuf(rel, nextblkno, HASH_WRITE); diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 1b8b798b45..7289d9a0b3 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.46 2005/05/11 01:26:01 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.47 2005/10/15 02:49:08 momjian Exp $ * * NOTES * Overflow pages look like ordinary relation pages. @@ -44,8 +44,8 @@ bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum) /* loop */ ; /* - * Convert to absolute page number by adding the number of bucket - * pages that exist before this split point. + * Convert to absolute page number by adding the number of bucket pages + * that exist before this split point. */ return (BlockNumber) ((1 << i) + ovflbitnum); } @@ -252,10 +252,10 @@ _hash_getovflpage(Relation rel, Buffer metabuf) /* * We create the new bitmap page with all pages marked "in use". * Actually two pages in the new bitmap's range will exist - * immediately: the bitmap page itself, and the following page - * which is the one we return to the caller. Both of these are - * correctly marked "in use". Subsequent pages do not exist yet, - * but it is convenient to pre-mark them as "in use" too. + * immediately: the bitmap page itself, and the following page which + * is the one we return to the caller. Both of these are correctly + * marked "in use". Subsequent pages do not exist yet, but it is + * convenient to pre-mark them as "in use" too. */ _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit)); @@ -265,8 +265,8 @@ _hash_getovflpage(Relation rel, Buffer metabuf) else { /* - * Nothing to do here; since the page was past the last used page, - * we know its bitmap bit was preinitialized to "in use". + * Nothing to do here; since the page was past the last used page, we + * know its bitmap bit was preinitialized to "in use". */ } @@ -275,8 +275,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf) /* * Adjust hashm_firstfree to avoid redundant searches. But don't risk - * changing it if someone moved it while we were searching bitmap - * pages. + * changing it if someone moved it while we were searching bitmap pages. */ if (metap->hashm_firstfree == orig_firstfree) metap->hashm_firstfree = bit + 1; @@ -305,8 +304,7 @@ found: /* * Adjust hashm_firstfree to avoid redundant searches. But don't risk - * changing it if someone moved it while we were searching bitmap - * pages. + * changing it if someone moved it while we were searching bitmap pages. */ if (metap->hashm_firstfree == orig_firstfree) { @@ -394,10 +392,10 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf) _hash_wrtbuf(rel, ovflbuf); /* - * Fix up the bucket chain. this is a doubly-linked list, so we must - * fix up the bucket chain members behind and ahead of the overflow - * page being deleted. No concurrency issues since we hold exclusive - * lock on the entire bucket. + * Fix up the bucket chain. this is a doubly-linked list, so we must fix + * up the bucket chain members behind and ahead of the overflow page being + * deleted. No concurrency issues since we hold exclusive lock on the + * entire bucket. 
*/ if (BlockNumberIsValid(prevblkno)) { @@ -488,12 +486,11 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno) /* * It is okay to write-lock the new bitmap page while holding metapage - * write lock, because no one else could be contending for the new - * page. + * write lock, because no one else could be contending for the new page. * - * There is some loss of concurrency in possibly doing I/O for the new - * page while holding the metapage lock, but this path is taken so - * seldom that it's not worth worrying about. + * There is some loss of concurrency in possibly doing I/O for the new page + * while holding the metapage lock, but this path is taken so seldom that + * it's not worth worrying about. */ buf = _hash_getbuf(rel, blkno, HASH_WRITE); pg = BufferGetPage(buf); @@ -586,8 +583,8 @@ _hash_squeezebucket(Relation rel, } /* - * find the last page in the bucket chain by starting at the base - * bucket page and working forward. + * find the last page in the bucket chain by starting at the base bucket + * page and working forward. */ ropaque = wopaque; do @@ -655,22 +652,21 @@ _hash_squeezebucket(Relation rel, /* * delete the tuple from the "read" page. PageIndexTupleDelete - * repacks the ItemId array, so 'roffnum' will be "advanced" - * to the "next" ItemId. + * repacks the ItemId array, so 'roffnum' will be "advanced" to + * the "next" ItemId. */ PageIndexTupleDelete(rpage, roffnum); } /* - * if the "read" page is now empty because of the deletion (or - * because it was empty when we got to it), free it. + * if the "read" page is now empty because of the deletion (or because + * it was empty when we got to it), free it. * * Tricky point here: if our read and write pages are adjacent in the * bucket chain, our write lock on wbuf will conflict with * _hash_freeovflpage's attempt to update the sibling links of the - * removed page. However, in that case we are done anyway, so we - * can simply drop the write lock before calling - * _hash_freeovflpage. + * removed page. However, in that case we are done anyway, so we can + * simply drop the write lock before calling _hash_freeovflpage. */ if (PageIsEmpty(rpage)) { diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 883f2a7312..b40c20b480 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.51 2005/06/09 21:01:25 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.52 2005/10/15 02:49:08 momjian Exp $ * * NOTES * Postgres hash pages look like ordinary relation pages. The opaque @@ -240,13 +240,13 @@ _hash_metapinit(Relation rel) RelationGetRelationName(rel)); /* - * Determine the target fill factor (tuples per bucket) for this - * index. The idea is to make the fill factor correspond to pages - * about 3/4ths full. We can compute it exactly if the index datatype - * is fixed-width, but for var-width there's some guessing involved. + * Determine the target fill factor (tuples per bucket) for this index. + * The idea is to make the fill factor correspond to pages about 3/4ths + * full. We can compute it exactly if the index datatype is fixed-width, + * but for var-width there's some guessing involved. 
*/ data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid, - RelationGetDescr(rel)->attrs[0]->atttypmod); + RelationGetDescr(rel)->attrs[0]->atttypmod); item_width = MAXALIGN(sizeof(HashItemData)) + MAXALIGN(data_width) + sizeof(ItemIdData); /* include the line pointer */ ffactor = (BLCKSZ * 3 / 4) / item_width; @@ -289,9 +289,8 @@ _hash_metapinit(Relation rel) metap->hashm_procid = index_getprocid(rel, 1, HASHPROC); /* - * We initialize the index with two buckets, 0 and 1, occupying - * physical blocks 1 and 2. The first freespace bitmap page is in - * block 3. + * We initialize the index with two buckets, 0 and 1, occupying physical + * blocks 1 and 2. The first freespace bitmap page is in block 3. */ metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */ metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */ @@ -321,8 +320,8 @@ _hash_metapinit(Relation rel) } /* - * Initialize first bitmap page. Can't do this until we create the - * first two buckets, else smgr will complain. + * Initialize first bitmap page. Can't do this until we create the first + * two buckets, else smgr will complain. */ _hash_initbitmap(rel, metap, 3); @@ -367,15 +366,14 @@ _hash_expandtable(Relation rel, Buffer metabuf) * Obtain the page-zero lock to assert the right to begin a split (see * README). * - * Note: deadlock should be impossible here. Our own backend could only - * be holding bucket sharelocks due to stopped indexscans; those will - * not block other holders of the page-zero lock, who are only - * interested in acquiring bucket sharelocks themselves. Exclusive - * bucket locks are only taken here and in hashbulkdelete, and neither - * of these operations needs any additional locks to complete. (If, - * due to some flaw in this reasoning, we manage to deadlock anyway, - * it's okay to error out; the index will be left in a consistent - * state.) + * Note: deadlock should be impossible here. Our own backend could only be + * holding bucket sharelocks due to stopped indexscans; those will not + * block other holders of the page-zero lock, who are only interested in + * acquiring bucket sharelocks themselves. Exclusive bucket locks are + * only taken here and in hashbulkdelete, and neither of these operations + * needs any additional locks to complete. (If, due to some flaw in this + * reasoning, we manage to deadlock anyway, it's okay to error out; the + * index will be left in a consistent state.) */ _hash_getlock(rel, 0, HASH_EXCLUSIVE); @@ -386,8 +384,8 @@ _hash_expandtable(Relation rel, Buffer metabuf) _hash_checkpage(rel, (Page) metap, LH_META_PAGE); /* - * Check to see if split is still needed; someone else might have - * already done one while we waited for the lock. + * Check to see if split is still needed; someone else might have already + * done one while we waited for the lock. * * Make sure this stays in sync with _hash_doinsert() */ @@ -402,11 +400,11 @@ _hash_expandtable(Relation rel, Buffer metabuf) * The lock protects us against other backends, but not against our own * backend. Must check for active scans separately. * - * Ideally we would lock the new bucket too before proceeding, but if we - * are about to cross a splitpoint then the BUCKET_TO_BLKNO mapping - * isn't correct yet. For simplicity we update the metapage first and - * then lock. This should be okay because no one else should be - * trying to lock the new bucket yet... 
+ * Ideally we would lock the new bucket too before proceeding, but if we are + * about to cross a splitpoint then the BUCKET_TO_BLKNO mapping isn't + * correct yet. For simplicity we update the metapage first and then + * lock. This should be okay because no one else should be trying to lock + * the new bucket yet... */ new_bucket = metap->hashm_maxbucket + 1; old_bucket = (new_bucket & metap->hashm_lowmask); @@ -420,14 +418,13 @@ _hash_expandtable(Relation rel, Buffer metabuf) goto fail; /* - * Okay to proceed with split. Update the metapage bucket mapping - * info. + * Okay to proceed with split. Update the metapage bucket mapping info. * - * Since we are scribbling on the metapage data right in the shared - * buffer, any failure in this next little bit leaves us with a big - * problem: the metapage is effectively corrupt but could get written - * back to disk. We don't really expect any failure, but just to be - * sure, establish a critical section. + * Since we are scribbling on the metapage data right in the shared buffer, + * any failure in this next little bit leaves us with a big problem: the + * metapage is effectively corrupt but could get written back to disk. We + * don't really expect any failure, but just to be sure, establish a + * critical section. */ START_CRIT_SECTION(); @@ -443,8 +440,8 @@ _hash_expandtable(Relation rel, Buffer metabuf) /* * If the split point is increasing (hashm_maxbucket's log base 2 * increases), we need to adjust the hashm_spares[] array and - * hashm_ovflpoint so that future overflow pages will be created - * beyond this new batch of bucket pages. + * hashm_ovflpoint so that future overflow pages will be created beyond + * this new batch of bucket pages. * * XXX should initialize new bucket pages to prevent out-of-order page * creation? Don't wanna do it right here though. @@ -471,10 +468,9 @@ _hash_expandtable(Relation rel, Buffer metabuf) /* * Copy bucket mapping info now; this saves re-accessing the meta page * inside _hash_splitbucket's inner loop. Note that once we drop the - * split lock, other splits could begin, so these values might be out - * of date before _hash_splitbucket finishes. That's okay, since all - * it needs is to tell which of these two buckets to map hashkeys - * into. + * split lock, other splits could begin, so these values might be out of + * date before _hash_splitbucket finishes. That's okay, since all it + * needs is to tell which of these two buckets to map hashkeys into. */ maxbucket = metap->hashm_maxbucket; highmask = metap->hashm_highmask; @@ -554,9 +550,9 @@ _hash_splitbucket(Relation rel, TupleDesc itupdesc = RelationGetDescr(rel); /* - * It should be okay to simultaneously write-lock pages from each - * bucket, since no one else can be trying to acquire buffer lock on - * pages of either bucket. + * It should be okay to simultaneously write-lock pages from each bucket, + * since no one else can be trying to acquire buffer lock on pages of + * either bucket. */ oblkno = start_oblkno; nblkno = start_nblkno; @@ -578,17 +574,17 @@ _hash_splitbucket(Relation rel, nopaque->hasho_filler = HASHO_FILL; /* - * Partition the tuples in the old bucket between the old bucket and - * the new bucket, advancing along the old bucket's overflow bucket - * chain and adding overflow pages to the new bucket as needed. + * Partition the tuples in the old bucket between the old bucket and the + * new bucket, advancing along the old bucket's overflow bucket chain and + * adding overflow pages to the new bucket as needed. 
*/ ooffnum = FirstOffsetNumber; omaxoffnum = PageGetMaxOffsetNumber(opage); for (;;) { /* - * at each iteration through this loop, each of these variables - * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum + * at each iteration through this loop, each of these variables should + * be up-to-date: obuf opage oopaque ooffnum omaxoffnum */ /* check if we're at the end of the page */ @@ -600,8 +596,8 @@ _hash_splitbucket(Relation rel, break; /* - * we ran out of tuples on this particular page, but we have - * more overflow pages; advance to next page. + * we ran out of tuples on this particular page, but we have more + * overflow pages; advance to next page. */ _hash_wrtbuf(rel, obuf); @@ -618,8 +614,7 @@ _hash_splitbucket(Relation rel, * Re-hash the tuple to determine which bucket it now belongs in. * * It is annoying to call the hash function while holding locks, but - * releasing and relocking the page for each tuple is unappealing - * too. + * releasing and relocking the page for each tuple is unappealing too. */ hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum)); itup = &(hitem->hash_itup); @@ -632,9 +627,9 @@ _hash_splitbucket(Relation rel, if (bucket == nbucket) { /* - * insert the tuple into the new bucket. if it doesn't fit on - * the current page in the new bucket, we must allocate a new - * overflow page and place the tuple on that page instead. + * insert the tuple into the new bucket. if it doesn't fit on the + * current page in the new bucket, we must allocate a new overflow + * page and place the tuple on that page instead. */ itemsz = IndexTupleDSize(hitem->hash_itup) + (sizeof(HashItemData) - sizeof(IndexTupleData)); @@ -659,13 +654,13 @@ _hash_splitbucket(Relation rel, RelationGetRelationName(rel)); /* - * now delete the tuple from the old bucket. after this - * section of code, 'ooffnum' will actually point to the - * ItemId to which we would point if we had advanced it before - * the deletion (PageIndexTupleDelete repacks the ItemId - * array). this also means that 'omaxoffnum' is exactly one - * less than it used to be, so we really can just decrement it - * instead of calling PageGetMaxOffsetNumber. + * now delete the tuple from the old bucket. after this section + * of code, 'ooffnum' will actually point to the ItemId to which + * we would point if we had advanced it before the deletion + * (PageIndexTupleDelete repacks the ItemId array). this also + * means that 'omaxoffnum' is exactly one less than it used to be, + * so we really can just decrement it instead of calling + * PageGetMaxOffsetNumber. */ PageIndexTupleDelete(opage, ooffnum); omaxoffnum = OffsetNumberPrev(omaxoffnum); @@ -673,9 +668,9 @@ _hash_splitbucket(Relation rel, else { /* - * the tuple stays on this page. we didn't move anything, so - * we didn't delete anything and therefore we don't have to - * change 'omaxoffnum'. + * the tuple stays on this page. we didn't move anything, so we + * didn't delete anything and therefore we don't have to change + * 'omaxoffnum'. */ Assert(bucket == obucket); ooffnum = OffsetNumberNext(ooffnum); @@ -683,11 +678,10 @@ _hash_splitbucket(Relation rel, } /* - * We're at the end of the old bucket chain, so we're done - * partitioning the tuples. Before quitting, call _hash_squeezebucket - * to ensure the tuples remaining in the old bucket (including the - * overflow pages) are packed as tightly as possible. The new bucket - * is already tight. + * We're at the end of the old bucket chain, so we're done partitioning + * the tuples. 
Before quitting, call _hash_squeezebucket to ensure the + * tuples remaining in the old bucket (including the overflow pages) are + * packed as tightly as possible. The new bucket is already tight. */ _hash_wrtbuf(rel, obuf); _hash_wrtbuf(rel, nbuf); diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c index 782c087e3b..213eaf89fc 100644 --- a/src/backend/access/hash/hashscan.c +++ b/src/backend/access/hash/hashscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashscan.c,v 1.38 2004/12/31 21:59:13 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashscan.c,v 1.39 2005/10/15 02:49:08 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -44,9 +44,9 @@ ReleaseResources_hash(void) HashScanList next; /* - * Note: this should be a no-op during normal query shutdown. However, - * in an abort situation ExecutorEnd is not called and so there may be - * open index scans to clean up. + * Note: this should be a no-op during normal query shutdown. However, in + * an abort situation ExecutorEnd is not called and so there may be open + * index scans to clean up. */ prev = NULL; diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 9aaf70b0a9..fac46d7902 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.39 2005/10/06 02:29:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.40 2005/10/15 02:49:08 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -137,33 +137,32 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) ItemPointerSetInvalid(current); /* - * We do not support hash scans with no index qualification, because - * we would have to read the whole index rather than just one bucket. - * That creates a whole raft of problems, since we haven't got a - * practical way to lock all the buckets against splits or - * compactions. + * We do not support hash scans with no index qualification, because we + * would have to read the whole index rather than just one bucket. That + * creates a whole raft of problems, since we haven't got a practical way + * to lock all the buckets against splits or compactions. */ if (scan->numberOfKeys < 1) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("hash indexes do not support whole-index scans"))); + errmsg("hash indexes do not support whole-index scans"))); /* - * If the constant in the index qual is NULL, assume it cannot match - * any items in the index. + * If the constant in the index qual is NULL, assume it cannot match any + * items in the index. */ if (scan->keyData[0].sk_flags & SK_ISNULL) return false; /* - * Okay to compute the hash key. We want to do this before acquiring - * any locks, in case a user-defined hash function happens to be slow. + * Okay to compute the hash key. We want to do this before acquiring any + * locks, in case a user-defined hash function happens to be slow. */ hashkey = _hash_datum2hashkey(rel, scan->keyData[0].sk_argument); /* - * Acquire shared split lock so we can compute the target bucket - * safely (see README). + * Acquire shared split lock so we can compute the target bucket safely + * (see README). 
*/ _hash_getlock(rel, 0, HASH_SHARE); @@ -186,8 +185,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) _hash_relbuf(rel, metabuf); /* - * Acquire share lock on target bucket; then we can release split - * lock. + * Acquire share lock on target bucket; then we can release split lock. */ _hash_getlock(rel, blkno, HASH_SHARE); @@ -263,9 +261,9 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) bucket = opaque->hasho_bucket; /* - * If _hash_step is called from _hash_first, current will not be - * valid, so we can't dereference it. However, in that case, we - * presumably want to start at the beginning/end of the page... + * If _hash_step is called from _hash_first, current will not be valid, so + * we can't dereference it. However, in that case, we presumably want to + * start at the beginning/end of the page... */ maxoff = PageGetMaxOffsetNumber(page); if (ItemPointerIsValid(current)) @@ -276,8 +274,8 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) /* * 'offnum' now points to the last tuple we have seen (if any). * - * continue to step through tuples until: 1) we get to the end of the - * bucket chain or 2) we find a valid tuple. + * continue to step through tuples until: 1) we get to the end of the bucket + * chain or 2) we find a valid tuple. */ do { diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 185918d03a..6c669ed62b 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.199 2005/10/06 02:29:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.200 2005/10/15 02:49:08 momjian Exp $ * * * INTERFACE ROUTINES @@ -54,7 +54,7 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, - ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool move); + ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool move); /* ---------------------------------------------------------------- @@ -272,8 +272,8 @@ heapgettup(Relation relation, /* 'dir' is now non-zero */ /* - * calculate line pointer and number of remaining items to check on - * this page. + * calculate line pointer and number of remaining items to check on this + * page. */ lpp = PageGetItemId(dp, lineoff); if (dir < 0) @@ -282,8 +282,8 @@ heapgettup(Relation relation, linesleft = lines - lineoff; /* - * advance the scan until we find a qualifying tuple or run out of - * stuff to scan + * advance the scan until we find a qualifying tuple or run out of stuff + * to scan */ for (;;) { @@ -321,15 +321,14 @@ heapgettup(Relation relation, } else { - ++lpp; /* move forward in this page's ItemId - * array */ + ++lpp; /* move forward in this page's ItemId array */ ++lineoff; } } /* - * if we get here, it means we've exhausted the items on this page - * and it's time to move to the next. + * if we get here, it means we've exhausted the items on this page and + * it's time to move to the next. */ LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); @@ -506,15 +505,15 @@ relation_openrv(const RangeVar *relation, LOCKMODE lockmode) /* * Check for shared-cache-inval messages before trying to open the - * relation. This is needed to cover the case where the name - * identifies a rel that has been dropped and recreated since the - * start of our transaction: if we don't flush the old syscache entry - * then we'll latch onto that entry and suffer an error when we do - * LockRelation. 
Note that relation_open does not need to do this, - * since a relation's OID never changes. + * relation. This is needed to cover the case where the name identifies a + * rel that has been dropped and recreated since the start of our + * transaction: if we don't flush the old syscache entry then we'll latch + * onto that entry and suffer an error when we do LockRelation. Note that + * relation_open does not need to do this, since a relation's OID never + * changes. * - * We skip this if asked for NoLock, on the assumption that the caller - * has already ensured some appropriate lock is held. + * We skip this if asked for NoLock, on the assumption that the caller has + * already ensured some appropriate lock is held. */ if (lockmode != NoLock) AcceptInvalidationMessages(); @@ -633,9 +632,9 @@ heap_beginscan(Relation relation, Snapshot snapshot, /* * increment relation ref count while scanning relation * - * This is just to make really sure the relcache entry won't go away - * while the scan has a pointer to it. Caller should be holding the - * rel open anyway, so this is redundant in all normal scenarios... + * This is just to make really sure the relcache entry won't go away while + * the scan has a pointer to it. Caller should be holding the rel open + * anyway, so this is redundant in all normal scenarios... */ RelationIncrementReferenceCount(relation); @@ -649,8 +648,8 @@ heap_beginscan(Relation relation, Snapshot snapshot, scan->rs_nkeys = nkeys; /* - * we do this here instead of in initscan() because heap_rescan also - * calls initscan() and we don't want to allocate memory again + * we do this here instead of in initscan() because heap_rescan also calls + * initscan() and we don't want to allocate memory again */ if (nkeys > 0) scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); @@ -763,8 +762,8 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) } /* - * if we get here it means we have a new current scan tuple, so point - * to the proper return buffer and return the tuple. + * if we get here it means we have a new current scan tuple, so point to + * the proper return buffer and return the tuple. */ HEAPDEBUG_3; /* heap_getnext returning tuple */ @@ -859,8 +858,8 @@ heap_release_fetch(Relation relation, dp = (PageHeader) BufferGetPage(buffer); /* - * We'd better check for out-of-range offnum in case of VACUUM since - * the TID was obtained. + * We'd better check for out-of-range offnum in case of VACUUM since the + * TID was obtained. */ offnum = ItemPointerGetOffsetNumber(tid); if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp)) @@ -952,7 +951,7 @@ heap_release_fetch(Relation relation, * possibly uncommitted version. * * *tid is both an input and an output parameter: it is updated to - * show the latest version of the row. Note that it will not be changed + * show the latest version of the row. Note that it will not be changed * if no version of the row passes the snapshot test. */ void @@ -960,7 +959,7 @@ heap_get_latest_tid(Relation relation, Snapshot snapshot, ItemPointer tid) { - BlockNumber blk; + BlockNumber blk; ItemPointerData ctid; TransactionId priorXmax; @@ -969,10 +968,10 @@ heap_get_latest_tid(Relation relation, return; /* - * Since this can be called with user-supplied TID, don't trust the - * input too much. (RelationGetNumberOfBlocks is an expensive check, - * so we don't check t_ctid links again this way. Note that it would - * not do to call it just once and save the result, either.) 
+ * Since this can be called with user-supplied TID, don't trust the input + * too much. (RelationGetNumberOfBlocks is an expensive check, so we + * don't check t_ctid links again this way. Note that it would not do to + * call it just once and save the result, either.) */ blk = ItemPointerGetBlockNumber(tid); if (blk >= RelationGetNumberOfBlocks(relation)) @@ -980,9 +979,9 @@ heap_get_latest_tid(Relation relation, blk, RelationGetRelationName(relation)); /* - * Loop to chase down t_ctid links. At top of loop, ctid is the - * tuple we need to examine, and *tid is the TID we will return if - * ctid turns out to be bogus. + * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we + * need to examine, and *tid is the TID we will return if ctid turns out + * to be bogus. * * Note that we will loop until we reach the end of the t_ctid chain. * Depending on the snapshot passed, there might be at most one visible @@ -1008,8 +1007,8 @@ heap_get_latest_tid(Relation relation, /* * Check for bogus item number. This is not treated as an error - * condition because it can happen while following a t_ctid link. - * We just assume that the prior tid is OK and return it unchanged. + * condition because it can happen while following a t_ctid link. We + * just assume that the prior tid is OK and return it unchanged. */ offnum = ItemPointerGetOffsetNumber(&ctid); if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp)) @@ -1037,7 +1036,7 @@ heap_get_latest_tid(Relation relation, * tuple. Check for XMIN match. */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) + !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); @@ -1068,7 +1067,7 @@ heap_get_latest_tid(Relation relation, priorXmax = HeapTupleHeaderGetXmax(tp.t_data); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); - } /* end of loop */ + } /* end of loop */ } /* @@ -1102,13 +1101,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, #endif /* - * If the object id of this tuple has already been assigned, trust - * the caller. There are a couple of ways this can happen. At - * initial db creation, the backend program sets oids for tuples. - * When we define an index, we set the oid. Finally, in the - * future, we may allow users to set their own object ids in order - * to support a persistent object store (objects need to contain - * pointers to one another). + * If the object id of this tuple has already been assigned, trust the + * caller. There are a couple of ways this can happen. At initial db + * creation, the backend program sets oids for tuples. When we define + * an index, we set the oid. Finally, in the future, we may allow + * users to set their own object ids in order to support a persistent + * object store (objects need to contain pointers to one another). */ if (!OidIsValid(HeapTupleGetOid(tup))) HeapTupleSetOid(tup, GetNewOid(relation)); @@ -1129,8 +1127,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, /* * If the new tuple is too big for storage or contains already toasted - * out-of-line attributes from some other relation, invoke the - * toaster. + * out-of-line attributes from some other relation, invoke the toaster. 
*/ if (HeapTupleHasExternal(tup) || (MAXALIGN(tup->t_len) > TOAST_TUPLE_THRESHOLD)) @@ -1172,9 +1169,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, xlhdr.t_hoff = tup->t_data->t_hoff; /* - * note we mark rdata[1] as belonging to buffer; if XLogInsert - * decides to write the whole page to the xlog, we don't need to - * store xl_heap_header in the xlog. + * note we mark rdata[1] as belonging to buffer; if XLogInsert decides + * to write the whole page to the xlog, we don't need to store + * xl_heap_header in the xlog. */ rdata[1].data = (char *) &xlhdr; rdata[1].len = SizeOfHeapHeader; @@ -1190,9 +1187,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, rdata[2].next = NULL; /* - * If this is the single and first tuple on page, we can reinit - * the page instead of restoring the whole thing. Set flag, and - * hide buffer references from XLogInsert. + * If this is the single and first tuple on page, we can reinit the + * page instead of restoring the whole thing. Set flag, and hide + * buffer references from XLogInsert. */ if (ItemPointerGetOffsetNumber(&(tup->t_self)) == FirstOffsetNumber && PageGetMaxOffsetNumber(page) == FirstOffsetNumber) @@ -1213,10 +1210,10 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, WriteBuffer(buffer); /* - * If tuple is cachable, mark it for invalidation from the caches in - * case we abort. Note it is OK to do this after WriteBuffer releases - * the buffer, because the "tup" data structure is all in local - * memory, not in the shared buffer. + * If tuple is cachable, mark it for invalidation from the caches in case + * we abort. Note it is OK to do this after WriteBuffer releases the + * buffer, because the "tup" data structure is all in local memory, not in + * the shared buffer. */ CacheInvalidateHeapTuple(relation, tup); @@ -1268,7 +1265,7 @@ heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, TransactionId *update_xmax, CommandId cid, Snapshot crosscheck, bool wait) { - HTSU_Result result; + HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); ItemId lp; HeapTupleData tp; @@ -1301,7 +1298,7 @@ l1: else if (result == HeapTupleBeingUpdated && wait) { TransactionId xwait; - uint16 infomask; + uint16 infomask; /* must copy state data before unlocking buffer */ xwait = HeapTupleHeaderGetXmax(tp.t_data); @@ -1310,13 +1307,13 @@ l1: LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* - * Acquire tuple lock to establish our priority for the tuple - * (see heap_lock_tuple). LockTuple will release us when we are + * Acquire tuple lock to establish our priority for the tuple (see + * heap_lock_tuple). LockTuple will release us when we are * next-in-line for the tuple. * - * If we are forced to "start over" below, we keep the tuple lock; - * this arranges that we stay at the head of the line while - * rechecking tuple state. + * If we are forced to "start over" below, we keep the tuple lock; this + * arranges that we stay at the head of the line while rechecking + * tuple state. */ if (!have_tuple_lock) { @@ -1347,12 +1344,12 @@ l1: goto l1; /* - * You might think the multixact is necessarily done here, but - * not so: it could have surviving members, namely our own xact - * or other subxacts of this backend. It is legal for us to - * delete the tuple in either case, however (the latter case is - * essentially a situation of upgrading our former shared lock - * to exclusive). 
We don't bother changing the on-disk hint bits + * You might think the multixact is necessarily done here, but not + * so: it could have surviving members, namely our own xact or + * other subxacts of this backend. It is legal for us to delete + * the tuple in either case, however (the latter case is + * essentially a situation of upgrading our former shared lock to + * exclusive). We don't bother changing the on-disk hint bits * since we are about to overwrite the xmax altogether. */ } @@ -1385,8 +1382,8 @@ l1: } /* - * We may overwrite if previous xmax aborted, or if it committed - * but only locked the tuple without updating it. + * We may overwrite if previous xmax aborted, or if it committed but + * only locked the tuple without updating it. */ if (tp.t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) @@ -1467,18 +1464,18 @@ l1: /* * If the tuple has toasted out-of-line attributes, we need to delete - * those items too. We have to do this before WriteBuffer because we - * need to look at the contents of the tuple, but it's OK to release - * the context lock on the buffer first. + * those items too. We have to do this before WriteBuffer because we need + * to look at the contents of the tuple, but it's OK to release the + * context lock on the buffer first. */ if (HeapTupleHasExternal(&tp)) heap_tuple_toast_attrs(relation, NULL, &tp); /* * Mark tuple for invalidation from system caches at next command - * boundary. We have to do this before WriteBuffer because we need to - * look at the contents of the tuple, so we need to hold our refcount - * on the buffer. + * boundary. We have to do this before WriteBuffer because we need to look + * at the contents of the tuple, so we need to hold our refcount on the + * buffer. */ CacheInvalidateHeapTuple(relation, &tp); @@ -1506,7 +1503,7 @@ l1: void simple_heap_delete(Relation relation, ItemPointer tid) { - HTSU_Result result; + HTSU_Result result; ItemPointerData update_ctid; TransactionId update_xmax; @@ -1569,7 +1566,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ItemPointer ctid, TransactionId *update_xmax, CommandId cid, Snapshot crosscheck, bool wait) { - HTSU_Result result; + HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); ItemId lp; HeapTupleData oldtup; @@ -1598,8 +1595,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, /* * Note: beyond this point, use oldtup not otid to refer to old tuple. * otid may very well point at newtup->t_self, which we will overwrite - * with the new tuple's location, so there's great risk of confusion - * if we use otid anymore. + * with the new tuple's location, so there's great risk of confusion if we + * use otid anymore. */ l2: @@ -1614,7 +1611,7 @@ l2: else if (result == HeapTupleBeingUpdated && wait) { TransactionId xwait; - uint16 infomask; + uint16 infomask; /* must copy state data before unlocking buffer */ xwait = HeapTupleHeaderGetXmax(oldtup.t_data); @@ -1623,13 +1620,13 @@ l2: LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* - * Acquire tuple lock to establish our priority for the tuple - * (see heap_lock_tuple). LockTuple will release us when we are + * Acquire tuple lock to establish our priority for the tuple (see + * heap_lock_tuple). LockTuple will release us when we are * next-in-line for the tuple. * - * If we are forced to "start over" below, we keep the tuple lock; - * this arranges that we stay at the head of the line while - * rechecking tuple state. 
+ * If we are forced to "start over" below, we keep the tuple lock; this + * arranges that we stay at the head of the line while rechecking + * tuple state. */ if (!have_tuple_lock) { @@ -1660,12 +1657,12 @@ l2: goto l2; /* - * You might think the multixact is necessarily done here, but - * not so: it could have surviving members, namely our own xact - * or other subxacts of this backend. It is legal for us to - * update the tuple in either case, however (the latter case is - * essentially a situation of upgrading our former shared lock - * to exclusive). We don't bother changing the on-disk hint bits + * You might think the multixact is necessarily done here, but not + * so: it could have surviving members, namely our own xact or + * other subxacts of this backend. It is legal for us to update + * the tuple in either case, however (the latter case is + * essentially a situation of upgrading our former shared lock to + * exclusive). We don't bother changing the on-disk hint bits * since we are about to overwrite the xmax altogether. */ } @@ -1698,8 +1695,8 @@ l2: } /* - * We may overwrite if previous xmax aborted, or if it committed - * but only locked the tuple without updating it. + * We may overwrite if previous xmax aborted, or if it committed but + * only locked the tuple without updating it. */ if (oldtup.t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) @@ -1753,15 +1750,15 @@ l2: HeapTupleHeaderSetCmax(newtup->t_data, 0); /* for cleanliness */ /* - * If the toaster needs to be activated, OR if the new tuple will not - * fit on the same page as the old, then we need to release the - * context lock (but not the pin!) on the old tuple's buffer while we - * are off doing TOAST and/or table-file-extension work. We must mark - * the old tuple to show that it's already being updated, else other - * processes may try to update it themselves. + * If the toaster needs to be activated, OR if the new tuple will not fit + * on the same page as the old, then we need to release the context lock + * (but not the pin!) on the old tuple's buffer while we are off doing + * TOAST and/or table-file-extension work. We must mark the old tuple to + * show that it's already being updated, else other processes may try to + * update it themselves. * - * We need to invoke the toaster if there are already any out-of-line - * toasted values present, or if the new tuple is over-threshold. + * We need to invoke the toaster if there are already any out-of-line toasted + * values present, or if the new tuple is over-threshold. */ need_toast = (HeapTupleHasExternal(&oldtup) || HeapTupleHasExternal(newtup) || @@ -1790,22 +1787,21 @@ l2: } /* - * Now, do we need a new page for the tuple, or not? This is a - * bit tricky since someone else could have added tuples to the - * page while we weren't looking. We have to recheck the - * available space after reacquiring the buffer lock. But don't - * bother to do that if the former amount of free space is still - * not enough; it's unlikely there's more free now than before. + * Now, do we need a new page for the tuple, or not? This is a bit + * tricky since someone else could have added tuples to the page while + * we weren't looking. We have to recheck the available space after + * reacquiring the buffer lock. But don't bother to do that if the + * former amount of free space is still not enough; it's unlikely + * there's more free now than before. * * What's more, if we need to get a new page, we will need to acquire - * buffer locks on both old and new pages. 
To avoid deadlock - * against some other backend trying to get the same two locks in - * the other order, we must be consistent about the order we get - * the locks in. We use the rule "lock the lower-numbered page of - * the relation first". To implement this, we must do - * RelationGetBufferForTuple while not holding the lock on the old - * page, and we must rely on it to get the locks on both pages in - * the correct order. + * buffer locks on both old and new pages. To avoid deadlock against + * some other backend trying to get the same two locks in the other + * order, we must be consistent about the order we get the locks in. + * We use the rule "lock the lower-numbered page of the relation + * first". To implement this, we must do RelationGetBufferForTuple + * while not holding the lock on the old page, and we must rely on it + * to get the locks on both pages in the correct order. */ if (newtupsize > pagefree) { @@ -1823,8 +1819,8 @@ l2: { /* * Rats, it doesn't fit anymore. We must now unlock and - * relock to avoid deadlock. Fortunately, this path - * should seldom be taken. + * relock to avoid deadlock. Fortunately, this path should + * seldom be taken. */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); newbuf = RelationGetBufferForTuple(relation, newtup->t_len, @@ -1845,9 +1841,9 @@ l2: } /* - * At this point newbuf and buffer are both pinned and locked, and - * newbuf has enough space for the new tuple. If they are the same - * buffer, only one pin is held. + * At this point newbuf and buffer are both pinned and locked, and newbuf + * has enough space for the new tuple. If they are the same buffer, only + * one pin is held. */ /* NO EREPORT(ERROR) from here till changes are logged */ @@ -1897,8 +1893,8 @@ l2: /* * Mark old tuple for invalidation from system caches at next command - * boundary. We have to do this before WriteBuffer because we need to - * look at the contents of the tuple, so we need to hold our refcount. + * boundary. We have to do this before WriteBuffer because we need to look + * at the contents of the tuple, so we need to hold our refcount. */ CacheInvalidateHeapTuple(relation, &oldtup); @@ -1907,10 +1903,10 @@ l2: WriteBuffer(buffer); /* - * If new tuple is cachable, mark it for invalidation from the caches - * in case we abort. Note it is OK to do this after WriteBuffer - * releases the buffer, because the "newtup" data structure is all in - * local memory, not in the shared buffer. + * If new tuple is cachable, mark it for invalidation from the caches in + * case we abort. Note it is OK to do this after WriteBuffer releases the + * buffer, because the "newtup" data structure is all in local memory, not + * in the shared buffer. */ CacheInvalidateHeapTuple(relation, newtup); @@ -1936,7 +1932,7 @@ l2: void simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) { - HTSU_Result result; + HTSU_Result result; ItemPointerData update_ctid; TransactionId update_xmax; @@ -2012,7 +2008,7 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) * waiter gets the tuple, potentially leading to indefinite starvation of * some waiters. The possibility of share-locking makes the problem much * worse --- a steady stream of share-lockers can easily block an exclusive - * locker forever. To provide more reliable semantics about who gets a + * locker forever. To provide more reliable semantics about who gets a * tuple-level lock first, we use the standard lock manager. 
The protocol * for waiting for a tuple-level lock is really * LockTuple() @@ -2020,7 +2016,7 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) * mark tuple as locked by me * UnlockTuple() * When there are multiple waiters, arbitration of who is to get the lock next - * is provided by LockTuple(). However, at most one tuple-level lock will + * is provided by LockTuple(). However, at most one tuple-level lock will * be held or awaited per backend at any time, so we don't risk overflow * of the lock table. Note that incoming share-lockers are required to * do LockTuple as well, if there is any conflict, to ensure that they don't @@ -2032,11 +2028,11 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, ItemPointer ctid, TransactionId *update_xmax, CommandId cid, LockTupleMode mode, bool nowait) { - HTSU_Result result; + HTSU_Result result; ItemPointer tid = &(tuple->t_self); ItemId lp; PageHeader dp; - TransactionId xid; + TransactionId xid; uint16 new_infomask; LOCKMODE tuple_lock_type; bool have_tuple_lock = false; @@ -2067,7 +2063,7 @@ l3: else if (result == HeapTupleBeingUpdated) { TransactionId xwait; - uint16 infomask; + uint16 infomask; /* must copy state data before unlocking buffer */ xwait = HeapTupleHeaderGetXmax(tuple->t_data); @@ -2077,12 +2073,12 @@ l3: /* * Acquire tuple lock to establish our priority for the tuple. - * LockTuple will release us when we are next-in-line for the - * tuple. We must do this even if we are share-locking. + * LockTuple will release us when we are next-in-line for the tuple. + * We must do this even if we are share-locking. * - * If we are forced to "start over" below, we keep the tuple lock; - * this arranges that we stay at the head of the line while - * rechecking tuple state. + * If we are forced to "start over" below, we keep the tuple lock; this + * arranges that we stay at the head of the line while rechecking + * tuple state. */ if (!have_tuple_lock) { @@ -2091,8 +2087,8 @@ l3: if (!ConditionalLockTuple(relation, tid, tuple_lock_type)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), - errmsg("could not obtain lock on row in relation \"%s\"", - RelationGetRelationName(relation)))); + errmsg("could not obtain lock on row in relation \"%s\"", + RelationGetRelationName(relation)))); } else LockTuple(relation, tid, tuple_lock_type); @@ -2108,8 +2104,8 @@ l3: LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); /* - * Make sure it's still a shared lock, else start over. (It's - * OK if the ownership of the shared lock has changed, though.) + * Make sure it's still a shared lock, else start over. (It's OK + * if the ownership of the shared lock has changed, though.) */ if (!(tuple->t_data->t_infomask & HEAP_XMAX_SHARED_LOCK)) goto l3; @@ -2122,8 +2118,8 @@ l3: if (!ConditionalMultiXactIdWait((MultiXactId) xwait)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), - errmsg("could not obtain lock on row in relation \"%s\"", - RelationGetRelationName(relation)))); + errmsg("could not obtain lock on row in relation \"%s\"", + RelationGetRelationName(relation)))); } else MultiXactIdWait((MultiXactId) xwait); @@ -2131,9 +2127,9 @@ l3: LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); /* - * If xwait had just locked the tuple then some other xact - * could update this tuple before we get to this point. - * Check for xmax change, and start over if so. + * If xwait had just locked the tuple then some other xact could + * update this tuple before we get to this point. Check for xmax + * change, and start over if so. 
*/ if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) || !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data), @@ -2141,12 +2137,12 @@ l3: goto l3; /* - * You might think the multixact is necessarily done here, but - * not so: it could have surviving members, namely our own xact - * or other subxacts of this backend. It is legal for us to - * lock the tuple in either case, however. We don't bother - * changing the on-disk hint bits since we are about to - * overwrite the xmax altogether. + * You might think the multixact is necessarily done here, but not + * so: it could have surviving members, namely our own xact or + * other subxacts of this backend. It is legal for us to lock the + * tuple in either case, however. We don't bother changing the + * on-disk hint bits since we are about to overwrite the xmax + * altogether. */ } else @@ -2157,8 +2153,8 @@ l3: if (!ConditionalXactLockTableWait(xwait)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), - errmsg("could not obtain lock on row in relation \"%s\"", - RelationGetRelationName(relation)))); + errmsg("could not obtain lock on row in relation \"%s\"", + RelationGetRelationName(relation)))); } else XactLockTableWait(xwait); @@ -2166,9 +2162,9 @@ l3: LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); /* - * xwait is done, but if xwait had just locked the tuple then - * some other xact could update this tuple before we get to - * this point. Check for xmax change, and start over if so. + * xwait is done, but if xwait had just locked the tuple then some + * other xact could update this tuple before we get to this point. + * Check for xmax change, and start over if so. */ if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) || !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data), @@ -2188,10 +2184,10 @@ l3: } /* - * We may lock if previous xmax aborted, or if it committed - * but only locked the tuple without updating it. The case where - * we didn't wait because we are joining an existing shared lock - * is correctly handled, too. + * We may lock if previous xmax aborted, or if it committed but only + * locked the tuple without updating it. The case where we didn't + * wait because we are joining an existing shared lock is correctly + * handled, too. */ if (tuple->t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) @@ -2213,9 +2209,9 @@ l3: } /* - * Compute the new xmax and infomask to store into the tuple. Note we - * do not modify the tuple just yet, because that would leave it in the - * wrong state if multixact.c elogs. + * Compute the new xmax and infomask to store into the tuple. Note we do + * not modify the tuple just yet, because that would leave it in the wrong + * state if multixact.c elogs. */ xid = GetCurrentTransactionId(); @@ -2229,17 +2225,16 @@ l3: if (mode == LockTupleShared) { - TransactionId xmax = HeapTupleHeaderGetXmax(tuple->t_data); + TransactionId xmax = HeapTupleHeaderGetXmax(tuple->t_data); uint16 old_infomask = tuple->t_data->t_infomask; /* * If this is the first acquisition of a shared lock in the current - * transaction, set my per-backend OldestMemberMXactId setting. - * We can be certain that the transaction will never become a - * member of any older MultiXactIds than that. (We have to do this - * even if we end up just using our own TransactionId below, since - * some other backend could incorporate our XID into a MultiXact - * immediately afterwards.) + * transaction, set my per-backend OldestMemberMXactId setting. 
We can + * be certain that the transaction will never become a member of any + * older MultiXactIds than that. (We have to do this even if we end + * up just using our own TransactionId below, since some other backend + * could incorporate our XID into a MultiXact immediately afterwards.) */ MultiXactIdSetOldestMember(); @@ -2249,14 +2244,14 @@ l3: * Check to see if we need a MultiXactId because there are multiple * lockers. * - * HeapTupleSatisfiesUpdate will have set the HEAP_XMAX_INVALID - * bit if the xmax was a MultiXactId but it was not running anymore. - * There is a race condition, which is that the MultiXactId may have - * finished since then, but that uncommon case is handled within + * HeapTupleSatisfiesUpdate will have set the HEAP_XMAX_INVALID bit if + * the xmax was a MultiXactId but it was not running anymore. There is + * a race condition, which is that the MultiXactId may have finished + * since then, but that uncommon case is handled within * MultiXactIdExpand. * - * There is a similar race condition possible when the old xmax was - * a regular TransactionId. We test TransactionIdIsInProgress again + * There is a similar race condition possible when the old xmax was a + * regular TransactionId. We test TransactionIdIsInProgress again * just to narrow the window, but it's still possible to end up * creating an unnecessary MultiXactId. Fortunately this is harmless. */ @@ -2277,10 +2272,10 @@ l3: { /* * If the old locker is ourselves, we'll just mark the - * tuple again with our own TransactionId. However we - * have to consider the possibility that we had - * exclusive rather than shared lock before --- if so, - * be careful to preserve the exclusivity of the lock. + * tuple again with our own TransactionId. However we + * have to consider the possibility that we had exclusive + * rather than shared lock before --- if so, be careful to + * preserve the exclusivity of the lock. */ if (!(old_infomask & HEAP_XMAX_SHARED_LOCK)) { @@ -2303,9 +2298,9 @@ l3: else { /* - * Can get here iff HeapTupleSatisfiesUpdate saw the old - * xmax as running, but it finished before - * TransactionIdIsInProgress() got to run. Treat it like + * Can get here iff HeapTupleSatisfiesUpdate saw the old xmax + * as running, but it finished before + * TransactionIdIsInProgress() got to run. Treat it like * there's no locker in the tuple. */ } @@ -2329,8 +2324,8 @@ l3: /* * Store transaction information of xact locking the tuple. * - * Note: our CID is meaningless if storing a MultiXactId, but no harm - * in storing it anyway. + * Note: our CID is meaningless if storing a MultiXactId, but no harm in + * storing it anyway. */ tuple->t_data->t_infomask = new_infomask; HeapTupleHeaderSetXmax(tuple->t_data, xid); @@ -2339,8 +2334,8 @@ l3: tuple->t_data->t_ctid = *tid; /* - * XLOG stuff. You might think that we don't need an XLOG record because - * there is no state change worth restoring after a crash. You would be + * XLOG stuff. You might think that we don't need an XLOG record because + * there is no state change worth restoring after a crash. You would be * wrong however: we have just written either a TransactionId or a * MultiXactId that may never have been seen on disk before, and we need * to make sure that there are XLOG entries covering those ID numbers. @@ -2473,8 +2468,8 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt) /* * The unused-offsets array is not actually in the buffer, but pretend - * that it is. 
When XLogInsert stores the whole buffer, the offsets - * array need not be stored too. + * that it is. When XLogInsert stores the whole buffer, the offsets array + * need not be stored too. */ if (uncnt > 0) { @@ -2500,11 +2495,10 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool move) { /* - * Note: xlhdr is declared to have adequate size and correct alignment - * for an xl_heap_header. However the two tids, if present at all, - * will be packed in with no wasted space after the xl_heap_header; - * they aren't necessarily aligned as implied by this struct - * declaration. + * Note: xlhdr is declared to have adequate size and correct alignment for + * an xl_heap_header. However the two tids, if present at all, will be + * packed in with no wasted space after the xl_heap_header; they aren't + * necessarily aligned as implied by this struct declaration. */ struct { @@ -2555,8 +2549,8 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, } /* - * As with insert records, we need not store the rdata[2] segment if - * we decide to store the whole buffer instead. + * As with insert records, we need not store the rdata[2] segment if we + * decide to store the whole buffer instead. */ rdata[2].data = (char *) &xlhdr; rdata[2].len = hsize; @@ -2655,8 +2649,8 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record) Page page; /* - * Note: the NEWPAGE log record is used for both heaps and indexes, so - * do not do anything that assumes we are touching a heap. + * Note: the NEWPAGE log record is used for both heaps and indexes, so do + * not do anything that assumes we are touching a heap. */ if (record->xl_info & XLR_BKP_BLOCK_1) @@ -2699,7 +2693,7 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record) return; buffer = XLogReadBuffer(false, reln, - ItemPointerGetBlockNumber(&(xlrec->target.tid))); + ItemPointerGetBlockNumber(&(xlrec->target.tid))); if (!BufferIsValid(buffer)) elog(PANIC, "heap_delete_redo: no block"); @@ -2707,7 +2701,7 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record) if (PageIsNew((PageHeader) page)) elog(PANIC, "heap_delete_redo: uninitialized page"); - if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ + if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); @@ -2749,7 +2743,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) struct { HeapTupleHeaderData hdr; - char data[MaxTupleSize]; + char data[MaxTupleSize]; } tbuf; HeapTupleHeader htup; xl_heap_header xlhdr; @@ -2764,7 +2758,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) return; buffer = XLogReadBuffer(true, reln, - ItemPointerGetBlockNumber(&(xlrec->target.tid))); + ItemPointerGetBlockNumber(&(xlrec->target.tid))); if (!BufferIsValid(buffer)) return; @@ -2776,7 +2770,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) if (record->xl_info & XLOG_HEAP_INIT_PAGE) PageInit(page, BufferGetPageSize(buffer), 0); - if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ + if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); @@ -2835,7 +2829,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move) struct { HeapTupleHeaderData hdr; - char data[MaxTupleSize]; + char data[MaxTupleSize]; } tbuf; xl_heap_header xlhdr; int hsize; @@ -2850,7 +2844,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move) /* Deal with old tuple version */ buffer = 
XLogReadBuffer(false, reln, - ItemPointerGetBlockNumber(&(xlrec->target.tid))); + ItemPointerGetBlockNumber(&(xlrec->target.tid))); if (!BufferIsValid(buffer)) elog(PANIC, "heap_update_redo: no block"); @@ -2858,7 +2852,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move) if (PageIsNew((PageHeader) page)) elog(PANIC, "heap_update_redo: uninitialized old page"); - if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ + if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); @@ -2928,7 +2922,7 @@ newsame:; if (record->xl_info & XLOG_HEAP_INIT_PAGE) PageInit(page, BufferGetPageSize(buffer), 0); - if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ + if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); @@ -2961,7 +2955,7 @@ newsame:; if (move) { - TransactionId xid[2]; /* xmax, xmin */ + TransactionId xid[2]; /* xmax, xmin */ memcpy((char *) xid, (char *) xlrec + SizeOfHeapUpdate + SizeOfHeapHeader, @@ -3008,7 +3002,7 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record) return; buffer = XLogReadBuffer(false, reln, - ItemPointerGetBlockNumber(&(xlrec->target.tid))); + ItemPointerGetBlockNumber(&(xlrec->target.tid))); if (!BufferIsValid(buffer)) elog(PANIC, "heap_lock_redo: no block"); @@ -3016,7 +3010,7 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record) if (PageIsNew((PageHeader) page)) elog(PANIC, "heap_lock_redo: uninitialized page"); - if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ + if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); @@ -3081,7 +3075,7 @@ static void out_target(char *buf, xl_heaptid *target) { sprintf(buf + strlen(buf), "rel %u/%u/%u; tid %u/%u", - target->node.spcNode, target->node.dbNode, target->node.relNode, + target->node.spcNode, target->node.dbNode, target->node.relNode, ItemPointerGetBlockNumber(&(target->tid)), ItemPointerGetOffsetNumber(&(target->tid))); } diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index fc1b0afd21..800ee4a805 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.57 2005/06/20 18:37:01 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.58 2005/10/15 02:49:08 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -80,7 +80,7 @@ RelationPutHeapTuple(Relation relation, * enough there). In that case, the page will be pinned and locked only once. * * If use_fsm is true (the normal case), we use FSM to help us find free - * space. If use_fsm is false, we always append a new empty page to the + * space. If use_fsm is false, we always append a new empty page to the * end of the relation if the tuple won't fit on the current target page. * This can save some cycles when we know the relation is new and doesn't * contain useful amounts of free space. @@ -122,22 +122,20 @@ RelationGetBufferForTuple(Relation relation, Size len, if (otherBuffer != InvalidBuffer) otherBlock = BufferGetBlockNumber(otherBuffer); else - otherBlock = InvalidBlockNumber; /* just to keep compiler - * quiet */ + otherBlock = InvalidBlockNumber; /* just to keep compiler quiet */ /* - * We first try to put the tuple on the same page we last inserted a - * tuple on, as cached in the relcache entry. 
If that doesn't work, - * we ask the shared Free Space Map to locate a suitable page. Since - * the FSM's info might be out of date, we have to be prepared to loop - * around and retry multiple times. (To insure this isn't an infinite - * loop, we must update the FSM with the correct amount of free space - * on each page that proves not to be suitable.) If the FSM has no - * record of a page with enough free space, we give up and extend the - * relation. + * We first try to put the tuple on the same page we last inserted a tuple + * on, as cached in the relcache entry. If that doesn't work, we ask the + * shared Free Space Map to locate a suitable page. Since the FSM's info + * might be out of date, we have to be prepared to loop around and retry + * multiple times. (To insure this isn't an infinite loop, we must update + * the FSM with the correct amount of free space on each page that proves + * not to be suitable.) If the FSM has no record of a page with enough + * free space, we give up and extend the relation. * - * When use_fsm is false, we either put the tuple onto the existing - * target page or extend the relation. + * When use_fsm is false, we either put the tuple onto the existing target + * page or extend the relation. */ targetBlock = relation->rd_targblock; @@ -151,9 +149,9 @@ RelationGetBufferForTuple(Relation relation, Size len, targetBlock = GetPageWithFreeSpace(&relation->rd_node, len); /* - * If the FSM knows nothing of the rel, try the last page before - * we give up and extend. This avoids one-tuple-per-page syndrome - * during bootstrapping or in a recently-started system. + * If the FSM knows nothing of the rel, try the last page before we + * give up and extend. This avoids one-tuple-per-page syndrome during + * bootstrapping or in a recently-started system. */ if (targetBlock == InvalidBlockNumber) { @@ -168,8 +166,8 @@ RelationGetBufferForTuple(Relation relation, Size len, { /* * Read and exclusive-lock the target block, as well as the other - * block if one was given, taking suitable care with lock ordering - * and the possibility they are the same block. + * block if one was given, taking suitable care with lock ordering and + * the possibility they are the same block. */ if (otherBuffer == InvalidBuffer) { @@ -199,8 +197,8 @@ RelationGetBufferForTuple(Relation relation, Size len, } /* - * Now we can check to see if there's enough free space here. If - * so, we're done. + * Now we can check to see if there's enough free space here. If so, + * we're done. */ pageHeader = (Page) BufferGetPage(buffer); pageFreeSpace = PageGetFreeSpace(pageHeader); @@ -213,9 +211,9 @@ RelationGetBufferForTuple(Relation relation, Size len, /* * Not enough space, so we must give up our page locks and pin (if - * any) and prepare to look elsewhere. We don't care which order - * we unlock the two buffers in, so this can be slightly simpler - * than the code above. + * any) and prepare to look elsewhere. We don't care which order we + * unlock the two buffers in, so this can be slightly simpler than the + * code above. */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); if (otherBuffer == InvalidBuffer) @@ -231,8 +229,8 @@ RelationGetBufferForTuple(Relation relation, Size len, break; /* - * Update FSM as to condition of this page, and ask for another - * page to try. + * Update FSM as to condition of this page, and ask for another page + * to try. 
*/ targetBlock = RecordAndGetPageWithFreeSpace(&relation->rd_node, targetBlock, @@ -243,10 +241,10 @@ RelationGetBufferForTuple(Relation relation, Size len, /* * Have to extend the relation. * - * We have to use a lock to ensure no one else is extending the rel at - * the same time, else we will both try to initialize the same new - * page. We can skip locking for new or temp relations, however, - * since no one else could be accessing them. + * We have to use a lock to ensure no one else is extending the rel at the + * same time, else we will both try to initialize the same new page. We + * can skip locking for new or temp relations, however, since no one else + * could be accessing them. */ needLock = !RELATION_IS_LOCAL(relation); @@ -254,17 +252,16 @@ RelationGetBufferForTuple(Relation relation, Size len, LockRelationForExtension(relation, ExclusiveLock); /* - * XXX This does an lseek - rather expensive - but at the moment it is - * the only way to accurately determine how many blocks are in a - * relation. Is it worth keeping an accurate file length in shared - * memory someplace, rather than relying on the kernel to do it for - * us? + * XXX This does an lseek - rather expensive - but at the moment it is the + * only way to accurately determine how many blocks are in a relation. Is + * it worth keeping an accurate file length in shared memory someplace, + * rather than relying on the kernel to do it for us? */ buffer = ReadBuffer(relation, P_NEW); /* - * We can be certain that locking the otherBuffer first is OK, since - * it must have a lower page number. + * We can be certain that locking the otherBuffer first is OK, since it + * must have a lower page number. */ if (otherBuffer != InvalidBuffer) LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); @@ -275,10 +272,10 @@ RelationGetBufferForTuple(Relation relation, Size len, LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* - * Release the file-extension lock; it's now OK for someone else to - * extend the relation some more. Note that we cannot release this - * lock before we have buffer lock on the new page, or we risk a - * race condition against vacuumlazy.c --- see comments therein. + * Release the file-extension lock; it's now OK for someone else to extend + * the relation some more. Note that we cannot release this lock before + * we have buffer lock on the new page, or we risk a race condition + * against vacuumlazy.c --- see comments therein. */ if (needLock) UnlockRelationForExtension(relation, ExclusiveLock); @@ -299,11 +296,11 @@ RelationGetBufferForTuple(Relation relation, Size len, /* * Remember the new page as our target for future insertions. * - * XXX should we enter the new page into the free space map immediately, - * or just keep it for this backend's exclusive use in the short run - * (until VACUUM sees it)? Seems to depend on whether you expect the - * current backend to make more insertions or not, which is probably a - * good bet most of the time. So for now, don't add it to FSM yet. + * XXX should we enter the new page into the free space map immediately, or + * just keep it for this backend's exclusive use in the short run (until + * VACUUM sees it)? Seems to depend on whether you expect the current + * backend to make more insertions or not, which is probably a good bet + * most of the time. So for now, don't add it to FSM yet. 
*/ relation->rd_targblock = BufferGetBlockNumber(buffer); diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 02da8446cd..fd20f111b8 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.52 2005/08/12 01:35:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.53 2005/10/15 02:49:09 momjian Exp $ * * * INTERFACE ROUTINES @@ -90,8 +90,7 @@ heap_tuple_fetch_attr(varattrib *attr) else { /* - * This is a plain value inside of the main tuple - why am I - * called? + * This is a plain value inside of the main tuple - why am I called? */ result = attr; } @@ -154,8 +153,7 @@ heap_tuple_untoast_attr(varattrib *attr) else /* - * This is a plain value inside of the main tuple - why am I - * called? + * This is a plain value inside of the main tuple - why am I called? */ return attr; @@ -255,8 +253,8 @@ toast_raw_datum_size(Datum value) else if (VARATT_IS_EXTERNAL(attr)) { /* - * an uncompressed external attribute has rawsize including the - * header (not too consistent!) + * an uncompressed external attribute has rawsize including the header + * (not too consistent!) */ result = attr->va_content.va_external.va_rawsize; } @@ -274,26 +272,26 @@ toast_raw_datum_size(Datum value) * Return the physical storage size (possibly compressed) of a varlena datum * ---------- */ -Size +Size toast_datum_size(Datum value) { - varattrib *attr = (varattrib *) DatumGetPointer(value); + varattrib *attr = (varattrib *) DatumGetPointer(value); Size result; if (VARATT_IS_EXTERNAL(attr)) { /* * Attribute is stored externally - return the extsize whether - * compressed or not. We do not count the size of the toast - * pointer ... should we? + * compressed or not. We do not count the size of the toast pointer + * ... should we? */ result = attr->va_content.va_external.va_extsize; } else { /* - * Attribute is stored inline either compressed or not, just - * calculate the size of the datum in either case. + * Attribute is stored inline either compressed or not, just calculate + * the size of the datum in either case. */ result = VARSIZE(attr); } @@ -321,12 +319,12 @@ toast_delete(Relation rel, HeapTuple oldtup) * Get the tuple descriptor and break down the tuple into fields. * * NOTE: it's debatable whether to use heap_deformtuple() here or just - * heap_getattr() only the varlena columns. The latter could win if - * there are few varlena columns and many non-varlena ones. However, - * heap_deformtuple costs only O(N) while the heap_getattr way would - * cost O(N^2) if there are many varlena columns, so it seems better - * to err on the side of linear cost. (We won't even be here unless - * there's at least one varlena column, by the way.) + * heap_getattr() only the varlena columns. The latter could win if there + * are few varlena columns and many non-varlena ones. However, + * heap_deformtuple costs only O(N) while the heap_getattr way would cost + * O(N^2) if there are many varlena columns, so it seems better to err on + * the side of linear cost. (We won't even be here unless there's at + * least one varlena column, by the way.) */ tupleDesc = rel->rd_att; att = tupleDesc->attrs; @@ -336,8 +334,8 @@ toast_delete(Relation rel, HeapTuple oldtup) heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull); /* - * Check for external stored attributes and delete them from the - * secondary relation. 
+ * Check for external stored attributes and delete them from the secondary + * relation. */ for (i = 0; i < numAttrs; i++) { @@ -447,9 +445,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup) else { /* - * This attribute isn't changed by this update so we - * reuse the original reference to the old value in - * the new tuple. + * This attribute isn't changed by this update so we reuse + * the original reference to the old value in the new + * tuple. */ toast_action[i] = 'p'; toast_sizes[i] = VARATT_SIZE(toast_values[i]); @@ -582,16 +580,15 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup) else { /* - * incompressible data, ignore on subsequent compression - * passes + * incompressible data, ignore on subsequent compression passes */ toast_action[i] = 'x'; } } /* - * Second we look for attributes of attstorage 'x' or 'e' that are - * still inline. + * Second we look for attributes of attstorage 'x' or 'e' that are still + * inline. */ while (MAXALIGN(heap_compute_data_size(tupleDesc, toast_values, toast_isnull)) > @@ -696,8 +693,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup) else { /* - * incompressible data, ignore on subsequent compression - * passes + * incompressible data, ignore on subsequent compression passes */ toast_action[i] = 'x'; } @@ -755,8 +751,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup) } /* - * In the case we toasted any values, we need to build a new heap - * tuple with the changed values. + * In the case we toasted any values, we need to build a new heap tuple + * with the changed values. */ if (need_change) { @@ -798,8 +794,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup) has_nulls ? newtup->t_data->t_bits : NULL); /* - * In the case we modified a previously modified tuple again, free - * the memory from the previous run + * In the case we modified a previously modified tuple again, free the + * memory from the previous run */ if ((char *) olddata != ((char *) newtup + HEAPTUPLESIZE)) pfree(olddata); @@ -906,8 +902,8 @@ toast_flatten_tuple_attribute(Datum value, return value; /* - * Calculate the new size of the tuple. Header size should not - * change, but data size might. + * Calculate the new size of the tuple. Header size should not change, + * but data size might. */ new_len = offsetof(HeapTupleHeaderData, t_bits); if (has_nulls) @@ -1007,9 +1003,9 @@ toast_save_datum(Relation rel, Datum value) int32 data_todo; /* - * Open the toast relation and its index. We can use the index to - * check uniqueness of the OID we assign to the toasted item, even - * though it has additional columns besides OID. + * Open the toast relation and its index. We can use the index to check + * uniqueness of the OID we assign to the toasted item, even though it has + * additional columns besides OID. */ toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock); toasttupDesc = toastrel->rd_att; @@ -1082,11 +1078,11 @@ toast_save_datum(Relation rel, Datum value) /* * Create the index entry. We cheat a little here by not using - * FormIndexDatum: this relies on the knowledge that the index - * columns are the same as the initial columns of the table. + * FormIndexDatum: this relies on the knowledge that the index columns + * are the same as the initial columns of the table. * - * Note also that there had better not be any user-created index on - * the TOAST table, since we don't bother to update anything else. 
+ * Note also that there had better not be any user-created index on the + * TOAST table, since we don't bother to update anything else. */ index_insert(toastidx, t_values, t_isnull, &(toasttup->t_self), @@ -1148,7 +1144,7 @@ toast_delete_datum(Relation rel, Datum value) ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); /* * Find the chunks by index @@ -1219,14 +1215,14 @@ toast_fetch_datum(varattrib *attr) ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); /* * Read the chunks by index * - * Note that because the index is actually on (valueid, chunkidx) we will - * see the chunks in chunkidx order, even though we didn't explicitly - * ask for it. + * Note that because the index is actually on (valueid, chunkidx) we will see + * the chunks in chunkidx order, even though we didn't explicitly ask for + * it. */ nextidx = 0; @@ -1367,13 +1363,13 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) toastidx = index_open(toastrel->rd_rel->reltoastidxid); /* - * Setup a scan key to fetch from the index. This is either two keys - * or three depending on the number of chunks. + * Setup a scan key to fetch from the index. This is either two keys or + * three depending on the number of chunks. */ ScanKeyInit(&toastkey[0], (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); /* * Use equality condition for one chunk, a range condition otherwise: diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 90e910f343..ed604f9c5d 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.48 2005/05/27 23:31:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.49 2005/10/15 02:49:09 momjian Exp $ * * NOTES * many of the old access method routines have been turned into @@ -78,15 +78,15 @@ RelationGetIndexScan(Relation indexRelation, scan->numberOfKeys = nkeys; /* - * We allocate the key space here, but the AM is responsible for - * actually filling it from the passed key array. + * We allocate the key space here, but the AM is responsible for actually + * filling it from the passed key array. */ if (nkeys > 0) scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else scan->keyData = NULL; - scan->is_multiscan = false; /* caller may change this */ + scan->is_multiscan = false; /* caller may change this */ scan->kill_prior_tuple = false; scan->ignore_killed_tuples = true; /* default setting */ scan->keys_are_unique = false; /* may be set by index AM */ @@ -203,8 +203,8 @@ systable_beginscan(Relation heapRelation, /* * Change attribute numbers to be index column numbers. * - * This code could be generalized to search for the index key numbers - * to substitute, but for now there's no need. + * This code could be generalized to search for the index key numbers to + * substitute, but for now there's no need. 
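
The toast_fetch_datum_slice() hunks above note that only two or three scan keys are needed because just the chunks overlapping the requested byte range have to be read. A self-contained sketch of that chunk arithmetic follows; CHUNK_SIZE, fetch_slice and the in-memory "stored" buffer are illustrative stand-ins, not the actual TOAST machinery:

#include <stdio.h>
#include <string.h>

#define CHUNK_SIZE 32   /* illustrative; real TOAST chunks are far larger */

/* Copy length bytes starting at sliceoffset out of a value stored as
 * consecutive fixed-size chunks, touching only the chunks that overlap
 * the requested range. */
static void fetch_slice(const char *stored, int totallen,
                        int sliceoffset, int length, char *out)
{
    int startchunk = sliceoffset / CHUNK_SIZE;
    int endchunk = (sliceoffset + length - 1) / CHUNK_SIZE;

    for (int chunk = startchunk; chunk <= endchunk; chunk++)
    {
        int chunkstart = chunk * CHUNK_SIZE;
        int chunklen = totallen - chunkstart;
        if (chunklen > CHUNK_SIZE)
            chunklen = CHUNK_SIZE;

        /* portion of this chunk that falls inside the slice */
        int copystart = sliceoffset > chunkstart ? sliceoffset - chunkstart : 0;
        int copyend = sliceoffset + length - chunkstart;
        if (copyend > chunklen)
            copyend = chunklen;

        memcpy(out + (chunkstart + copystart - sliceoffset),
               stored + chunkstart + copystart,
               copyend - copystart);
    }
}

int main(void)
{
    char value[200];
    char slice[50];
    for (int i = 0; i < 200; i++)
        value[i] = 'a' + i % 26;

    fetch_slice(value, 200, 40, 50, slice);
    printf("%.50s\n", slice);
    return memcmp(slice, value + 40, 50) != 0;
}
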
*/ for (i = 0; i < nkeys; i++) { diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 7bf7fcd22f..bd2e3bdd06 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.85 2005/10/06 02:29:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.86 2005/10/15 02:49:09 momjian Exp $ * * INTERFACE ROUTINES * index_open - open an index relation by relation OID @@ -111,7 +111,7 @@ do { \ } while(0) static IndexScanDesc index_beginscan_internal(Relation indexRelation, - int nkeys, ScanKey key); + int nkeys, ScanKey key); /* ---------------------------------------------------------------- @@ -122,14 +122,14 @@ static IndexScanDesc index_beginscan_internal(Relation indexRelation, /* ---------------- * index_open - open an index relation by relation OID * - * Note: we acquire no lock on the index. A lock is not needed when + * Note: we acquire no lock on the index. A lock is not needed when * simply examining the index reldesc; the index's schema information * is considered to be protected by the lock that the caller had better - * be holding on the parent relation. Some type of lock should be + * be holding on the parent relation. Some type of lock should be * obtained on the index before physically accessing it, however. * This is handled automatically for most uses by index_beginscan * and index_endscan for scan cases, or by ExecOpenIndices and - * ExecCloseIndices for update cases. Other callers will need to + * ExecCloseIndices for update cases. Other callers will need to * obtain their own locks. * * This is a convenience routine adapted for indexscan use. @@ -241,8 +241,8 @@ index_beginscan(Relation heapRelation, scan = index_beginscan_internal(indexRelation, nkeys, key); /* - * Save additional parameters into the scandesc. Everything else was - * set up by RelationGetIndexScan. + * Save additional parameters into the scandesc. Everything else was set + * up by RelationGetIndexScan. */ scan->is_multiscan = false; scan->heapRelation = heapRelation; @@ -267,8 +267,8 @@ index_beginscan_multi(Relation indexRelation, scan = index_beginscan_internal(indexRelation, nkeys, key); /* - * Save additional parameters into the scandesc. Everything else was - * set up by RelationGetIndexScan. + * Save additional parameters into the scandesc. Everything else was set + * up by RelationGetIndexScan. */ scan->is_multiscan = true; scan->xs_snapshot = snapshot; @@ -294,14 +294,14 @@ index_beginscan_internal(Relation indexRelation, * Acquire AccessShareLock for the duration of the scan * * Note: we could get an SI inval message here and consequently have to - * rebuild the relcache entry. The refcount increment above ensures - * that we will rebuild it and not just flush it... + * rebuild the relcache entry. The refcount increment above ensures that + * we will rebuild it and not just flush it... */ LockRelation(indexRelation, AccessShareLock); /* - * LockRelation can clean rd_aminfo structure, so fill procedure - * after LockRelation + * LockRelation can clean rd_aminfo structure, so fill procedure after + * LockRelation */ GET_REL_PROCEDURE(ambeginscan); @@ -425,8 +425,8 @@ index_restrpos(IndexScanDesc scan) /* * We do not reset got_tuple; so if the scan is actually being - * short-circuited by index_getnext, the effective position - * restoration is done by restoring unique_tuple_pos. 
+ * short-circuited by index_getnext, the effective position restoration is + * done by restoring unique_tuple_pos. */ scan->unique_tuple_pos = scan->unique_tuple_mark; @@ -454,19 +454,19 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) /* * If we already got a tuple and it must be unique, there's no need to - * make the index AM look through any additional tuples. (This can - * save a useful amount of work in scenarios where there are many dead - * tuples due to heavy update activity.) + * make the index AM look through any additional tuples. (This can save a + * useful amount of work in scenarios where there are many dead tuples due + * to heavy update activity.) * * To do this we must keep track of the logical scan position * (before/on/after tuple). Also, we have to be sure to release scan - * resources before returning NULL; if we fail to do so then a - * multi-index scan can easily run the system out of free buffers. We - * can release index-level resources fairly cheaply by calling - * index_rescan. This means there are two persistent states as far as - * the index AM is concerned: on-tuple and rescanned. If we are - * actually asked to re-fetch the single tuple, we have to go through - * a fresh indexscan startup, which penalizes that (infrequent) case. + * resources before returning NULL; if we fail to do so then a multi-index + * scan can easily run the system out of free buffers. We can release + * index-level resources fairly cheaply by calling index_rescan. This + * means there are two persistent states as far as the index AM is + * concerned: on-tuple and rescanned. If we are actually asked to + * re-fetch the single tuple, we have to go through a fresh indexscan + * startup, which penalizes that (infrequent) case. */ if (scan->keys_are_unique && scan->got_tuple) { @@ -485,19 +485,18 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) if (new_tuple_pos == 0) { /* - * We are moving onto the unique tuple from having been off - * it. We just fall through and let the index AM do the work. - * Note we should get the right answer regardless of scan - * direction. + * We are moving onto the unique tuple from having been off it. We + * just fall through and let the index AM do the work. Note we + * should get the right answer regardless of scan direction. */ scan->unique_tuple_pos = 0; /* need to update position */ } else { /* - * Moving off the tuple; must do amrescan to release - * index-level pins before we return NULL. Since index_rescan - * will reset my state, must save and restore... + * Moving off the tuple; must do amrescan to release index-level + * pins before we return NULL. Since index_rescan will reset my + * state, must save and restore... */ int unique_tuple_mark = scan->unique_tuple_mark; @@ -520,8 +519,7 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) bool found; /* - * The AM's gettuple proc finds the next tuple matching the scan - * keys. + * The AM's gettuple proc finds the next tuple matching the scan keys. */ found = DatumGetBool(FunctionCall2(procedure, PointerGetDatum(scan), @@ -556,9 +554,9 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) continue; /* - * If we can't see it, maybe no one else can either. Check to see - * if the tuple is dead to all transactions. If so, signal the - * index AM to not return it on future indexscans. + * If we can't see it, maybe no one else can either. Check to see if + * the tuple is dead to all transactions. 
If so, signal the index AM + * to not return it on future indexscans. * * We told heap_release_fetch to keep a pin on the buffer, so we can * re-access the tuple here. But we must re-lock the buffer first. @@ -576,8 +574,8 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) scan->got_tuple = true; /* - * If we just fetched a known-unique tuple, then subsequent calls will - * go through the short-circuit code above. unique_tuple_pos has been + * If we just fetched a known-unique tuple, then subsequent calls will go + * through the short-circuit code above. unique_tuple_pos has been * initialized to 0, which is the correct state ("on row"). */ @@ -805,11 +803,10 @@ index_getprocinfo(Relation irel, procId = loc[procindex]; /* - * Complain if function was not found during - * IndexSupportInitialize. This should not happen unless the - * system tables contain bogus entries for the index opclass. (If - * an AM wants to allow a support function to be optional, it can - * use index_getprocid.) + * Complain if function was not found during IndexSupportInitialize. + * This should not happen unless the system tables contain bogus + * entries for the index opclass. (If an AM wants to allow a support + * function to be optional, it can use index_getprocid.) */ if (!RegProcedureIsValid(procId)) elog(ERROR, "missing support function %d for attribute %d of index \"%s\"", diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index c73ba358ec..33c7612aac 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.126 2005/10/12 17:18:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.127 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -93,30 +93,29 @@ top: /* * If the page was split between the time that we surrendered our read - * lock and acquired our write lock, then this page may no longer be - * the right place for the key we want to insert. In this case, we - * need to move right in the tree. See Lehman and Yao for an - * excruciatingly precise description. + * lock and acquired our write lock, then this page may no longer be the + * right place for the key we want to insert. In this case, we need to + * move right in the tree. See Lehman and Yao for an excruciatingly + * precise description. */ buf = _bt_moveright(rel, buf, natts, itup_scankey, false, BT_WRITE); /* - * If we're not allowing duplicates, make sure the key isn't already - * in the index. + * If we're not allowing duplicates, make sure the key isn't already in + * the index. * - * NOTE: obviously, _bt_check_unique can only detect keys that are - * already in the index; so it cannot defend against concurrent - * insertions of the same key. We protect against that by means of - * holding a write lock on the target page. Any other would-be - * inserter of the same key must acquire a write lock on the same - * target page, so only one would-be inserter can be making the check - * at one time. Furthermore, once we are past the check we hold write - * locks continuously until we have performed our insertion, so no - * later inserter can fail to see our insertion. (This requires some - * care in _bt_insertonpg.) 
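
The index_getnext() comments above describe short-circuiting a known-unique scan: once the single match has been returned, later calls are answered from cached state instead of asking the index AM again. A toy iterator showing the idea; UniqueScan and all its fields are hypothetical, and it ignores the mark/restore and rescan details the real code has to handle:

#include <stdio.h>
#include <stdbool.h>

/* Toy scan over an int array known to contain at most one match.  Once the
 * match has been returned, further calls are answered from the cached
 * position instead of rescanning the array. */
typedef struct
{
    const int *data;
    int        ndata;
    int        key;
    bool       got_tuple;       /* have we already returned the match? */
    int        unique_pos;      /* -1 = before, 0 = on, +1 = after the match */
    int        match_index;
} UniqueScan;

static void scan_init(UniqueScan *s, const int *data, int ndata, int key)
{
    s->data = data;
    s->ndata = ndata;
    s->key = key;
    s->got_tuple = false;
    s->unique_pos = -1;
    s->match_index = -1;
}

static bool scan_getnext(UniqueScan *s, int *out)
{
    if (s->got_tuple)
    {
        /* short circuit: step off the cached tuple without rescanning */
        s->unique_pos = 1;
        return false;
    }
    for (int i = 0; i < s->ndata; i++)
    {
        if (s->data[i] == s->key)
        {
            s->got_tuple = true;
            s->unique_pos = 0;
            s->match_index = i;
            *out = s->data[i];
            return true;
        }
    }
    return false;
}

int main(void)
{
    int data[] = {3, 9, 42, 17};
    UniqueScan s;
    int val;

    scan_init(&s, data, 4, 42);
    while (scan_getnext(&s, &val))
        printf("found %d at index %d\n", val, s.match_index);
    return 0;
}
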
+ * NOTE: obviously, _bt_check_unique can only detect keys that are already in + * the index; so it cannot defend against concurrent insertions of the + * same key. We protect against that by means of holding a write lock on + * the target page. Any other would-be inserter of the same key must + * acquire a write lock on the same target page, so only one would-be + * inserter can be making the check at one time. Furthermore, once we are + * past the check we hold write locks continuously until we have performed + * our insertion, so no later inserter can fail to see our insertion. + * (This requires some care in _bt_insertonpg.) * - * If we must wait for another xact, we release the lock while waiting, - * and then must start over completely. + * If we must wait for another xact, we release the lock while waiting, and + * then must start over completely. */ if (index_is_unique) { @@ -167,8 +166,8 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, maxoff = PageGetMaxOffsetNumber(page); /* - * Find first item >= proposed new item. Note we could also get a - * pointer to end-of-page here. + * Find first item >= proposed new item. Note we could also get a pointer + * to end-of-page here. */ offset = _bt_binsrch(rel, buf, natts, itup_scankey, false); @@ -194,24 +193,24 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, /* * We can skip items that are marked killed. * - * Formerly, we applied _bt_isequal() before checking the kill - * flag, so as to fall out of the item loop as soon as - * possible. However, in the presence of heavy update activity - * an index may contain many killed items with the same key; - * running _bt_isequal() on each killed item gets expensive. - * Furthermore it is likely that the non-killed version of - * each key appears first, so that we didn't actually get to - * exit any sooner anyway. So now we just advance over killed - * items as quickly as we can. We only apply _bt_isequal() - * when we get to a non-killed item or the end of the page. + * Formerly, we applied _bt_isequal() before checking the kill flag, + * so as to fall out of the item loop as soon as possible. + * However, in the presence of heavy update activity an index may + * contain many killed items with the same key; running + * _bt_isequal() on each killed item gets expensive. Furthermore + * it is likely that the non-killed version of each key appears + * first, so that we didn't actually get to exit any sooner + * anyway. So now we just advance over killed items as quickly as + * we can. We only apply _bt_isequal() when we get to a non-killed + * item or the end of the page. */ if (!ItemIdDeleted(curitemid)) { /* - * _bt_compare returns 0 for (1,NULL) and (1,NULL) - - * this's how we handling NULLs - and so we must not use - * _bt_compare in real comparison, but only for - * ordering/finding items on pages. - vadim 03/24/97 + * _bt_compare returns 0 for (1,NULL) and (1,NULL) - this's + * how we handling NULLs - and so we must not use _bt_compare + * in real comparison, but only for ordering/finding items on + * pages. 
- vadim 03/24/97 */ if (!_bt_isequal(itupdesc, page, offset, natts, itup_scankey)) break; /* we're past all the equal tuples */ @@ -246,15 +245,15 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, */ ereport(ERROR, (errcode(ERRCODE_UNIQUE_VIOLATION), - errmsg("duplicate key violates unique constraint \"%s\"", - RelationGetRelationName(rel)))); + errmsg("duplicate key violates unique constraint \"%s\"", + RelationGetRelationName(rel)))); } else if (htup.t_data != NULL) { /* - * Hmm, if we can't see the tuple, maybe it can be - * marked killed. This logic should match - * index_getnext and btgettuple. + * Hmm, if we can't see the tuple, maybe it can be marked + * killed. This logic should match index_getnext and + * btgettuple. */ LockBuffer(hbuffer, BUFFER_LOCK_SHARE); if (HeapTupleSatisfiesVacuum(htup.t_data, RecentGlobalXmin, @@ -377,15 +376,15 @@ _bt_insertonpg(Relation rel, itemsz = IndexTupleDSize(btitem->bti_itup) + (sizeof(BTItemData) - sizeof(IndexTupleData)); - itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but - * we need to be consistent */ + itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we + * need to be consistent */ /* - * Check whether the item can fit on a btree page at all. (Eventually, - * we ought to try to apply TOAST methods if not.) We actually need to - * be able to fit three items on every page, so restrict any one item - * to 1/3 the per-page available space. Note that at this point, - * itemsz doesn't include the ItemId. + * Check whether the item can fit on a btree page at all. (Eventually, we + * ought to try to apply TOAST methods if not.) We actually need to be + * able to fit three items on every page, so restrict any one item to 1/3 + * the per-page available space. Note that at this point, itemsz doesn't + * include the ItemId. */ if (itemsz > BTMaxItemSize(page)) ereport(ERROR, @@ -393,9 +392,9 @@ _bt_insertonpg(Relation rel, errmsg("index row size %lu exceeds btree maximum, %lu", (unsigned long) itemsz, (unsigned long) BTMaxItemSize(page)), - errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n" - "Consider a function index of an MD5 hash of the value, " - "or use full text indexing."))); + errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n" + "Consider a function index of an MD5 hash of the value, " + "or use full text indexing."))); /* * Determine exactly where new item will go. @@ -432,11 +431,11 @@ _bt_insertonpg(Relation rel, /* * step right to next non-dead page * - * must write-lock that page before releasing write lock on - * current page; else someone else's _bt_check_unique scan - * could fail to see our insertion. write locks on - * intermediate dead pages won't do because we don't know when - * they will get de-linked from the tree. + * must write-lock that page before releasing write lock on current + * page; else someone else's _bt_check_unique scan could fail to + * see our insertion. write locks on intermediate dead pages + * won't do because we don't know when they will get de-linked + * from the tree. */ Buffer rbuf = InvalidBuffer; @@ -459,9 +458,9 @@ _bt_insertonpg(Relation rel, } /* - * Now we are on the right page, so find the insert position. If - * we moved right at all, we know we should insert at the start of - * the page, else must find the position by searching. + * Now we are on the right page, so find the insert position. 
If we + * moved right at all, we know we should insert at the start of the + * page, else must find the position by searching. */ if (movedright) newitemoff = P_FIRSTDATAKEY(lpageop); @@ -472,9 +471,9 @@ _bt_insertonpg(Relation rel, /* * Do we need to split the page to fit the item on it? * - * Note: PageGetFreeSpace() subtracts sizeof(ItemIdData) from its result, - * so this comparison is correct even though we appear to be - * accounting only for the item and not for its line pointer. + * Note: PageGetFreeSpace() subtracts sizeof(ItemIdData) from its result, so + * this comparison is correct even though we appear to be accounting only + * for the item and not for its line pointer. */ if (PageGetFreeSpace(page) < itemsz) { @@ -522,12 +521,11 @@ _bt_insertonpg(Relation rel, itup_blkno = BufferGetBlockNumber(buf); /* - * If we are doing this insert because we split a page that was - * the only one on its tree level, but was not the root, it may - * have been the "fast root". We need to ensure that the fast - * root link points at or above the current page. We can safely - * acquire a lock on the metapage here --- see comments for - * _bt_newroot(). + * If we are doing this insert because we split a page that was the + * only one on its tree level, but was not the root, it may have been + * the "fast root". We need to ensure that the fast root link points + * at or above the current page. We can safely acquire a lock on the + * metapage here --- see comments for _bt_newroot(). */ if (split_only_page) { @@ -692,11 +690,11 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, lopaque->btpo.level = ropaque->btpo.level = oopaque->btpo.level; /* - * If the page we're splitting is not the rightmost page at its level - * in the tree, then the first entry on the page is the high key for - * the page. We need to copy that to the right half. Otherwise - * (meaning the rightmost page case), all the items on the right half - * will be user data. + * If the page we're splitting is not the rightmost page at its level in + * the tree, then the first entry on the page is the high key for the + * page. We need to copy that to the right half. Otherwise (meaning the + * rightmost page case), all the items on the right half will be user + * data. */ rightoff = P_HIKEY; @@ -712,9 +710,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, } /* - * The "high key" for the new left page will be the first key that's - * going to go into the new right page. This might be either the - * existing data item at position firstright, or the incoming tuple. + * The "high key" for the new left page will be the first key that's going + * to go into the new right page. This might be either the existing data + * item at position firstright, or the incoming tuple. */ leftoff = P_HIKEY; if (!newitemonleft && newitemoff == firstright) @@ -806,8 +804,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, /* * We have to grab the right sibling (if any) and fix the prev pointer * there. We are guaranteed that this is deadlock-free since no other - * writer will be holding a lock on that page and trying to move left, - * and all readers release locks on a page before trying to fetch its + * writer will be holding a lock on that page and trying to move left, and + * all readers release locks on a page before trying to fetch its * neighbors. 
*/ @@ -821,8 +819,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, } /* - * Right sibling is locked, new siblings are prepared, but original - * page is not updated yet. Log changes before continuing. + * Right sibling is locked, new siblings are prepared, but original page + * is not updated yet. Log changes before continuing. * * NO EREPORT(ERROR) till right sibling is updated. */ @@ -850,10 +848,10 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, xlrec.level = lopaque->btpo.level; /* - * Direct access to page is not good but faster - we should - * implement some new func in page API. Note we only store the - * tuples themselves, knowing that the item pointers are in the - * same order and can be reconstructed by scanning the tuples. + * Direct access to page is not good but faster - we should implement + * some new func in page API. Note we only store the tuples + * themselves, knowing that the item pointers are in the same order + * and can be reconstructed by scanning the tuples. */ xlrec.leftlen = ((PageHeader) leftpage)->pd_special - ((PageHeader) leftpage)->pd_upper; @@ -903,13 +901,13 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, } /* - * By here, the original data page has been split into two new halves, - * and these are correct. The algorithm requires that the left page - * never move during a split, so we copy the new left page back on top - * of the original. Note that this is not a waste of time, since we - * also require (in the page management code) that the center of a - * page always be clean, and the most efficient way to guarantee this - * is just to compact the data by reinserting it into a new left page. + * By here, the original data page has been split into two new halves, and + * these are correct. The algorithm requires that the left page never + * move during a split, so we copy the new left page back on top of the + * original. Note that this is not a waste of time, since we also require + * (in the page management code) that the center of a page always be + * clean, and the most efficient way to guarantee this is just to compact + * the data by reinserting it into a new left page. */ PageRestoreTempPage(leftpage, origpage); @@ -984,13 +982,13 @@ _bt_findsplitloc(Relation rel, MAXALIGN(sizeof(BTPageOpaqueData)); /* - * Finding the best possible split would require checking all the - * possible split points, because of the high-key and left-key special - * cases. That's probably more work than it's worth; instead, stop as - * soon as we find a "good-enough" split, where good-enough is defined - * as an imbalance in free space of no more than pagesize/16 - * (arbitrary...) This should let us stop near the middle on most - * pages, instead of plowing to the end. + * Finding the best possible split would require checking all the possible + * split points, because of the high-key and left-key special cases. + * That's probably more work than it's worth; instead, stop as soon as we + * find a "good-enough" split, where good-enough is defined as an + * imbalance in free space of no more than pagesize/16 (arbitrary...) This + * should let us stop near the middle on most pages, instead of plowing to + * the end. */ goodenough = leftspace / 16; @@ -1006,8 +1004,8 @@ _bt_findsplitloc(Relation rel, dataitemtotal = rightspace - (int) PageGetFreeSpace(page); /* - * Scan through the data items and calculate space usage for a split - * at each possible position. 
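
The _bt_findsplitloc() comments around this point settle for the first "good enough" split, one whose free-space imbalance is at most pagesize/16, rather than evaluating every candidate. A standalone sketch of that scan over made-up item sizes; it leaves out the high-key and line-pointer accounting the real routine does:

#include <stdio.h>
#include <stdlib.h>

/* Find a split point for items[0..nitems-1] on a page with "pagesize"
 * usable bytes per half: remember the best split seen so far, and stop at
 * the first candidate whose free-space imbalance is within pagesize/16. */
static int find_split(const int *items, int nitems, int pagesize)
{
    int goodenough = pagesize / 16;
    int total = 0;
    int bestdelta = -1;
    int bestsplit = -1;
    int left = 0;

    for (int i = 0; i < nitems; i++)
        total += items[i];

    for (int split = 1; split < nitems; split++)
    {
        left += items[split - 1];

        int leftfree = pagesize - left;
        int rightfree = pagesize - (total - left);
        int delta = abs(leftfree - rightfree);

        if (bestdelta < 0 || delta < bestdelta)
        {
            bestdelta = delta;
            bestsplit = split;
        }
        if (delta <= goodenough)
            break;              /* good enough; stop near the middle */
    }
    return bestsplit;
}

int main(void)
{
    int items[] = {80, 120, 60, 200, 90, 40, 150, 70};
    int split = find_split(items, 8, 8192);

    printf("split before item %d\n", split);
    return 0;
}
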
+ * Scan through the data items and calculate space usage for a split at + * each possible position. */ dataitemstoleft = 0; maxoff = PageGetMaxOffsetNumber(page); @@ -1024,9 +1022,9 @@ _bt_findsplitloc(Relation rel, itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData); /* - * We have to allow for the current item becoming the high key of - * the left page; therefore it counts against left space as well - * as right space. + * We have to allow for the current item becoming the high key of the + * left page; therefore it counts against left space as well as right + * space. */ leftfree = leftspace - dataitemstoleft - (int) itemsz; rightfree = rightspace - (dataitemtotal - dataitemstoleft); @@ -1058,8 +1056,8 @@ _bt_findsplitloc(Relation rel, } /* - * I believe it is not possible to fail to find a feasible split, but - * just in case ... + * I believe it is not possible to fail to find a feasible split, but just + * in case ... */ if (!state.have_split) elog(ERROR, "could not find a feasible split point for \"%s\"", @@ -1105,8 +1103,7 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright, { /* * On a rightmost page, try to equalize right free space with - * twice the left free space. See comments for - * _bt_findsplitloc. + * twice the left free space. See comments for _bt_findsplitloc. */ delta = (2 * leftfree) - rightfree; } @@ -1153,19 +1150,18 @@ _bt_insert_parent(Relation rel, bool is_only) { /* - * Here we have to do something Lehman and Yao don't talk about: deal - * with a root split and construction of a new root. If our stack is - * empty then we have just split a node on what had been the root - * level when we descended the tree. If it was still the root then we - * perform a new-root construction. If it *wasn't* the root anymore, - * search to find the next higher level that someone constructed - * meanwhile, and find the right place to insert as for the normal - * case. + * Here we have to do something Lehman and Yao don't talk about: deal with + * a root split and construction of a new root. If our stack is empty + * then we have just split a node on what had been the root level when we + * descended the tree. If it was still the root then we perform a + * new-root construction. If it *wasn't* the root anymore, search to find + * the next higher level that someone constructed meanwhile, and find the + * right place to insert as for the normal case. * - * If we have to search for the parent level, we do so by re-descending - * from the root. This is not super-efficient, but it's rare enough - * not to matter. (This path is also taken when called from WAL - * recovery --- we have no stack in that case.) + * If we have to search for the parent level, we do so by re-descending from + * the root. This is not super-efficient, but it's rare enough not to + * matter. (This path is also taken when called from WAL recovery --- we + * have no stack in that case.) */ if (is_root) { @@ -1219,9 +1215,9 @@ _bt_insert_parent(Relation rel, /* * Find the parent buffer and get the parent page. * - * Oops - if we were moved right then we need to change stack item! - * We want to find parent pointing to where we are, right ? - - * vadim 05/27/97 + * Oops - if we were moved right then we need to change stack item! We + * want to find parent pointing to where we are, right ? 
- vadim + * 05/27/97 */ ItemPointerSet(&(stack->bts_btitem.bti_itup.t_tid), bknum, P_HIKEY); @@ -1291,9 +1287,9 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) maxoff = PageGetMaxOffsetNumber(page); /* - * start = InvalidOffsetNumber means "search the whole page". - * We need this test anyway due to possibility that page has a - * high key now when it didn't before. + * start = InvalidOffsetNumber means "search the whole page". We + * need this test anyway due to possibility that page has a high + * key now when it didn't before. */ if (start < minoff) start = minoff; @@ -1307,8 +1303,8 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) /* * These loops will check every item on the page --- but in an - * order that's attuned to the probability of where it - * actually is. Scan to the right first, then to the left. + * order that's attuned to the probability of where it actually + * is. Scan to the right first, then to the left. */ for (offnum = start; offnum <= maxoff; @@ -1424,9 +1420,9 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) metad->btm_fastlevel = rootopaque->btpo.level; /* - * Create downlink item for left page (old root). Since this will be - * the first item in a non-leaf page, it implicitly has minus-infinity - * key value, so we need not store any actual key in it. + * Create downlink item for left page (old root). Since this will be the + * first item in a non-leaf page, it implicitly has minus-infinity key + * value, so we need not store any actual key in it. */ itemsz = sizeof(BTItemData); new_item = (BTItem) palloc(itemsz); @@ -1434,17 +1430,17 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) ItemPointerSet(&(new_item->bti_itup.t_tid), lbkno, P_HIKEY); /* - * Insert the left page pointer into the new root page. The root page - * is the rightmost page on its level so there is no "high key" in it; - * the two items will go into positions P_HIKEY and P_FIRSTKEY. + * Insert the left page pointer into the new root page. The root page is + * the rightmost page on its level so there is no "high key" in it; the + * two items will go into positions P_HIKEY and P_FIRSTKEY. */ if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber) elog(PANIC, "failed to add leftkey to new root page"); pfree(new_item); /* - * Create downlink item for right page. The key for it is obtained - * from the "high key" position in the left page. + * Create downlink item for right page. The key for it is obtained from + * the "high key" position in the left page. */ itemid = PageGetItemId(lpage, P_HIKEY); itemsz = ItemIdGetLength(itemid); @@ -1476,8 +1472,8 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) rdata[0].next = &(rdata[1]); /* - * Direct access to page is not good but faster - we should - * implement some new func in page API. + * Direct access to page is not good but faster - we should implement + * some new func in page API. 
*/ rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper; rdata[1].len = ((PageHeader) rootpage)->pd_special - diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 52d60abaec..927860030c 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.87 2005/08/12 14:34:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.88 2005/10/15 02:49:09 momjian Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -115,8 +115,8 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) metaopaque->btpo_flags = BTP_META; /* - * Set pd_lower just past the end of the metadata. This is not - * essential but it makes the page look compressible to xlog.c. + * Set pd_lower just past the end of the metadata. This is not essential + * but it makes the page look compressible to xlog.c. */ ((PageHeader) page)->pd_lower = ((char *) metad + sizeof(BTMetaPageData)) - (char *) page; @@ -198,26 +198,26 @@ _bt_getroot(Relation rel, int access) LockBuffer(metabuf, BT_WRITE); /* - * Race condition: if someone else initialized the metadata - * between the time we released the read lock and acquired the - * write lock, we must avoid doing it again. + * Race condition: if someone else initialized the metadata between + * the time we released the read lock and acquired the write lock, we + * must avoid doing it again. */ if (metad->btm_root != P_NONE) { /* - * Metadata initialized by someone else. In order to - * guarantee no deadlocks, we have to release the metadata - * page and start all over again. (Is that really true? But - * it's hardly worth trying to optimize this case.) + * Metadata initialized by someone else. In order to guarantee no + * deadlocks, we have to release the metadata page and start all + * over again. (Is that really true? But it's hardly worth trying + * to optimize this case.) */ _bt_relbuf(rel, metabuf); return _bt_getroot(rel, access); } /* - * Get, initialize, write, and leave a lock of the appropriate - * type on the new root page. Since this is the first page in the - * tree, it's a leaf as well as the root. + * Get, initialize, write, and leave a lock of the appropriate type on + * the new root page. Since this is the first page in the tree, it's + * a leaf as well as the root. */ rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootblkno = BufferGetBlockNumber(rootbuf); @@ -266,9 +266,9 @@ _bt_getroot(Relation rel, int access) _bt_wrtnorelbuf(rel, rootbuf); /* - * swap root write lock for read lock. There is no danger of - * anyone else accessing the new root page while it's unlocked, - * since no one else knows where it is yet. + * swap root write lock for read lock. There is no danger of anyone + * else accessing the new root page while it's unlocked, since no one + * else knows where it is yet. */ LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK); LockBuffer(rootbuf, BT_READ); @@ -312,8 +312,8 @@ _bt_getroot(Relation rel, int access) } /* - * By here, we have a pin and read lock on the root page, and no lock - * set on the metadata page. Return the root page's buffer. + * By here, we have a pin and read lock on the root page, and no lock set + * on the metadata page. Return the root page's buffer. */ return rootbuf; } @@ -435,27 +435,26 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) /* * First see if the FSM knows of any free pages. 
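
The _bt_getroot() hunks above re-test btm_root after trading the read lock on the metapage for a write lock, since another backend may have created the root in between. The same check, lock, re-check pattern with a POSIX rwlock, purely as an illustration; get_root, root_blkno and the "0 means no root yet" convention are assumptions of the sketch:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t meta_lock = PTHREAD_RWLOCK_INITIALIZER;
static int root_blkno = 0;      /* 0 plays the role of P_NONE: no root yet */

/* Return the root block, creating it on first use.  The second test under
 * the write lock is what avoids initializing the root twice. */
static int get_root(void)
{
    int result;

    pthread_rwlock_rdlock(&meta_lock);
    result = root_blkno;
    pthread_rwlock_unlock(&meta_lock);
    if (result != 0)
        return result;

    pthread_rwlock_wrlock(&meta_lock);
    if (root_blkno == 0)        /* nobody beat us to it? */
        root_blkno = 1;         /* pretend we allocated and initialized it */
    result = root_blkno;
    pthread_rwlock_unlock(&meta_lock);
    return result;
}

int main(void)
{
    printf("root is block %d\n", get_root());
    printf("root is still block %d\n", get_root());
    return 0;
}
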
* - * We can't trust the FSM's report unreservedly; we have to check - * that the page is still free. (For example, an already-free - * page could have been re-used between the time the last VACUUM - * scanned it and the time the VACUUM made its FSM updates.) + * We can't trust the FSM's report unreservedly; we have to check that + * the page is still free. (For example, an already-free page could + * have been re-used between the time the last VACUUM scanned it and + * the time the VACUUM made its FSM updates.) * - * In fact, it's worse than that: we can't even assume that it's safe - * to take a lock on the reported page. If somebody else has a - * lock on it, or even worse our own caller does, we could - * deadlock. (The own-caller scenario is actually not improbable. - * Consider an index on a serial or timestamp column. Nearly all - * splits will be at the rightmost page, so it's entirely likely - * that _bt_split will call us while holding a lock on the page - * most recently acquired from FSM. A VACUUM running concurrently - * with the previous split could well have placed that page back - * in FSM.) + * In fact, it's worse than that: we can't even assume that it's safe to + * take a lock on the reported page. If somebody else has a lock on + * it, or even worse our own caller does, we could deadlock. (The + * own-caller scenario is actually not improbable. Consider an index + * on a serial or timestamp column. Nearly all splits will be at the + * rightmost page, so it's entirely likely that _bt_split will call us + * while holding a lock on the page most recently acquired from FSM. + * A VACUUM running concurrently with the previous split could well + * have placed that page back in FSM.) * - * To get around that, we ask for only a conditional lock on the - * reported page. If we fail, then someone else is using the - * page, and we may reasonably assume it's not free. (If we - * happen to be wrong, the worst consequence is the page will be - * lost to use till the next VACUUM, which is no big problem.) + * To get around that, we ask for only a conditional lock on the reported + * page. If we fail, then someone else is using the page, and we may + * reasonably assume it's not free. (If we happen to be wrong, the + * worst consequence is the page will be lost to use till the next + * VACUUM, which is no big problem.) */ for (;;) { @@ -486,10 +485,10 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) /* * Extend the relation by one page. * - * We have to use a lock to ensure no one else is extending the rel - * at the same time, else we will both try to initialize the same - * new page. We can skip locking for new or temp relations, - * however, since no one else could be accessing them. + * We have to use a lock to ensure no one else is extending the rel at + * the same time, else we will both try to initialize the same new + * page. We can skip locking for new or temp relations, however, + * since no one else could be accessing them. */ needLock = !RELATION_IS_LOCAL(rel); @@ -504,8 +503,8 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) /* * Release the file-extension lock; it's now OK for someone else to * extend the relation some more. Note that we cannot release this - * lock before we have buffer lock on the new page, or we risk a - * race condition against btvacuumcleanup --- see comments therein. + * lock before we have buffer lock on the new page, or we risk a race + * condition against btvacuumcleanup --- see comments therein. 
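
The _bt_getbuf() comments above take only a conditional lock on a page the FSM reports as free, and fall back to extending the relation when that lock cannot be had. A minimal pthread_mutex_trylock() sketch of that reuse-or-extend decision; page_lock, grab_free_or_extend and the page array are invented for the example:

#include <pthread.h>
#include <stdio.h>

#define NPAGES 4

static pthread_mutex_t page_lock[NPAGES] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};
static int npages = NPAGES;

/* Try to reuse a "free" page without risking a deadlock: only a
 * conditional lock is attempted.  If that fails, somebody is using the
 * page, so pretend to extend the relation instead. */
static int grab_free_or_extend(int candidate)
{
    if (candidate >= 0 && candidate < npages &&
        pthread_mutex_trylock(&page_lock[candidate]) == 0)
    {
        printf("reusing page %d\n", candidate);
        return candidate;
    }
    printf("candidate busy or invalid, extending to page %d\n", npages);
    return npages++;
}

int main(void)
{
    pthread_mutex_lock(&page_lock[2]);  /* simulate another backend holding it */

    grab_free_or_extend(1);             /* free: gets reused */
    grab_free_or_extend(2);             /* busy: falls back to extension */
    return 0;
}
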
*/ if (needLock) UnlockRelationForExtension(rel, ExclusiveLock); @@ -614,10 +613,10 @@ _bt_page_recyclable(Page page) BTPageOpaque opaque; /* - * It's possible to find an all-zeroes page in an index --- for - * example, a backend might successfully extend the relation one page - * and then crash before it is able to make a WAL entry for adding the - * page. If we find a zeroed page then reclaim it. + * It's possible to find an all-zeroes page in an index --- for example, a + * backend might successfully extend the relation one page and then crash + * before it is able to make a WAL entry for adding the page. If we find a + * zeroed page then reclaim it. */ if (PageIsNew(page)) return true; @@ -672,9 +671,9 @@ _bt_delitems(Relation rel, Buffer buf, rdata[0].next = &(rdata[1]); /* - * The target-offsets array is not in the buffer, but pretend that - * it is. When XLogInsert stores the whole buffer, the offsets - * array need not be stored too. + * The target-offsets array is not in the buffer, but pretend that it + * is. When XLogInsert stores the whole buffer, the offsets array + * need not be stored too. */ if (nitems > 0) { @@ -747,8 +746,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) BTPageOpaque opaque; /* - * We can never delete rightmost pages nor root pages. While at it, - * check that page is not already deleted and is empty. + * We can never delete rightmost pages nor root pages. While at it, check + * that page is not already deleted and is empty. */ page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -760,8 +759,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) } /* - * Save info about page, including a copy of its high key (it must - * have one, being non-rightmost). + * Save info about page, including a copy of its high key (it must have + * one, being non-rightmost). */ target = BufferGetBlockNumber(buf); targetlevel = opaque->btpo.level; @@ -770,11 +769,11 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) targetkey = CopyBTItem((BTItem) PageGetItem(page, itemid)); /* - * We need to get an approximate pointer to the page's parent page. - * Use the standard search mechanism to search for the page's high - * key; this will give us a link to either the current parent or - * someplace to its left (if there are multiple equal high keys). To - * avoid deadlocks, we'd better drop the target page lock first. + * We need to get an approximate pointer to the page's parent page. Use + * the standard search mechanism to search for the page's high key; this + * will give us a link to either the current parent or someplace to its + * left (if there are multiple equal high keys). To avoid deadlocks, we'd + * better drop the target page lock first. */ _bt_relbuf(rel, buf); /* we need a scan key to do our search, so build one */ @@ -786,9 +785,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) _bt_relbuf(rel, lbuf); /* - * If we are trying to delete an interior page, _bt_search did more - * than we needed. Locate the stack item pointing to our parent - * level. + * If we are trying to delete an interior page, _bt_search did more than + * we needed. Locate the stack item pointing to our parent level. */ ilevel = 0; for (;;) @@ -803,16 +801,15 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) /* * We have to lock the pages we need to modify in the standard order: - * moving right, then up. Else we will deadlock against other - * writers. + * moving right, then up. Else we will deadlock against other writers. 
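
The deletion path above insists on taking page locks in one standard order, moving right and then up, so writers can never wait on each other in opposite orders. A bare-bones illustration of that fixed acquisition order, with three mutexes standing in for the left sibling, the target and the parent:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t left_page = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t target_page = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t parent_page = PTHREAD_MUTEX_INITIALIZER;

/* Every writer takes the locks in the same order: left sibling, then
 * target, then parent ("moving right, then up").  Because no two callers
 * ever wait in opposite orders, this cannot deadlock. */
static void delete_page(void)
{
    pthread_mutex_lock(&left_page);
    pthread_mutex_lock(&target_page);
    pthread_mutex_lock(&parent_page);

    printf("all three pages locked; performing deletion\n");

    pthread_mutex_unlock(&parent_page);
    pthread_mutex_unlock(&target_page);
    pthread_mutex_unlock(&left_page);
}

int main(void)
{
    delete_page();
    return 0;
}
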
* - * So, we need to find and write-lock the current left sibling of the - * target page. The sibling that was current a moment ago could have - * split, so we may have to move right. This search could fail if - * either the sibling or the target page was deleted by someone else - * meanwhile; if so, give up. (Right now, that should never happen, - * since page deletion is only done in VACUUM and there shouldn't be - * multiple VACUUMs concurrently on the same table.) + * So, we need to find and write-lock the current left sibling of the target + * page. The sibling that was current a moment ago could have split, so + * we may have to move right. This search could fail if either the + * sibling or the target page was deleted by someone else meanwhile; if + * so, give up. (Right now, that should never happen, since page deletion + * is only done in VACUUM and there shouldn't be multiple VACUUMs + * concurrently on the same table.) */ if (leftsib != P_NONE) { @@ -839,19 +836,18 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) lbuf = InvalidBuffer; /* - * Next write-lock the target page itself. It should be okay to take - * just a write lock not a superexclusive lock, since no scans would - * stop on an empty page. + * Next write-lock the target page itself. It should be okay to take just + * a write lock not a superexclusive lock, since no scans would stop on an + * empty page. */ buf = _bt_getbuf(rel, target, BT_WRITE); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* - * Check page is still empty etc, else abandon deletion. The empty - * check is necessary since someone else might have inserted into it - * while we didn't have it locked; the others are just for paranoia's - * sake. + * Check page is still empty etc, else abandon deletion. The empty check + * is necessary since someone else might have inserted into it while we + * didn't have it locked; the others are just for paranoia's sake. */ if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) || P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page)) @@ -872,9 +868,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) rbuf = _bt_getbuf(rel, rightsib, BT_WRITE); /* - * Next find and write-lock the current parent of the target page. - * This is essentially the same as the corresponding step of - * splitting. + * Next find and write-lock the current parent of the target page. This is + * essentially the same as the corresponding step of splitting. */ ItemPointerSet(&(stack->bts_btitem.bti_itup.t_tid), target, P_HIKEY); @@ -887,8 +882,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) /* * If the target is the rightmost child of its parent, then we can't - * delete, unless it's also the only child --- in which case the - * parent changes to half-dead status. + * delete, unless it's also the only child --- in which case the parent + * changes to half-dead status. */ page = BufferGetPage(pbuf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -917,11 +912,10 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) } /* - * If we are deleting the next-to-last page on the target's level, - * then the rightsib is a candidate to become the new fast root. (In - * theory, it might be possible to push the fast root even further - * down, but the odds of doing so are slim, and the locking - * considerations daunting.) + * If we are deleting the next-to-last page on the target's level, then + * the rightsib is a candidate to become the new fast root. 
(In theory, it + * might be possible to push the fast root even further down, but the odds + * of doing so are slim, and the locking considerations daunting.) * * We can safely acquire a lock on the metapage here --- see comments for * _bt_newroot(). @@ -939,9 +933,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) metad = BTPageGetMeta(metapg); /* - * The expected case here is btm_fastlevel == targetlevel+1; - * if the fastlevel is <= targetlevel, something is wrong, and - * we choose to overwrite it to fix it. + * The expected case here is btm_fastlevel == targetlevel+1; if + * the fastlevel is <= targetlevel, something is wrong, and we + * choose to overwrite it to fix it. */ if (metad->btm_fastlevel > targetlevel + 1) { @@ -961,9 +955,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) /* * Update parent. The normal case is a tad tricky because we want to - * delete the target's downlink and the *following* key. Easiest way - * is to copy the right sibling's downlink over the target downlink, - * and then delete the following item. + * delete the target's downlink and the *following* key. Easiest way is + * to copy the right sibling's downlink over the target downlink, and then + * delete the following item. */ page = BufferGetPage(pbuf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -992,8 +986,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) } /* - * Update siblings' side-links. Note the target page's side-links - * will continue to point to the siblings. + * Update siblings' side-links. Note the target page's side-links will + * continue to point to the siblings. */ if (BufferIsValid(lbuf)) { @@ -1123,10 +1117,10 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) _bt_wrtbuf(rel, lbuf); /* - * If parent became half dead, recurse to try to delete it. Otherwise, - * if right sibling is empty and is now the last child of the parent, - * recurse to try to delete it. (These cases cannot apply at the same - * time, though the second case might itself recurse to the first.) + * If parent became half dead, recurse to try to delete it. Otherwise, if + * right sibling is empty and is now the last child of the parent, recurse + * to try to delete it. (These cases cannot apply at the same time, + * though the second case might itself recurse to the first.) */ if (parent_half_dead) { diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index d4232c847f..10e2fe6190 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.131 2005/09/02 19:02:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.132 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -39,9 +39,9 @@ typedef struct BTSpool *spool; /* - * spool2 is needed only when the index is an unique index. Dead - * tuples are put into spool2 instead of spool in order to avoid - * uniqueness check. + * spool2 is needed only when the index is an unique index. Dead tuples + * are put into spool2 instead of spool in order to avoid uniqueness + * check. */ BTSpool *spool2; double indtuples; @@ -72,10 +72,10 @@ btbuild(PG_FUNCTION_ARGS) BTBuildState buildstate; /* - * bootstrap processing does something strange, so don't use - * sort/build for initial catalog indices. 
at some point i need to - * look harder at this. (there is some kind of incremental processing - * going on there.) -- pma 08/29/95 + * bootstrap processing does something strange, so don't use sort/build + * for initial catalog indices. at some point i need to look harder at + * this. (there is some kind of incremental processing going on there.) + * -- pma 08/29/95 */ buildstate.usefast = (FastBuild && IsNormalProcessingMode()); buildstate.isUnique = indexInfo->ii_Unique; @@ -91,8 +91,8 @@ btbuild(PG_FUNCTION_ARGS) #endif /* BTREE_BUILD_STATS */ /* - * We expect to be called exactly once for any index relation. If - * that's not the case, big trouble's what we have. + * We expect to be called exactly once for any index relation. If that's + * not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", @@ -103,8 +103,8 @@ btbuild(PG_FUNCTION_ARGS) buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false); /* - * If building a unique index, put dead tuples in a second spool - * to keep them out of the uniqueness check. + * If building a unique index, put dead tuples in a second spool to + * keep them out of the uniqueness check. */ if (indexInfo->ii_Unique) buildstate.spool2 = _bt_spoolinit(index, false, true); @@ -129,8 +129,8 @@ btbuild(PG_FUNCTION_ARGS) /* * if we are doing bottom-up btree build, finish the build by (1) - * completing the sort of the spool file, (2) inserting the sorted - * tuples into btree pages and (3) building the upper levels. + * completing the sort of the spool file, (2) inserting the sorted tuples + * into btree pages and (3) building the upper levels. */ if (buildstate.usefast) { @@ -176,9 +176,8 @@ btbuildCallback(Relation index, btitem = _bt_formitem(itup); /* - * if we are doing bottom-up btree build, we insert the index into a - * spool file for subsequent processing. otherwise, we insert into - * the btree. + * if we are doing bottom-up btree build, we insert the index into a spool + * file for subsequent processing. otherwise, we insert into the btree. */ if (buildstate->usefast) { @@ -248,16 +247,16 @@ btgettuple(PG_FUNCTION_ARGS) bool res; /* - * If we've already initialized this scan, we can just advance it in - * the appropriate direction. If we haven't done so yet, we call a - * routine to get the first item in the scan. + * If we've already initialized this scan, we can just advance it in the + * appropriate direction. If we haven't done so yet, we call a routine to + * get the first item in the scan. */ if (ItemPointerIsValid(&(scan->currentItemData))) { /* - * Restore scan position using heap TID returned by previous call - * to btgettuple(). _bt_restscan() re-grabs the read lock on the - * buffer, too. + * Restore scan position using heap TID returned by previous call to + * btgettuple(). _bt_restscan() re-grabs the read lock on the buffer, + * too. */ _bt_restscan(scan); @@ -267,17 +266,16 @@ btgettuple(PG_FUNCTION_ARGS) if (scan->kill_prior_tuple) { /* - * Yes, so mark it by setting the LP_DELETE bit in the item - * flags. + * Yes, so mark it by setting the LP_DELETE bit in the item flags. */ offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); page = BufferGetPage(so->btso_curbuf); PageGetItemId(page, offnum)->lp_flags |= LP_DELETE; /* - * Since this can be redone later if needed, it's treated the - * same as a commit-hint-bit status update for heap tuples: we - * mark the buffer dirty but don't make a WAL log entry. 
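
The btgettuple() hunk above marks a provably dead entry by setting LP_DELETE in the item's flag bits and dirtying the buffer without writing WAL, on the grounds that the update can always be redone. A sketch of that kind of redo-able hint flag; the struct layout and the LP_DELETE value here are illustrative only:

#include <stdio.h>
#include <stdbool.h>

#define LP_DELETE 0x01          /* illustrative flag bit, not the real value */

typedef struct
{
    unsigned int lp_flags;
} ItemId;

typedef struct
{
    ItemId items[16];
    int    nitems;
    bool   dirty;               /* buffer needs writing, but no WAL entry */
} Buffer;

/* Mark one item killed.  Losing this update is harmless; a later scan will
 * simply redo it, so the buffer is dirtied without logging. */
static void mark_killed(Buffer *buf, int offnum)
{
    buf->items[offnum].lp_flags |= LP_DELETE;
    buf->dirty = true;
}

static bool is_killed(const Buffer *buf, int offnum)
{
    return (buf->items[offnum].lp_flags & LP_DELETE) != 0;
}

int main(void)
{
    Buffer buf = {.nitems = 3};

    mark_killed(&buf, 1);
    for (int i = 0; i < buf.nitems; i++)
        printf("item %d: %s\n", i, is_killed(&buf, i) ? "killed" : "live");
    printf("buffer dirty: %d\n", buf.dirty);
    return 0;
}
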
+ * Since this can be redone later if needed, it's treated the same + * as a commit-hint-bit status update for heap tuples: we mark the + * buffer dirty but don't make a WAL log entry. */ SetBufferCommitInfoNeedsSave(so->btso_curbuf); } @@ -306,11 +304,11 @@ btgettuple(PG_FUNCTION_ARGS) } /* - * Save heap TID to use it in _bt_restscan. Then release the read - * lock on the buffer so that we aren't blocking other backends. + * Save heap TID to use it in _bt_restscan. Then release the read lock on + * the buffer so that we aren't blocking other backends. * - * NOTE: we do keep the pin on the buffer! This is essential to ensure - * that someone else doesn't delete the index entry we are stopped on. + * NOTE: we do keep the pin on the buffer! This is essential to ensure that + * someone else doesn't delete the index entry we are stopped on. */ if (res) { @@ -333,7 +331,7 @@ Datum btgetmulti(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); + ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); int32 max_tids = PG_GETARG_INT32(2); int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); BTScanOpaque so = (BTScanOpaque) scan->opaque; @@ -355,6 +353,7 @@ btgetmulti(PG_FUNCTION_ARGS) res = _bt_next(scan, ForwardScanDirection); else res = _bt_first(scan, ForwardScanDirection); + /* * Skip killed tuples if asked to. */ @@ -381,8 +380,8 @@ btgetmulti(PG_FUNCTION_ARGS) } /* - * Save heap TID to use it in _bt_restscan. Then release the read - * lock on the buffer so that we aren't blocking other backends. + * Save heap TID to use it in _bt_restscan. Then release the read lock on + * the buffer so that we aren't blocking other backends. */ if (res) { @@ -456,8 +455,8 @@ btrescan(PG_FUNCTION_ARGS) } /* - * Reset the scan keys. Note that keys ordering stuff moved to - * _bt_first. - vadim 05/05/97 + * Reset the scan keys. Note that keys ordering stuff moved to _bt_first. + * - vadim 05/05/97 */ if (scankey && scan->numberOfKeys > 0) memmove(scan->keyData, @@ -593,21 +592,20 @@ btbulkdelete(PG_FUNCTION_ARGS) num_index_tuples = 0; /* - * The outer loop iterates over index leaf pages, the inner over items - * on a leaf page. We issue just one _bt_delitems() call per page, so - * as to minimize WAL traffic. + * The outer loop iterates over index leaf pages, the inner over items on + * a leaf page. We issue just one _bt_delitems() call per page, so as to + * minimize WAL traffic. * * Note that we exclusive-lock every leaf page containing data items, in - * sequence left to right. It sounds attractive to only - * exclusive-lock those containing items we need to delete, but - * unfortunately that is not safe: we could then pass a stopped - * indexscan, which could in rare cases lead to deleting the item it - * needs to find when it resumes. (See _bt_restscan --- this could - * only happen if an indexscan stops on a deletable item and then a - * page split moves that item into a page further to its right, which - * the indexscan will have no pin on.) We can skip obtaining - * exclusive lock on empty pages though, since no indexscan could be - * stopped on those. + * sequence left to right. It sounds attractive to only exclusive-lock + * those containing items we need to delete, but unfortunately that is not + * safe: we could then pass a stopped indexscan, which could in rare cases + * lead to deleting the item it needs to find when it resumes. 
(See + * _bt_restscan --- this could only happen if an indexscan stops on a + * deletable item and then a page split moves that item into a page + * further to its right, which the indexscan will have no pin on.) We can + * skip obtaining exclusive lock on empty pages though, since no indexscan + * could be stopped on those. */ buf = _bt_get_endpoint(rel, 0, false); if (BufferIsValid(buf)) /* check for empty index */ @@ -632,15 +630,15 @@ btbulkdelete(PG_FUNCTION_ARGS) if (minoff <= maxoff && !P_ISDELETED(opaque)) { /* - * Trade in the initial read lock for a super-exclusive - * write lock on this page. + * Trade in the initial read lock for a super-exclusive write + * lock on this page. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); LockBufferForCleanup(buf); /* - * Recompute minoff/maxoff, both of which could have - * changed while we weren't holding the lock. + * Recompute minoff/maxoff, both of which could have changed + * while we weren't holding the lock. */ minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); @@ -657,7 +655,7 @@ btbulkdelete(PG_FUNCTION_ARGS) ItemPointer htup; btitem = (BTItem) PageGetItem(page, - PageGetItemId(page, offnum)); + PageGetItemId(page, offnum)); htup = &(btitem->bti_itup.t_tid); if (callback(htup, callback_state)) { @@ -670,8 +668,8 @@ btbulkdelete(PG_FUNCTION_ARGS) } /* - * If we need to delete anything, do it and write the buffer; - * else just release the buffer. + * If we need to delete anything, do it and write the buffer; else + * just release the buffer. */ nextpage = opaque->btpo_next; if (ndeletable > 0) @@ -725,19 +723,19 @@ btvacuumcleanup(PG_FUNCTION_ARGS) Assert(stats != NULL); /* - * First find out the number of pages in the index. We must acquire - * the relation-extension lock while doing this to avoid a race - * condition: if someone else is extending the relation, there is - * a window where bufmgr/smgr have created a new all-zero page but - * it hasn't yet been write-locked by _bt_getbuf(). If we manage to - * scan such a page here, we'll improperly assume it can be recycled. - * Taking the lock synchronizes things enough to prevent a problem: - * either num_pages won't include the new page, or _bt_getbuf already - * has write lock on the buffer and it will be fully initialized before - * we can examine it. (See also vacuumlazy.c, which has the same issue.) + * First find out the number of pages in the index. We must acquire the + * relation-extension lock while doing this to avoid a race condition: if + * someone else is extending the relation, there is a window where + * bufmgr/smgr have created a new all-zero page but it hasn't yet been + * write-locked by _bt_getbuf(). If we manage to scan such a page here, + * we'll improperly assume it can be recycled. Taking the lock + * synchronizes things enough to prevent a problem: either num_pages won't + * include the new page, or _bt_getbuf already has write lock on the + * buffer and it will be fully initialized before we can examine it. (See + * also vacuumlazy.c, which has the same issue.) * - * We can skip locking for new or temp relations, - * however, since no one else could be accessing them. + * We can skip locking for new or temp relations, however, since no one else + * could be accessing them. */ needLock = !RELATION_IS_LOCAL(rel); @@ -807,12 +805,12 @@ btvacuumcleanup(PG_FUNCTION_ARGS) /* * During VACUUM FULL it's okay to recycle deleted pages - * immediately, since there can be no other transactions - * scanning the index. 
Note that we will only recycle the - * current page and not any parent pages that _bt_pagedel - * might have recursed to; this seems reasonable in the name - * of simplicity. (Trying to do otherwise would mean we'd - * have to sort the list of recyclable pages we're building.) + * immediately, since there can be no other transactions scanning + * the index. Note that we will only recycle the current page and + * not any parent pages that _bt_pagedel might have recursed to; + * this seems reasonable in the name of simplicity. (Trying to do + * otherwise would mean we'd have to sort the list of recyclable + * pages we're building.) */ if (ndel && info->vacuum_full) { @@ -827,10 +825,10 @@ btvacuumcleanup(PG_FUNCTION_ARGS) } /* - * During VACUUM FULL, we truncate off any recyclable pages at the end - * of the index. In a normal vacuum it'd be unsafe to do this except - * by acquiring exclusive lock on the index and then rechecking all - * the pages; doesn't seem worth it. + * During VACUUM FULL, we truncate off any recyclable pages at the end of + * the index. In a normal vacuum it'd be unsafe to do this except by + * acquiring exclusive lock on the index and then rechecking all the + * pages; doesn't seem worth it. */ if (info->vacuum_full && nFreePages > 0) { @@ -857,9 +855,9 @@ btvacuumcleanup(PG_FUNCTION_ARGS) } /* - * Update the shared Free Space Map with the info we now have about - * free pages in the index, discarding any old info the map may have. - * We do not need to sort the page numbers; they're in order already. + * Update the shared Free Space Map with the info we now have about free + * pages in the index, discarding any old info the map may have. We do not + * need to sort the page numbers; they're in order already. */ RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages); @@ -915,15 +913,15 @@ _bt_restscan(IndexScanDesc scan) opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* - * We use this as flag when first index tuple on page is deleted but - * we do not move left (this would slowdown vacuum) - so we set + * We use this as flag when first index tuple on page is deleted but we do + * not move left (this would slowdown vacuum) - so we set * current->ip_posid before first index tuple on the current page * (_bt_step will move it right)... XXX still needed? */ if (!ItemPointerIsValid(target)) { ItemPointerSetOffsetNumber(current, - OffsetNumberPrev(P_FIRSTDATAKEY(opaque))); + OffsetNumberPrev(P_FIRSTDATAKEY(opaque))); return; } @@ -948,12 +946,12 @@ _bt_restscan(IndexScanDesc scan) } /* - * The item we're looking for moved right at least one page, so - * move right. We are careful here to pin and read-lock the next - * non-dead page before releasing the current one. This ensures - * that a concurrent btbulkdelete scan cannot pass our position - * --- if it did, it might be able to reach and delete our target - * item before we can find it again. + * The item we're looking for moved right at least one page, so move + * right. We are careful here to pin and read-lock the next non-dead + * page before releasing the current one. This ensures that a + * concurrent btbulkdelete scan cannot pass our position --- if it + * did, it might be able to reach and delete our target item before we + * can find it again. 
*/ if (P_RIGHTMOST(opaque)) elog(ERROR, "failed to re-find previous key in \"%s\"", diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index c029824fa6..06075dd3dd 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.94 2005/10/06 02:29:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.95 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -69,9 +69,9 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, BTStack new_stack; /* - * Race -- the page we just grabbed may have split since we read - * its pointer in the parent (or metapage). If it has, we may - * need to move right to its new sibling. Do that. + * Race -- the page we just grabbed may have split since we read its + * pointer in the parent (or metapage). If it has, we may need to + * move right to its new sibling. Do that. */ *bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey, BT_READ); @@ -82,8 +82,8 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, break; /* - * Find the appropriate item on the internal page, and get the - * child page that it points to. + * Find the appropriate item on the internal page, and get the child + * page that it points to. */ offnum = _bt_binsrch(rel, *bufP, keysz, scankey, nextkey); itemid = PageGetItemId(page, offnum); @@ -94,13 +94,13 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, /* * We need to save the location of the index entry we chose in the - * parent page on a stack. In case we split the tree, we'll use - * the stack to work back up to the parent page. We also save the - * actual downlink (TID) to uniquely identify the index entry, in - * case it moves right while we're working lower in the tree. See - * the paper by Lehman and Yao for how this is detected and - * handled. (We use the child link to disambiguate duplicate keys - * in the index -- Lehman and Yao disallow duplicate keys.) + * parent page on a stack. In case we split the tree, we'll use the + * stack to work back up to the parent page. We also save the actual + * downlink (TID) to uniquely identify the index entry, in case it + * moves right while we're working lower in the tree. See the paper + * by Lehman and Yao for how this is detected and handled. (We use the + * child link to disambiguate duplicate keys in the index -- Lehman + * and Yao disallow duplicate keys.) */ new_stack = (BTStack) palloc(sizeof(BTStackData)); new_stack->bts_blkno = par_blkno; @@ -156,19 +156,18 @@ _bt_moveright(Relation rel, opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* - * When nextkey = false (normal case): if the scan key that brought us - * to this page is > the high key stored on the page, then the page - * has split and we need to move right. (If the scan key is equal to - * the high key, we might or might not need to move right; have to - * scan the page first anyway.) + * When nextkey = false (normal case): if the scan key that brought us to + * this page is > the high key stored on the page, then the page has split + * and we need to move right. (If the scan key is equal to the high key, + * we might or might not need to move right; have to scan the page first + * anyway.) 
* * When nextkey = true: move right if the scan key is >= page's high key. * - * The page could even have split more than once, so scan as far as - * needed. + * The page could even have split more than once, so scan as far as needed. * - * We also have to move right if we followed a link that brought us to a - * dead page. + * We also have to move right if we followed a link that brought us to a dead + * page. */ cmpval = nextkey ? 0 : 1; @@ -242,24 +241,24 @@ _bt_binsrch(Relation rel, high = PageGetMaxOffsetNumber(page); /* - * If there are no keys on the page, return the first available slot. - * Note this covers two cases: the page is really empty (no keys), or - * it contains only a high key. The latter case is possible after - * vacuuming. This can never happen on an internal page, however, - * since they are never empty (an internal page must have children). + * If there are no keys on the page, return the first available slot. Note + * this covers two cases: the page is really empty (no keys), or it + * contains only a high key. The latter case is possible after vacuuming. + * This can never happen on an internal page, however, since they are + * never empty (an internal page must have children). */ if (high < low) return low; /* - * Binary search to find the first key on the page >= scan key, or - * first key > scankey when nextkey is true. + * Binary search to find the first key on the page >= scan key, or first + * key > scankey when nextkey is true. * * For nextkey=false (cmpval=1), the loop invariant is: all slots before * 'low' are < scan key, all slots at or after 'high' are >= scan key. * - * For nextkey=true (cmpval=0), the loop invariant is: all slots before - * 'low' are <= scan key, all slots at or after 'high' are > scan key. + * For nextkey=true (cmpval=0), the loop invariant is: all slots before 'low' + * are <= scan key, all slots at or after 'high' are > scan key. * * We can fall out when high == low. */ @@ -285,15 +284,15 @@ _bt_binsrch(Relation rel, * At this point we have high == low, but be careful: they could point * past the last slot on the page. * - * On a leaf page, we always return the first key >= scan key (resp. > - * scan key), which could be the last slot + 1. + * On a leaf page, we always return the first key >= scan key (resp. > scan + * key), which could be the last slot + 1. */ if (P_ISLEAF(opaque)) return low; /* - * On a non-leaf page, return the last key < scan key (resp. <= scan - * key). There must be one if _bt_compare() is playing by the rules. + * On a non-leaf page, return the last key < scan key (resp. <= scan key). + * There must be one if _bt_compare() is playing by the rules. */ Assert(low > P_FIRSTDATAKEY(opaque)); @@ -337,8 +336,8 @@ _bt_compare(Relation rel, int i; /* - * Force result ">" if target item is first data item on an internal - * page --- see NOTE above. + * Force result ">" if target item is first data item on an internal page + * --- see NOTE above. */ if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque)) return 1; @@ -347,15 +346,15 @@ _bt_compare(Relation rel, itup = &(btitem->bti_itup); /* - * The scan key is set up with the attribute number associated with - * each term in the key. It is important that, if the index is - * multi-key, the scan contain the first k key attributes, and that - * they be in order. If you think about how multi-key ordering works, - * you'll understand why this is. + * The scan key is set up with the attribute number associated with each + * term in the key. 
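Aside on the cmpval convention in _bt_binsrch's comments above (return the first key >= the scan key when nextkey is false, the first key > it when nextkey is true): the idea is easier to see on a plain sorted array. The following is a stand-alone sketch, not PostgreSQL code; find_first, the array, and main are invented for illustration, and only the nextkey/cmpval convention and the stated loop invariant are borrowed from the comments in this hunk.

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * Return the index of the first element >= key (nextkey = false) or the
     * first element > key (nextkey = true) in a sorted array.  Invariant, as
     * in the comment above: every slot before 'low' compares below the
     * threshold, every slot at or after 'high' compares at or above it; we
     * fall out when high == low.
     */
    static int
    find_first(const int *arr, int nelem, int key, bool nextkey)
    {
        int low = 0;
        int high = nelem;
        int cmpval = nextkey ? 0 : 1;   /* same threshold convention */

        while (high > low)
        {
            int mid = low + (high - low) / 2;
            int result = (key > arr[mid]) - (key < arr[mid]);  /* -1, 0, +1 */

            if (result >= cmpval)
                low = mid + 1;
            else
                high = mid;
        }
        return low;
    }

    int
    main(void)
    {
        int a[] = {10, 20, 20, 30};

        printf("%d\n", find_first(a, 4, 20, false));  /* 1: first slot >= 20 */
        printf("%d\n", find_first(a, 4, 20, true));   /* 3: first slot > 20 */
        return 0;
    }

Flipping a single threshold is what lets one search loop serve both the ">= scan key" and "> scan key" positioning cases that _bt_first's strategies need.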
It is important that, if the index is multi-key, the + * scan contain the first k key attributes, and that they be in order. If + * you think about how multi-key ordering works, you'll understand why + * this is. * - * We don't test for violation of this condition here, however. The - * initial setup for the index scan had better have gotten it right - * (see _bt_first). + * We don't test for violation of this condition here, however. The initial + * setup for the index scan had better have gotten it right (see + * _bt_first). */ for (i = 1; i <= keysz; i++) @@ -381,15 +380,15 @@ _bt_compare(Relation rel, else { /* - * The sk_func needs to be passed the index value as left arg - * and the sk_argument as right arg (they might be of - * different types). Since it is convenient for callers to - * think of _bt_compare as comparing the scankey to the index - * item, we have to flip the sign of the comparison result. + * The sk_func needs to be passed the index value as left arg and + * the sk_argument as right arg (they might be of different + * types). Since it is convenient for callers to think of + * _bt_compare as comparing the scankey to the index item, we have + * to flip the sign of the comparison result. * - * Note: curious-looking coding is to avoid overflow if - * comparison function returns INT_MIN. There is no risk of - * overflow for positive results. + * Note: curious-looking coding is to avoid overflow if comparison + * function returns INT_MIN. There is no risk of overflow for + * positive results. */ result = DatumGetInt32(FunctionCall2(&scankey->sk_func, datum, @@ -497,7 +496,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) bool goback; bool continuescan; ScanKey startKeys[INDEX_MAX_KEYS]; - ScanKeyData scankeys[INDEX_MAX_KEYS]; + ScanKeyData scankeys[INDEX_MAX_KEYS]; int keysCount = 0; int i; StrategyNumber strat_total; @@ -505,8 +504,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) pgstat_count_index_scan(&scan->xs_pgstat_info); /* - * Examine the scan keys and eliminate any redundant keys; also - * discover how many keys must be matched to continue the scan. + * Examine the scan keys and eliminate any redundant keys; also discover + * how many keys must be matched to continue the scan. */ _bt_preprocess_keys(scan); @@ -556,9 +555,9 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) ScanKey cur; /* - * chosen is the so-far-chosen key for the current attribute, if - * any. We don't cast the decision in stone until we reach keys - * for the next attribute. + * chosen is the so-far-chosen key for the current attribute, if any. + * We don't cast the decision in stone until we reach keys for the + * next attribute. */ curattr = 1; chosen = NULL; @@ -595,9 +594,9 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) } /* - * Done if that was the last attribute, or if next key - * is not in sequence (implying no boundary key is available - * for the next attribute). + * Done if that was the last attribute, or if next key is not + * in sequence (implying no boundary key is available for the + * next attribute). */ if (i >= so->numberOfKeys || cur->sk_attno != curattr + 1) @@ -632,17 +631,17 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) } /* - * If we found no usable boundary keys, we have to start from one end - * of the tree. Walk down that edge to the first or last key, and - * scan from there. + * If we found no usable boundary keys, we have to start from one end of + * the tree. Walk down that edge to the first or last key, and scan from + * there. 
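On the overflow note in _bt_compare's comment above ("curious-looking coding is to avoid overflow if comparison function returns INT_MIN"): negating a 3-way comparison result with a plain minus sign is undefined behaviour when the result is INT_MIN, so the sign has to be flipped by remapping to -1/0/+1 instead. The snippet below is a minimal stand-alone illustration of that idea; flip_sign and main are invented names, not the actual _bt_compare code.

    #include <limits.h>
    #include <stdio.h>

    /*
     * Flip the sign of a 3-way comparison result without risking integer
     * overflow.  "return -result;" is undefined when result == INT_MIN, so
     * map strictly positive results to -1 and strictly negative ones to +1;
     * callers only care about the sign, not the magnitude.
     */
    static int
    flip_sign(int result)
    {
        if (result > 0)
            return -1;
        if (result < 0)
            return 1;
        return 0;
    }

    int
    main(void)
    {
        printf("%d\n", flip_sign(42));       /* -1 */
        printf("%d\n", flip_sign(0));        /*  0 */
        printf("%d\n", flip_sign(INT_MIN));  /*  1, no overflow */
        return 0;
    }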
*/ if (keysCount == 0) return _bt_endpoint(scan, dir); /* * We want to start the scan somewhere within the index. Set up a - * 3-way-comparison scankey we can use to search for the boundary - * point we identified above. + * 3-way-comparison scankey we can use to search for the boundary point we + * identified above. */ Assert(keysCount <= INDEX_MAX_KEYS); for (i = 0; i < keysCount; i++) @@ -650,16 +649,15 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) ScanKey cur = startKeys[i]; /* - * _bt_preprocess_keys disallows it, but it's place to add some - * code later + * _bt_preprocess_keys disallows it, but it's place to add some code + * later */ if (cur->sk_flags & SK_ISNULL) elog(ERROR, "btree doesn't support is(not)null, yet"); /* - * If scankey operator is of default subtype, we can use the - * cached comparison procedure; otherwise gotta look it up in the - * catalogs. + * If scankey operator is of default subtype, we can use the cached + * comparison procedure; otherwise gotta look it up in the catalogs. */ if (cur->sk_subtype == InvalidOid) { @@ -692,13 +690,13 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) } /* - * Examine the selected initial-positioning strategy to determine - * exactly where we need to start the scan, and set flag variables to - * control the code below. + * Examine the selected initial-positioning strategy to determine exactly + * where we need to start the scan, and set flag variables to control the + * code below. * - * If nextkey = false, _bt_search and _bt_binsrch will locate the first - * item >= scan key. If nextkey = true, they will locate the first - * item > scan key. + * If nextkey = false, _bt_search and _bt_binsrch will locate the first item + * >= scan key. If nextkey = true, they will locate the first item > scan + * key. * * If goback = true, we will then step back one item, while if goback = * false, we will start the scan on the located item. @@ -710,10 +708,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) case BTLessStrategyNumber: /* - * Find first item >= scankey, then back up one to arrive at - * last item < scankey. (Note: this positioning strategy is - * only used for a backward scan, so that is always the - * correct starting position.) + * Find first item >= scankey, then back up one to arrive at last + * item < scankey. (Note: this positioning strategy is only used + * for a backward scan, so that is always the correct starting + * position.) */ nextkey = false; goback = true; @@ -722,10 +720,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) case BTLessEqualStrategyNumber: /* - * Find first item > scankey, then back up one to arrive at - * last item <= scankey. (Note: this positioning strategy is - * only used for a backward scan, so that is always the - * correct starting position.) + * Find first item > scankey, then back up one to arrive at last + * item <= scankey. (Note: this positioning strategy is only used + * for a backward scan, so that is always the correct starting + * position.) */ nextkey = true; goback = true; @@ -734,14 +732,14 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) case BTEqualStrategyNumber: /* - * If a backward scan was specified, need to start with last - * equal item not first one. + * If a backward scan was specified, need to start with last equal + * item not first one. */ if (ScanDirectionIsBackward(dir)) { /* - * This is the same as the <= strategy. We will check at - * the end whether the found item is actually =. + * This is the same as the <= strategy. 
We will check at the + * end whether the found item is actually =. */ nextkey = true; goback = true; @@ -749,8 +747,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) else { /* - * This is the same as the >= strategy. We will check at - * the end whether the found item is actually =. + * This is the same as the >= strategy. We will check at the + * end whether the found item is actually =. */ nextkey = false; goback = false; @@ -813,24 +811,24 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) ItemPointerSet(current, blkno, offnum); /* - * If nextkey = false, we are positioned at the first item >= scan - * key, or possibly at the end of a page on which all the existing - * items are less than the scan key and we know that everything on - * later pages is greater than or equal to scan key. + * If nextkey = false, we are positioned at the first item >= scan key, or + * possibly at the end of a page on which all the existing items are less + * than the scan key and we know that everything on later pages is greater + * than or equal to scan key. * * If nextkey = true, we are positioned at the first item > scan key, or - * possibly at the end of a page on which all the existing items are - * less than or equal to the scan key and we know that everything on - * later pages is greater than scan key. + * possibly at the end of a page on which all the existing items are less + * than or equal to the scan key and we know that everything on later + * pages is greater than scan key. * - * The actually desired starting point is either this item or the prior - * one, or in the end-of-page case it's the first item on the next - * page or the last item on this page. We apply _bt_step if needed to - * get to the right place. + * The actually desired starting point is either this item or the prior one, + * or in the end-of-page case it's the first item on the next page or the + * last item on this page. We apply _bt_step if needed to get to the + * right place. * * If _bt_step fails (meaning we fell off the end of the index in one - * direction or the other), then there are no matches so we just - * return false. + * direction or the other), then there are no matches so we just return + * false. */ if (goback) { @@ -902,8 +900,8 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) BlockNumber blkno; /* - * Don't use ItemPointerGetOffsetNumber or you risk to get assertion - * due to ability of ip_posid to be equal 0. + * Don't use ItemPointerGetOffsetNumber or you risk to get assertion due + * to ability of ip_posid to be equal 0. */ offnum = current->ip_posid; @@ -954,9 +952,9 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) /* * Walk left to the next page with data. This is much more * complex than the walk-right case because of the possibility - * that the page to our left splits while we are in flight to - * it, plus the possibility that the page we were on gets - * deleted after we leave it. See nbtree/README for details. + * that the page to our left splits while we are in flight to it, + * plus the possibility that the page we were on gets deleted + * after we leave it. See nbtree/README for details. */ for (;;) { @@ -973,9 +971,9 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* - * Okay, we managed to move left to a non-deleted page. - * Done if it's not half-dead and not empty. Else loop - * back and do it all again. + * Okay, we managed to move left to a non-deleted page. 
Done + * if it's not half-dead and not empty. Else loop back and do + * it all again. */ if (!P_IGNORE(opaque)) { @@ -1043,15 +1041,14 @@ _bt_walk_left(Relation rel, Buffer buf) /* * If this isn't the page we want, walk right till we find what we - * want --- but go no more than four hops (an arbitrary limit). If - * we don't find the correct page by then, the most likely bet is - * that the original page got deleted and isn't in the sibling - * chain at all anymore, not that its left sibling got split more - * than four times. + * want --- but go no more than four hops (an arbitrary limit). If we + * don't find the correct page by then, the most likely bet is that + * the original page got deleted and isn't in the sibling chain at all + * anymore, not that its left sibling got split more than four times. * - * Note that it is correct to test P_ISDELETED not P_IGNORE here, - * because half-dead pages are still in the sibling chain. Caller - * must reject half-dead pages if wanted. + * Note that it is correct to test P_ISDELETED not P_IGNORE here, because + * half-dead pages are still in the sibling chain. Caller must reject + * half-dead pages if wanted. */ tries = 0; for (;;) @@ -1077,9 +1074,9 @@ _bt_walk_left(Relation rel, Buffer buf) { /* * It was deleted. Move right to first nondeleted page (there - * must be one); that is the page that has acquired the - * deleted one's keyspace, so stepping left from it will take - * us where we want to be. + * must be one); that is the page that has acquired the deleted + * one's keyspace, so stepping left from it will take us where we + * want to be. */ for (;;) { @@ -1095,16 +1092,16 @@ _bt_walk_left(Relation rel, Buffer buf) } /* - * Now return to top of loop, resetting obknum to point to - * this nondeleted page, and try again. + * Now return to top of loop, resetting obknum to point to this + * nondeleted page, and try again. */ } else { /* - * It wasn't deleted; the explanation had better be that the - * page to the left got split or deleted. Without this check, - * we'd go into an infinite loop if there's anything wrong. + * It wasn't deleted; the explanation had better be that the page + * to the left got split or deleted. Without this check, we'd go + * into an infinite loop if there's anything wrong. */ if (opaque->btpo_prev == lblkno) elog(ERROR, "could not find left sibling in \"%s\"", @@ -1137,8 +1134,8 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) /* * If we are looking for a leaf page, okay to descend from fast root; - * otherwise better descend from true root. (There is no point in - * being smarter about intermediate levels.) + * otherwise better descend from true root. (There is no point in being + * smarter about intermediate levels.) */ if (level == 0) buf = _bt_getroot(rel, BT_READ); @@ -1159,8 +1156,8 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) /* * If we landed on a deleted page, step right to find a live page * (there must be one). Also, if we want the rightmost page, step - * right if needed to get to it (this could happen if the page - * split since we obtained a pointer to it). + * right if needed to get to it (this could happen if the page split + * since we obtained a pointer to it). */ while (P_IGNORE(opaque) || (rightmost && !P_RIGHTMOST(opaque))) @@ -1228,9 +1225,9 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) so = (BTScanOpaque) scan->opaque; /* - * Scan down to the leftmost or rightmost leaf page. This is a - * simplified version of _bt_search(). 
We don't maintain a stack - * since we know we won't need it. + * Scan down to the leftmost or rightmost leaf page. This is a simplified + * version of _bt_search(). We don't maintain a stack since we know we + * won't need it. */ buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir)); @@ -1261,8 +1258,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) Assert(P_RIGHTMOST(opaque)); start = PageGetMaxOffsetNumber(page); - if (start < P_FIRSTDATAKEY(opaque)) /* watch out for empty - * page */ + if (start < P_FIRSTDATAKEY(opaque)) /* watch out for empty page */ start = P_FIRSTDATAKEY(opaque); } else @@ -1276,8 +1272,8 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) so->btso_curbuf = buf; /* - * Left/rightmost page could be empty due to deletions, if so step - * till we find a nonempty page. + * Left/rightmost page could be empty due to deletions, if so step till we + * find a nonempty page. */ if (start > maxoff) { @@ -1291,8 +1287,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) itup = &(btitem->bti_itup); /* - * Okay, we are on the first or last tuple. Does it pass all the - * quals? + * Okay, we are on the first or last tuple. Does it pass all the quals? */ if (_bt_checkkeys(scan, itup, dir, &continuescan)) { diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index ee5acee5c3..6ee5d42b63 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -56,7 +56,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.94 2005/08/11 13:22:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.95 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -99,12 +99,10 @@ typedef struct BTPageState { Page btps_page; /* workspace for page building */ BlockNumber btps_blkno; /* block # to write this page at */ - BTItem btps_minkey; /* copy of minimum key (first item) on - * page */ + BTItem btps_minkey; /* copy of minimum key (first item) on page */ OffsetNumber btps_lastoff; /* last item offset loaded */ uint32 btps_level; /* tree level (0 = leaf) */ - Size btps_full; /* "full" if less than this much free - * space */ + Size btps_full; /* "full" if less than this much free space */ struct BTPageState *btps_next; /* link to parent level, if any */ } BTPageState; @@ -157,21 +155,21 @@ _bt_spoolinit(Relation index, bool isunique, bool isdead) btspool->isunique = isunique; /* - * We size the sort area as maintenance_work_mem rather than work_mem - * to speed index creation. This should be OK since a single backend - * can't run multiple index creations in parallel. Note that creation - * of a unique index actually requires two BTSpool objects. We expect - * that the second one (for dead tuples) won't get very full, so we - * give it only work_mem. + * We size the sort area as maintenance_work_mem rather than work_mem to + * speed index creation. This should be OK since a single backend can't + * run multiple index creations in parallel. Note that creation of a + * unique index actually requires two BTSpool objects. We expect that the + * second one (for dead tuples) won't get very full, so we give it only + * work_mem. */ btKbytes = isdead ? work_mem : maintenance_work_mem; btspool->sortstate = tuplesort_begin_index(index, isunique, btKbytes, false); /* - * Currently, tuplesort provides sort functions on IndexTuples. 
If we - * kept anything in a BTItem other than a regular IndexTuple, we'd - * need to modify tuplesort to understand BTItems as such. + * Currently, tuplesort provides sort functions on IndexTuples. If we kept + * anything in a BTItem other than a regular IndexTuple, we'd need to + * modify tuplesort to understand BTItems as such. */ Assert(sizeof(BTItemData) == sizeof(IndexTupleData)); @@ -222,8 +220,8 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) wstate.index = btspool->index; /* - * We need to log index creation in WAL iff WAL archiving is enabled - * AND it's not a temp index. + * We need to log index creation in WAL iff WAL archiving is enabled AND + * it's not a temp index. */ wstate.btws_use_wal = XLogArchivingActive() && !wstate.index->rd_istemp; @@ -313,9 +311,9 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) /* * If we have to write pages nonsequentially, fill in the space with * zeroes until we come back and overwrite. This is not logically - * necessary on standard Unix filesystems (unwritten space will read - * as zeroes anyway), but it should help to avoid fragmentation. The - * dummy pages aren't WAL-logged though. + * necessary on standard Unix filesystems (unwritten space will read as + * zeroes anyway), but it should help to avoid fragmentation. The dummy + * pages aren't WAL-logged though. */ while (blkno > wstate->btws_pages_written) { @@ -328,8 +326,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) /* * Now write the page. We say isTemp = true even if it's not a temp - * index, because there's no need for smgr to schedule an fsync for - * this write; we'll do it ourselves before ending the build. + * index, because there's no need for smgr to schedule an fsync for this + * write; we'll do it ourselves before ending the build. */ smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true); @@ -483,15 +481,15 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti) btisz = MAXALIGN(btisz); /* - * Check whether the item can fit on a btree page at all. (Eventually, - * we ought to try to apply TOAST methods if not.) We actually need to - * be able to fit three items on every page, so restrict any one item - * to 1/3 the per-page available space. Note that at this point, btisz - * doesn't include the ItemId. + * Check whether the item can fit on a btree page at all. (Eventually, we + * ought to try to apply TOAST methods if not.) We actually need to be + * able to fit three items on every page, so restrict any one item to 1/3 + * the per-page available space. Note that at this point, btisz doesn't + * include the ItemId. * - * NOTE: similar code appears in _bt_insertonpg() to defend against - * oversize items being inserted into an already-existing index. But - * during creation of an index, we don't go through there. + * NOTE: similar code appears in _bt_insertonpg() to defend against oversize + * items being inserted into an already-existing index. But during + * creation of an index, we don't go through there. 
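A rough sketch of the "fit three items on every page" arithmetic described above. The constants here are assumptions chosen only to make the example runnable (they are not BLCKSZ or the real header/special/line-pointer sizes), and the calculation is deliberately simplified compared with the real BTMaxItemSize computation.

    #include <stddef.h>
    #include <stdio.h>

    /* Assumed sizes, for illustration only. */
    #define PAGE_SIZE        8192
    #define PAGE_HEADER_SIZE 24
    #define SPECIAL_SIZE     16
    #define ITEMID_SIZE      4

    int
    main(void)
    {
        /* Space left for line pointers plus tuples after fixed overhead. */
        size_t avail = PAGE_SIZE - PAGE_HEADER_SIZE - SPECIAL_SIZE;

        /*
         * Requiring that any three items (with their line pointers) fit on a
         * single page caps one item at roughly a third of that space.
         */
        size_t max_item = avail / 3 - ITEMID_SIZE;

        printf("max item size under these assumptions: %zu bytes\n", max_item);
        return 0;
    }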
*/ if (btisz > BTMaxItemSize(npage)) ereport(ERROR, @@ -499,9 +497,9 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti) errmsg("index row size %lu exceeds btree maximum, %lu", (unsigned long) btisz, (unsigned long) BTMaxItemSize(npage)), - errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n" - "Consider a function index of an MD5 hash of the value, " - "or use full text indexing."))); + errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n" + "Consider a function index of an MD5 hash of the value, " + "or use full text indexing."))); if (pgspc < btisz || pgspc < state->btps_full) { @@ -523,11 +521,11 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti) /* * We copy the last item on the page into the new page, and then - * rearrange the old page so that the 'last item' becomes its high - * key rather than a true data item. There had better be at least - * two items on the page already, else the page would be empty of - * useful data. (Hence, we must allow pages to be packed at least - * 2/3rds full; the 70% figure used above is close to minimum.) + * rearrange the old page so that the 'last item' becomes its high key + * rather than a true data item. There had better be at least two + * items on the page already, else the page would be empty of useful + * data. (Hence, we must allow pages to be packed at least 2/3rds + * full; the 70% figure used above is close to minimum.) */ Assert(last_off > P_FIRSTKEY); ii = PageGetItemId(opage, last_off); @@ -544,8 +542,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti) /* * Link the old page into its parent, using its minimum key. If we - * don't have a parent, we have to create one; this adds a new - * btree level. + * don't have a parent, we have to create one; this adds a new btree + * level. */ if (state->btps_next == NULL) state->btps_next = _bt_pagestate(wstate, state->btps_level + 1); @@ -557,9 +555,9 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti) pfree(state->btps_minkey); /* - * Save a copy of the minimum key for the new page. We have to - * copy it off the old page, not the new one, in case we are not - * at leaf level. + * Save a copy of the minimum key for the new page. We have to copy + * it off the old page, not the new one, in case we are not at leaf + * level. */ state->btps_minkey = _bt_formitem(&(obti->bti_itup)); @@ -576,8 +574,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti) } /* - * Write out the old page. We never need to touch it again, so we - * can free the opage workspace too. + * Write out the old page. We never need to touch it again, so we can + * free the opage workspace too. */ _bt_blwritepage(wstate, opage, oblkno); @@ -588,10 +586,10 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, BTItem bti) } /* - * If the new item is the first for its page, stash a copy for later. - * Note this will only happen for the first item on a level; on later - * pages, the first item for a page is copied from the prior page in - * the code above. + * If the new item is the first for its page, stash a copy for later. Note + * this will only happen for the first item on a level; on later pages, + * the first item for a page is copied from the prior page in the code + * above. */ if (last_off == P_HIKEY) { @@ -636,9 +634,9 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state) * We have to link the last page on this level to somewhere. 
* * If we're at the top, it's the root, so attach it to the metapage. - * Otherwise, add an entry for it to its parent using its minimum - * key. This may cause the last page of the parent level to - * split, but that's not a problem -- we haven't gotten to it yet. + * Otherwise, add an entry for it to its parent using its minimum key. + * This may cause the last page of the parent level to split, but + * that's not a problem -- we haven't gotten to it yet. */ if (s->btps_next == NULL) { @@ -657,8 +655,8 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state) } /* - * This is the rightmost page, so the ItemId array needs to be - * slid back one slot. Then we can dump out the page. + * This is the rightmost page, so the ItemId array needs to be slid + * back one slot. Then we can dump out the page. */ _bt_slideleft(s->btps_page); _bt_blwritepage(wstate, s->btps_page, s->btps_blkno); @@ -667,9 +665,9 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state) /* * As the last step in the process, construct the metapage and make it - * point to the new root (unless we had no data at all, in which case - * it's set to point to "P_NONE"). This changes the index to the - * "valid" state by filling in a valid magic number in the metapage. + * point to the new root (unless we had no data at all, in which case it's + * set to point to "P_NONE"). This changes the index to the "valid" state + * by filling in a valid magic number in the metapage. */ metapage = (Page) palloc(BLCKSZ); _bt_initmetapage(metapage, rootblkno, rootlevel); @@ -748,7 +746,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) compare = DatumGetInt32(FunctionCall2(&entry->sk_func, attrDatum1, - attrDatum2)); + attrDatum2)); if (compare > 0) { load1 = false; @@ -772,7 +770,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) if (should_free) pfree(bti); bti = (BTItem) tuplesort_getindextuple(btspool->sortstate, - true, &should_free); + true, &should_free); } else { @@ -780,7 +778,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) if (should_free2) pfree(bti2); bti2 = (BTItem) tuplesort_getindextuple(btspool2->sortstate, - true, &should_free2); + true, &should_free2); } } _bt_freeskey(indexScanKey); @@ -789,7 +787,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) { /* merge is unnecessary */ while ((bti = (BTItem) tuplesort_getindextuple(btspool->sortstate, - true, &should_free)) != NULL) + true, &should_free)) != NULL) { /* When we see first tuple, create first index page */ if (state == NULL) @@ -805,19 +803,19 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) _bt_uppershutdown(wstate, state); /* - * If the index isn't temp, we must fsync it down to disk before it's - * safe to commit the transaction. (For a temp index we don't care - * since the index will be uninteresting after a crash anyway.) + * If the index isn't temp, we must fsync it down to disk before it's safe + * to commit the transaction. (For a temp index we don't care since the + * index will be uninteresting after a crash anyway.) * * It's obvious that we must do this when not WAL-logging the build. It's * less obvious that we have to do it even if we did WAL-log the index - * pages. The reason is that since we're building outside shared - * buffers, a CHECKPOINT occurring during the build has no way to - * flush the previously written data to disk (indeed it won't know the - * index even exists). 
A crash later on would replay WAL from the - * checkpoint, therefore it wouldn't replay our earlier WAL entries. - * If we do not fsync those pages here, they might still not be on - * disk when the crash occurs. + * pages. The reason is that since we're building outside shared buffers, + * a CHECKPOINT occurring during the build has no way to flush the + * previously written data to disk (indeed it won't know the index even + * exists). A crash later on would replay WAL from the checkpoint, + * therefore it wouldn't replay our earlier WAL entries. If we do not + * fsync those pages here, they might still not be on disk when the crash + * occurs. */ if (!wstate->index->rd_istemp) smgrimmedsync(wstate->index->rd_smgr); diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 9a5f8d7ac9..269213d21f 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.63 2005/06/13 23:14:48 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.64 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -48,8 +48,8 @@ _bt_mkscankey(Relation rel, IndexTuple itup) bool null; /* - * We can use the cached (default) support procs since no - * cross-type comparison can be needed. + * We can use the cached (default) support procs since no cross-type + * comparison can be needed. */ procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC); arg = index_getattr(itup, i + 1, itupdesc, &null); @@ -93,8 +93,8 @@ _bt_mkscankey_nodata(Relation rel) FmgrInfo *procinfo; /* - * We can use the cached (default) support procs since no - * cross-type comparison can be needed. + * We can use the cached (default) support procs since no cross-type + * comparison can be needed. */ procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC); ScanKeyEntryInitializeWithInfo(&skey[i], @@ -257,9 +257,9 @@ _bt_preprocess_keys(IndexScanDesc scan) if (numberOfKeys == 1) { /* - * We don't use indices for 'A is null' and 'A is not null' - * currently and 'A < = > <> NULL' will always fail - so qual is - * not OK if comparison value is NULL. - vadim 03/21/97 + * We don't use indices for 'A is null' and 'A is not null' currently + * and 'A < = > <> NULL' will always fail - so qual is not OK if + * comparison value is NULL. - vadim 03/21/97 */ if (cur->sk_flags & SK_ISNULL) so->qual_ok = false; @@ -286,20 +286,20 @@ _bt_preprocess_keys(IndexScanDesc scan) /* * Initialize for processing of keys for attr 1. * - * xform[i] points to the currently best scan key of strategy type i+1, - * if any is found with a default operator subtype; it is NULL if we - * haven't yet found such a key for this attr. Scan keys of - * nondefault subtypes are transferred to the output with no - * processing except for noting if they are of "=" type. + * xform[i] points to the currently best scan key of strategy type i+1, if + * any is found with a default operator subtype; it is NULL if we haven't + * yet found such a key for this attr. Scan keys of nondefault subtypes + * are transferred to the output with no processing except for noting if + * they are of "=" type. */ attno = 1; memset(xform, 0, sizeof(xform)); hasOtherTypeEqual = false; /* - * Loop iterates from 0 to numberOfKeys inclusive; we use the last - * pass to handle after-last-key processing. Actual exit from the - * loop is at the "break" statement below. 
+ * Loop iterates from 0 to numberOfKeys inclusive; we use the last pass to + * handle after-last-key processing. Actual exit from the loop is at the + * "break" statement below. */ for (i = 0;; cur++, i++) { @@ -319,8 +319,8 @@ _bt_preprocess_keys(IndexScanDesc scan) } /* - * If we are at the end of the keys for a particular attr, finish - * up processing and emit the cleaned-up keys. + * If we are at the end of the keys for a particular attr, finish up + * processing and emit the cleaned-up keys. */ if (i == numberOfKeys || cur->sk_attno != attno) { @@ -331,9 +331,9 @@ _bt_preprocess_keys(IndexScanDesc scan) elog(ERROR, "btree index keys must be ordered by attribute"); /* - * If = has been specified, no other key will be used. In case - * of key > 2 && key == 1 and so on we have to set qual_ok to - * false before discarding the other keys. + * If = has been specified, no other key will be used. In case of + * key > 2 && key == 1 and so on we have to set qual_ok to false + * before discarding the other keys. */ if (xform[BTEqualStrategyNumber - 1]) { @@ -411,8 +411,8 @@ _bt_preprocess_keys(IndexScanDesc scan) } /* - * If all attrs before this one had "=", include these keys - * into the required-keys count. + * If all attrs before this one had "=", include these keys into + * the required-keys count. */ if (priorNumberOfEqualCols == attno - 1) so->numberOfRequiredKeys = new_numberOfKeys; @@ -526,11 +526,11 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, if (isNull) { /* - * Since NULLs are sorted after non-NULLs, we know we have - * reached the upper limit of the range of values for this - * index attr. On a forward scan, we can stop if this qual is - * one of the "must match" subset. On a backward scan, - * however, we should keep going. + * Since NULLs are sorted after non-NULLs, we know we have reached + * the upper limit of the range of values for this index attr. On + * a forward scan, we can stop if this qual is one of the "must + * match" subset. On a backward scan, however, we should keep + * going. */ if (ikey < so->numberOfRequiredKeys && ScanDirectionIsForward(dir)) @@ -547,24 +547,22 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, if (!DatumGetBool(test)) { /* - * Tuple fails this qual. If it's a required qual, then we - * may be able to conclude no further tuples will pass, - * either. We have to look at the scan direction and the qual - * type. + * Tuple fails this qual. If it's a required qual, then we may be + * able to conclude no further tuples will pass, either. We have + * to look at the scan direction and the qual type. * - * Note: the only case in which we would keep going after failing - * a required qual is if there are partially-redundant quals - * that _bt_preprocess_keys() was unable to eliminate. For - * example, given "x > 4 AND x > 10" where both are cross-type - * comparisons and so not removable, we might start the scan - * at the x = 4 boundary point. The "x > 10" condition will - * fail until we pass x = 10, but we must not stop the scan on - * its account. + * Note: the only case in which we would keep going after failing a + * required qual is if there are partially-redundant quals that + * _bt_preprocess_keys() was unable to eliminate. For example, + * given "x > 4 AND x > 10" where both are cross-type comparisons + * and so not removable, we might start the scan at the x = 4 + * boundary point. The "x > 10" condition will fail until we pass + * x = 10, but we must not stop the scan on its account. 
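The "x > 4 AND x > 10" example above can be simulated directly: if the scan starts at the x = 4 boundary and stopped the moment the non-removable "x > 10" qual failed, it would miss every real match. The toy scan below is purely illustrative (the array, variable names, and the stop_on_first_failure switch are all invented); it is not the _bt_checkkeys logic.

    #include <stdbool.h>
    #include <stdio.h>

    /* Sorted "index" of x values. */
    static const int xs[] = {1, 3, 5, 7, 9, 11, 13};
    #define NXS (int) (sizeof(xs) / sizeof(xs[0]))

    int
    main(void)
    {
        int     i, matches = 0;
        bool    stop_on_first_failure = false;  /* flip to see the bug */

        /* Start the scan at the "x > 4" boundary point. */
        for (i = 0; i < NXS && !(xs[i] > 4); i++)
            ;

        for (; i < NXS; i++)
        {
            bool q1 = xs[i] > 4;    /* always true from the start point on */
            bool q2 = xs[i] > 10;   /* fails for 5, 7, 9; passes from 11 on */

            if (q1 && q2)
                matches++;
            else if (stop_on_first_failure)
                break;              /* would wrongly end the scan at x = 5 */
        }
        printf("matches: %d\n", matches);   /* 2 (x = 11 and x = 13) */
        return 0;
    }

With stop_on_first_failure set to true the loop ends at x = 5 and reports zero matches, which is exactly the failure mode the comment warns against.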
* - * Note: because we stop the scan as soon as any required - * equality qual fails, it is critical that equality quals be - * used for the initial positioning in _bt_first() when they - * are available. See comments in _bt_first(). + * Note: because we stop the scan as soon as any required equality + * qual fails, it is critical that equality quals be used for the + * initial positioning in _bt_first() when they are available. See + * comments in _bt_first(). */ if (ikey < so->numberOfRequiredKeys) { diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 078d852924..61bf93a904 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.22 2005/06/06 17:01:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.23 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -101,7 +101,7 @@ _bt_restore_page(Page page, char *from, int len) (sizeof(BTItemData) - sizeof(IndexTupleData)); itemsz = MAXALIGN(itemsz); if (PageAddItem(page, (Item) from, itemsz, - FirstOffsetNumber, LP_USED) == InvalidOffsetNumber) + FirstOffsetNumber, LP_USED) == InvalidOffsetNumber) elog(PANIC, "_bt_restore_page: can't add item to page"); from += itemsz; } @@ -136,8 +136,8 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn, pageop->btpo_flags = BTP_META; /* - * Set pd_lower just past the end of the metadata. This is not - * essential but it makes the page look compressible to xlog.c. + * Set pd_lower just past the end of the metadata. This is not essential + * but it makes the page look compressible to xlog.c. 
*/ ((PageHeader) metapg)->pd_lower = ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg; @@ -181,7 +181,7 @@ btree_xlog_insert(bool isleaf, bool ismeta, if (!(record->xl_info & XLR_BKP_BLOCK_1)) { buffer = XLogReadBuffer(false, reln, - ItemPointerGetBlockNumber(&(xlrec->target.tid))); + ItemPointerGetBlockNumber(&(xlrec->target.tid))); if (!BufferIsValid(buffer)) elog(PANIC, "btree_insert_redo: block unfound"); page = (Page) BufferGetPage(buffer); @@ -217,8 +217,8 @@ btree_xlog_insert(bool isleaf, bool ismeta, if (!isleaf && incomplete_splits != NIL) { forget_matching_split(reln, xlrec->target.node, - ItemPointerGetBlockNumber(&(xlrec->target.tid)), - ItemPointerGetOffsetNumber(&(xlrec->target.tid)), + ItemPointerGetBlockNumber(&(xlrec->target.tid)), + ItemPointerGetOffsetNumber(&(xlrec->target.tid)), false); } } @@ -325,8 +325,8 @@ btree_xlog_split(bool onleft, bool isroot, if (xlrec->level > 0 && incomplete_splits != NIL) { forget_matching_split(reln, xlrec->target.node, - ItemPointerGetBlockNumber(&(xlrec->target.tid)), - ItemPointerGetOffsetNumber(&(xlrec->target.tid)), + ItemPointerGetBlockNumber(&(xlrec->target.tid)), + ItemPointerGetOffsetNumber(&(xlrec->target.tid)), false); } @@ -655,7 +655,7 @@ static void out_target(char *buf, xl_btreetid *target) { sprintf(buf + strlen(buf), "rel %u/%u/%u; tid %u/%u", - target->node.spcNode, target->node.dbNode, target->node.relNode, + target->node.spcNode, target->node.dbNode, target->node.relNode, ItemPointerGetBlockNumber(&(target->tid)), ItemPointerGetOffsetNumber(&(target->tid))); } diff --git a/src/backend/access/rtree/rtget.c b/src/backend/access/rtree/rtget.c index 199a178c4f..010a493d20 100644 --- a/src/backend/access/rtree/rtget.c +++ b/src/backend/access/rtree/rtget.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/rtree/rtget.c,v 1.36 2005/10/06 02:29:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/rtree/rtget.c,v 1.37 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -32,12 +32,12 @@ rtgettuple(PG_FUNCTION_ARGS) IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); RTreeScanOpaque so = (RTreeScanOpaque) s->opaque; - Page page; + Page page; OffsetNumber offnum; /* - * If we've already produced a tuple and the executor has informed - * us that it should be marked "killed", do so now. + * If we've already produced a tuple and the executor has informed us that + * it should be marked "killed", do so now. */ if (s->kill_prior_tuple && ItemPointerIsValid(&(s->currentItemData))) { @@ -48,14 +48,13 @@ rtgettuple(PG_FUNCTION_ARGS) } /* - * Get the next tuple that matches the search key; if asked to - * skip killed tuples, find the first non-killed tuple that - * matches. Return as soon as we've run out of matches or we've - * found an acceptable match. + * Get the next tuple that matches the search key; if asked to skip killed + * tuples, find the first non-killed tuple that matches. Return as soon as + * we've run out of matches or we've found an acceptable match. 
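The retrieval loop described in the rtgettuple comment above (keep fetching the next match, skipping entries already marked dead when killed tuples are to be ignored) has this general shape. The sketch below runs over a mock in-memory array; MockEntry, next_match, and the sample data are invented for illustration and are not the rtree scan code.

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct
    {
        int     value;
        bool    matches;    /* satisfies the scan quals? */
        bool    killed;     /* already marked dead, LP_DELETE-style? */
    } MockEntry;

    static MockEntry entries[] = {
        {1, true, true},    /* matches but was killed: may be skipped */
        {2, false, false},
        {3, true, false},   /* first live match */
    };
    #define NENTRIES (int) (sizeof(entries) / sizeof(entries[0]))

    /*
     * Return the index of the next acceptable match at or after 'pos',
     * skipping killed entries when asked to, or -1 when there are no more.
     */
    static int
    next_match(int pos, bool ignore_killed)
    {
        for (; pos < NENTRIES; pos++)
        {
            if (!entries[pos].matches)
                continue;
            if (ignore_killed && entries[pos].killed)
                continue;
            return pos;
        }
        return -1;
    }

    int
    main(void)
    {
        printf("%d\n", next_match(0, true));    /* 2: killed entry skipped */
        printf("%d\n", next_match(0, false));   /* 0: killed entry returned */
        return 0;
    }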
*/ for (;;) { - bool res = rtnext(s, dir); + bool res = rtnext(s, dir); if (res && s->ignore_killed_tuples) { @@ -73,7 +72,7 @@ Datum rtgetmulti(PG_FUNCTION_ARGS) { IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); - ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); + ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); int32 max_tids = PG_GETARG_INT32(2); int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); RTreeScanOpaque so = (RTreeScanOpaque) s->opaque; @@ -86,7 +85,7 @@ rtgetmulti(PG_FUNCTION_ARGS) res = rtnext(s, ForwardScanDirection); if (res && s->ignore_killed_tuples) { - Page page; + Page page; OffsetNumber offnum; offnum = ItemPointerGetOffsetNumber(&(s->currentItemData)); @@ -201,12 +200,11 @@ rtnext(IndexScanDesc s, ScanDirection dir) blk = ItemPointerGetBlockNumber(&(it->t_tid)); /* - * Note that we release the pin on the page as we descend - * down the tree, even though there's a good chance we'll - * eventually need to re-read the buffer later in this - * scan. This may or may not be optimal, but it doesn't - * seem likely to make a huge performance difference - * either way. + * Note that we release the pin on the page as we descend down the + * tree, even though there's a good chance we'll eventually need + * to re-read the buffer later in this scan. This may or may not + * be optimal, but it doesn't seem likely to make a huge + * performance difference either way. */ so->curbuf = ReleaseAndReadBuffer(so->curbuf, s->indexRelation, blk); p = BufferGetPage(so->curbuf); @@ -233,7 +231,7 @@ findnext(IndexScanDesc s, OffsetNumber n, ScanDirection dir) IndexTuple it; RTreePageOpaque po; RTreeScanOpaque so; - Page p; + Page p; so = (RTreeScanOpaque) s->opaque; p = BufferGetPage(so->curbuf); @@ -242,8 +240,8 @@ findnext(IndexScanDesc s, OffsetNumber n, ScanDirection dir) po = (RTreePageOpaque) PageGetSpecialPointer(p); /* - * If we modified the index during the scan, we may have a pointer to - * a ghost tuple, before the scan. If this is the case, back up one. + * If we modified the index during the scan, we may have a pointer to a + * ghost tuple, before the scan. If this is the case, back up one. 
*/ if (so->s_flags & RTS_CURBEFORE) @@ -277,7 +275,7 @@ findnext(IndexScanDesc s, OffsetNumber n, ScanDirection dir) } if (n >= FirstOffsetNumber && n <= maxoff) - return n; /* found a match on this page */ + return n; /* found a match on this page */ else return InvalidOffsetNumber; /* no match, go to next page */ } diff --git a/src/backend/access/rtree/rtproc.c b/src/backend/access/rtree/rtproc.c index d8d766f47d..292dac6a13 100644 --- a/src/backend/access/rtree/rtproc.c +++ b/src/backend/access/rtree/rtproc.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/rtree/rtproc.c,v 1.42 2004/12/31 21:59:26 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/access/rtree/rtproc.c,v 1.43 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -146,8 +146,8 @@ rt_poly_size(PG_FUNCTION_ARGS) ydim; /* - * Can't just use GETARG because of possibility that input is NULL; - * since POLYGON is toastable, GETARG will try to inspect its value + * Can't just use GETARG because of possibility that input is NULL; since + * POLYGON is toastable, GETARG will try to inspect its value */ if (aptr == NULL) { diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index 3b96b9ebe2..d684101d26 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/rtree/rtree.c,v 1.91 2005/08/10 21:36:46 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/rtree/rtree.c,v 1.92 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -121,8 +121,8 @@ rtbuild(PG_FUNCTION_ARGS) initRtstate(&buildstate.rtState, index); /* - * We expect to be called exactly once for any index relation. If - * that's not the case, big trouble's what we have. + * We expect to be called exactly once for any index relation. If that's + * not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", @@ -175,10 +175,10 @@ rtbuildCallback(Relation index, /* * Since we already have the index relation locked, we call rtdoinsert - * directly. Normal access method calls dispatch through rtinsert, - * which locks the relation for write. This is the right thing to do - * if you're inserting single tups, but not when you're initializing - * the whole index at once. + * directly. Normal access method calls dispatch through rtinsert, which + * locks the relation for write. This is the right thing to do if you're + * inserting single tups, but not when you're initializing the whole index + * at once. */ rtdoinsert(index, itup, &buildstate->rtState); @@ -226,9 +226,8 @@ rtinsert(PG_FUNCTION_ARGS) initRtstate(&rtState, r); /* - * Since rtree is not marked "amconcurrent" in pg_am, caller should - * have acquired exclusive lock on index relation. We need no locking - * here. + * Since rtree is not marked "amconcurrent" in pg_am, caller should have + * acquired exclusive lock on index relation. We need no locking here. 
*/ rtdoinsert(r, itup, &rtState); @@ -331,7 +330,7 @@ rttighten(Relation r, p = BufferGetPage(b); oldud = IndexTupleGetDatum(PageGetItem(p, - PageGetItemId(p, stk->rts_child))); + PageGetItemId(p, stk->rts_child))); FunctionCall2(&rtstate->sizeFn, oldud, PointerGetDatum(&old_size)); @@ -342,8 +341,8 @@ rttighten(Relation r, PointerGetDatum(&newd_size)); /* - * If newd_size == 0 we have degenerate rectangles, so we don't know - * if there was any change, so we have to assume there was. + * If newd_size == 0 we have degenerate rectangles, so we don't know if + * there was any change, so we have to assume there was. */ if ((newd_size == 0) || (newd_size != old_size)) { @@ -370,8 +369,8 @@ rttighten(Relation r, /* * The user may be defining an index on variable-sized data (like * polygons). If so, we need to get a constant-sized datum for - * insertion on the internal page. We do this by calling the - * union proc, which is required to return a rectangle. + * insertion on the internal page. We do this by calling the union + * proc, which is required to return a rectangle. */ tdatum = FunctionCall2(&rtstate->unionFn, datum, datum); @@ -428,8 +427,8 @@ rtdosplit(Relation r, /* * The root of the tree is the first block in the relation. If we're - * about to split the root, we need to do some hocus-pocus to enforce - * this guarantee. + * about to split the root, we need to do some hocus-pocus to enforce this + * guarantee. */ if (BufferGetBlockNumber(buffer) == P_ROOT) @@ -459,10 +458,9 @@ rtdosplit(Relation r, newitemoff = OffsetNumberNext(maxoff); /* - * spl_left contains a list of the offset numbers of the tuples that - * will go to the left page. For each offset number, get the tuple - * item, then add the item to the left page. Similarly for the right - * side. + * spl_left contains a list of the offset numbers of the tuples that will + * go to the left page. For each offset number, get the tuple item, then + * add the item to the left page. Similarly for the right side. */ /* fill left node */ @@ -525,13 +523,13 @@ rtdosplit(Relation r, * introduced in its structure by splitting this page. * * 2) "Tighten" the bounding box of the pointer to the left page in the - * parent node in the tree, if any. Since we moved a bunch of stuff - * off the left page, we expect it to get smaller. This happens in - * the internal insertion routine. + * parent node in the tree, if any. Since we moved a bunch of stuff off + * the left page, we expect it to get smaller. This happens in the + * internal insertion routine. * - * 3) Insert a pointer to the right page in the parent. This may cause - * the parent to split. If it does, we need to repeat steps one and - * two for each split node in the tree. + * 3) Insert a pointer to the right page in the parent. This may cause the + * parent to split. If it does, we need to repeat steps one and two for + * each split node in the tree. */ /* adjust active scans */ @@ -583,10 +581,10 @@ rtintinsert(Relation r, old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child)); /* - * This is a hack. Right now, we force rtree internal keys to be - * constant size. To fix this, need delete the old key and add both - * left and right for the two new pages. The insertion of left may - * force a split if the new left key is bigger than the old key. + * This is a hack. Right now, we force rtree internal keys to be constant + * size. To fix this, need delete the old key and add both left and right + * for the two new pages. 
The insertion of left may force a split if the + * new left key is bigger than the old key. */ if (IndexTupleSize(old) != IndexTupleSize(ltup)) @@ -603,8 +601,7 @@ rtintinsert(Relation r, rttighten(r, stk->rts_parent, newdatum, IndexTupleAttSize(ltup), rtstate); rtdosplit(r, b, stk->rts_parent, rtup, rtstate); - WriteBuffer(b); /* don't forget to release buffer! - - * 01/31/94 */ + WriteBuffer(b); /* don't forget to release buffer! - 01/31/94 */ } else { @@ -716,16 +713,15 @@ rtpicksplit(Relation r, int total_num_tuples, num_tuples_without_seeds, max_after_split; /* in Guttman's lingo, (M - m) */ - float diff; /* diff between cost of putting tuple left - * or right */ + float diff; /* diff between cost of putting tuple left or + * right */ SPLITCOST *cost_vector; int n; /* - * First, make sure the new item is not so large that we can't - * possibly fit it on a page, even by itself. (It's sufficient to - * make this test here, since any oversize tuple must lead to a page - * split attempt.) + * First, make sure the new item is not so large that we can't possibly + * fit it on a page, even by itself. (It's sufficient to make this test + * here, since any oversize tuple must lead to a page split attempt.) */ newitemsz = IndexTupleTotalSize(itup); if (newitemsz > RTPageAvailSpace) @@ -734,11 +730,10 @@ rtpicksplit(Relation r, errmsg("index row size %lu exceeds rtree maximum, %lu", (unsigned long) newitemsz, (unsigned long) RTPageAvailSpace), - errhint("Values larger than a buffer page cannot be indexed."))); + errhint("Values larger than a buffer page cannot be indexed."))); maxoff = PageGetMaxOffsetNumber(page); - newitemoff = OffsetNumberNext(maxoff); /* phony index for new - * item */ + newitemoff = OffsetNumberNext(maxoff); /* phony index for new item */ total_num_tuples = newitemoff; num_tuples_without_seeds = total_num_tuples - 2; max_after_split = total_num_tuples / 2; /* works for m = M/2 */ @@ -793,8 +788,7 @@ rtpicksplit(Relation r, pfree(DatumGetPointer(inter_d)); /* - * are these a more promising split that what we've already - * seen? + * are these a more promising split that what we've already seen? */ if (size_waste > waste || firsttime) { @@ -809,10 +803,10 @@ rtpicksplit(Relation r, if (firsttime) { /* - * There is no possible split except to put the new item on its - * own page. Since we still have to compute the union rectangles, - * we play dumb and run through the split algorithm anyway, - * setting seed_1 = first item on page and seed_2 = new item. + * There is no possible split except to put the new item on its own + * page. Since we still have to compute the union rectangles, we play + * dumb and run through the split algorithm anyway, setting seed_1 = + * first item on page and seed_2 = new item. */ seed_1 = FirstOffsetNumber; seed_2 = newitemoff; @@ -840,25 +834,23 @@ rtpicksplit(Relation r, /* * Now split up the regions between the two seeds. * - * The cost_vector array will contain hints for determining where each - * tuple should go. Each record in the array will contain a boolean, - * choose_left, that indicates which node the tuple prefers to be on, - * and the absolute difference in cost between putting the tuple in - * its favored node and in the other node. + * The cost_vector array will contain hints for determining where each tuple + * should go. 
Each record in the array will contain a boolean, + * choose_left, that indicates which node the tuple prefers to be on, and + * the absolute difference in cost between putting the tuple in its + * favored node and in the other node. * * Later, we will sort the cost_vector in descending order by cost - * difference, and consider the tuples in that order for placement. - * That way, the tuples that *really* want to be in one node or the - * other get to choose first, and the tuples that don't really care - * choose last. + * difference, and consider the tuples in that order for placement. That + * way, the tuples that *really* want to be in one node or the other get + * to choose first, and the tuples that don't really care choose last. * * First, build the cost_vector array. The new index tuple will also be - * handled in this loop, and represented in the array, with - * i==newitemoff. + * handled in this loop, and represented in the array, with i==newitemoff. * - * In the case of variable size tuples it is possible that we only have - * the two seeds and no other tuples, in which case we don't do any of - * this cost_vector stuff. + * In the case of variable size tuples it is possible that we only have the + * two seeds and no other tuples, in which case we don't do any of this + * cost_vector stuff. */ /* to keep compiler quiet */ @@ -908,13 +900,13 @@ rtpicksplit(Relation r, } /* - * Now make the final decisions about where each tuple will go, and - * build the vectors to return in the SPLITVEC record. + * Now make the final decisions about where each tuple will go, and build + * the vectors to return in the SPLITVEC record. * - * The cost_vector array contains (descriptions of) all the tuples, in - * the order that we want to consider them, so we we just iterate - * through it and place each tuple in left or right nodes, according - * to the criteria described below. + * The cost_vector array contains (descriptions of) all the tuples, in the + * order that we want to consider them, so we we just iterate through it + * and place each tuple in left or right nodes, according to the criteria + * described below. */ left = v->spl_left; @@ -923,8 +915,8 @@ rtpicksplit(Relation r, v->spl_nright = 0; /* - * Place the seeds first. left avail space, left union, right avail - * space, and right union have already been adjusted for the seeds. + * Place the seeds first. left avail space, left union, right avail space, + * and right union have already been adjusted for the seeds. */ *left++ = seed_1; @@ -966,32 +958,30 @@ rtpicksplit(Relation r, PointerGetDatum(&size_beta)); /* - * We prefer the page that shows smaller enlargement of its union - * area (Guttman's algorithm), but we must take care that at least - * one page will still have room for the new item after this one - * is added. + * We prefer the page that shows smaller enlargement of its union area + * (Guttman's algorithm), but we must take care that at least one page + * will still have room for the new item after this one is added. * - * (We know that all the old items together can fit on one page, so - * we need not worry about any other problem than failing to fit - * the new item.) + * (We know that all the old items together can fit on one page, so we + * need not worry about any other problem than failing to fit the new + * item.) * - * Guttman's algorithm actually has two factors to consider (in - * order): 1. 
if one node has so many tuples already assigned to - * it that the other needs all the rest in order to satisfy the - * condition that neither node has fewer than m tuples, then that - * is decisive; 2. otherwise, choose the page that shows the - * smaller enlargement of its union area. + * Guttman's algorithm actually has two factors to consider (in order): + * 1. if one node has so many tuples already assigned to it that the + * other needs all the rest in order to satisfy the condition that + * neither node has fewer than m tuples, then that is decisive; 2. + * otherwise, choose the page that shows the smaller enlargement of + * its union area. * - * I have chosen m = M/2, where M is the maximum number of tuples on - * a page. (Actually, this is only strictly true for fixed size - * tuples. For variable size tuples, there still might have to be - * only one tuple on a page, if it is really big. But even with - * variable size tuples we still try to get m as close as possible - * to M/2.) + * I have chosen m = M/2, where M is the maximum number of tuples on a + * page. (Actually, this is only strictly true for fixed size tuples. + * For variable size tuples, there still might have to be only one + * tuple on a page, if it is really big. But even with variable size + * tuples we still try to get m as close as possible to M/2.) * - * The question of which page shows the smaller enlargement of its - * union area has already been answered, and the answer stored in - * the choose_left field of the SPLITCOST record. + * The question of which page shows the smaller enlargement of its union + * area has already been answered, and the answer stored in the + * choose_left field of the SPLITCOST record. */ left_feasible = (left_avail_space >= item_1_sz && ((left_avail_space - item_1_sz) >= newitemsz || @@ -1003,9 +993,8 @@ rtpicksplit(Relation r, { /* * Both feasible, use Guttman's algorithm. First check the m - * condition described above, and if that doesn't apply, - * choose the page with the smaller enlargement of its union - * area. + * condition described above, and if that doesn't apply, choose + * the page with the smaller enlargement of its union area. */ if (v->spl_nleft > max_after_split) choose_left = false; @@ -1153,9 +1142,8 @@ rtbulkdelete(PG_FUNCTION_ARGS) num_index_tuples = 0; /* - * Since rtree is not marked "amconcurrent" in pg_am, caller should - * have acquired exclusive lock on index relation. We need no locking - * here. + * Since rtree is not marked "amconcurrent" in pg_am, caller should have + * acquired exclusive lock on index relation. We need no locking here. */ /* diff --git a/src/backend/access/rtree/rtscan.c b/src/backend/access/rtree/rtscan.c index 3f9f81befb..577c6a6436 100644 --- a/src/backend/access/rtree/rtscan.c +++ b/src/backend/access/rtree/rtscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.59 2005/06/24 00:18:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.60 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -123,11 +123,11 @@ rtrescan(PG_FUNCTION_ARGS) /* * Scans on internal pages use different operators than they do on - * leaf pages. For example, if the user wants all boxes that - * exactly match (x1,y1,x2,y2), then on internal pages we need to - * find all boxes that contain (x1,y1,x2,y2). rtstrat.c knows - * how to pick the opclass member to use for internal pages. 
- * In some cases we need to negate the result of the opclass member. + * leaf pages. For example, if the user wants all boxes that exactly + * match (x1,y1,x2,y2), then on internal pages we need to find all + * boxes that contain (x1,y1,x2,y2). rtstrat.c knows how to pick the + * opclass member to use for internal pages. In some cases we need to + * negate the result of the opclass member. */ for (i = 0; i < s->numberOfKeys; i++) { @@ -333,9 +333,9 @@ ReleaseResources_rtree(void) RTScanList next; /* - * Note: this should be a no-op during normal query shutdown. However, - * in an abort situation ExecutorEnd is not called and so there may be - * open index scans to clean up. + * Note: this should be a no-op during normal query shutdown. However, in + * an abort situation ExecutorEnd is not called and so there may be open + * index scans to clean up. */ prev = NULL; @@ -440,8 +440,7 @@ adjustiptr(IndexScanDesc s, else { /* - * remember that we're before the current - * tuple + * remember that we're before the current tuple */ ItemPointerSet(iptr, blkno, FirstOffsetNumber); if (iptr == &(s->currentItemData)) diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 4a2e1f5592..f29f460ade 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -24,7 +24,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.32 2005/08/20 23:26:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.33 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -222,14 +222,14 @@ StartupCLOG(void) /* * Zero out the remainder of the current clog page. Under normal * circumstances it should be zeroes already, but it seems at least - * theoretically possible that XLOG replay will have settled on a - * nextXID value that is less than the last XID actually used and - * marked by the previous database lifecycle (since subtransaction - * commit writes clog but makes no WAL entry). Let's just be safe. - * (We need not worry about pages beyond the current one, since those - * will be zeroed when first used. For the same reason, there is no - * need to do anything when nextXid is exactly at a page boundary; and - * it's likely that the "current" page doesn't exist yet in that case.) + * theoretically possible that XLOG replay will have settled on a nextXID + * value that is less than the last XID actually used and marked by the + * previous database lifecycle (since subtransaction commit writes clog + * but makes no WAL entry). Let's just be safe. (We need not worry about + * pages beyond the current one, since those will be zeroed when first + * used. For the same reason, there is no need to do anything when + * nextXid is exactly at a page boundary; and it's likely that the + * "current" page doesn't exist yet in that case.) */ if (TransactionIdToPgIndex(xid) != 0) { @@ -325,8 +325,8 @@ TruncateCLOG(TransactionId oldestXact) int cutoffPage; /* - * The cutoff point is the start of the segment containing oldestXact. - * We pass the *page* containing oldestXact to SimpleLruTruncate. + * The cutoff point is the start of the segment containing oldestXact. We + * pass the *page* containing oldestXact to SimpleLruTruncate. 
*/ cutoffPage = TransactionIdToPage(oldestXact); diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 1adaebb6d8..ffe14ed6bf 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -4,15 +4,15 @@ * PostgreSQL multi-transaction-log manager * * The pg_multixact manager is a pg_clog-like manager that stores an array - * of TransactionIds for each MultiXactId. It is a fundamental part of the - * shared-row-lock implementation. A share-locked tuple stores a + * of TransactionIds for each MultiXactId. It is a fundamental part of the + * shared-row-lock implementation. A share-locked tuple stores a * MultiXactId in its Xmax, and a transaction that needs to wait for the * tuple to be unlocked can sleep on the potentially-several TransactionIds * that compose the MultiXactId. * * We use two SLRU areas, one for storing the offsets at which the data * starts for each MultiXactId in the other one. This trick allows us to - * store variable length arrays of TransactionIds. (We could alternatively + * store variable length arrays of TransactionIds. (We could alternatively * use one area containing counts and TransactionIds, with valid MultiXactId * values pointing at slots containing counts; but that way seems less robust * since it would get completely confused if someone inquired about a bogus @@ -32,7 +32,7 @@ * * Like clog.c, and unlike subtrans.c, we have to preserve state across * crashes and ensure that MXID and offset numbering increases monotonically - * across a crash. We do this in the same way as it's done for transaction + * across a crash. We do this in the same way as it's done for transaction * IDs: the WAL record is guaranteed to contain evidence of every MXID we * could need to worry about, and we just make sure that at the end of * replay, the next-MXID and next-offset counters are at least as large as @@ -42,7 +42,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.8 2005/08/20 23:26:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.9 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -59,13 +59,13 @@ /* - * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is + * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is * used everywhere else in Postgres. * * Note: because both MultiXactOffsets and TransactionIds are 32 bits and * wrap around at 0xFFFFFFFF, MultiXact page numbering also wraps around at * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE, and segment numbering at - * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no + * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no * explicit notice of that fact in this module, except when comparing segment * and page numbers in TruncateMultiXact * (see MultiXact{Offset,Member}PagePrecedes). @@ -92,11 +92,11 @@ static SlruCtlData MultiXactOffsetCtlData; static SlruCtlData MultiXactMemberCtlData; -#define MultiXactOffsetCtl (&MultiXactOffsetCtlData) -#define MultiXactMemberCtl (&MultiXactMemberCtlData) +#define MultiXactOffsetCtl (&MultiXactOffsetCtlData) +#define MultiXactMemberCtl (&MultiXactMemberCtlData) /* - * MultiXact state shared across all backends. 
All this state is protected + * MultiXact state shared across all backends. All this state is protected * by MultiXactGenLock. (We also use MultiXactOffsetControlLock and * MultiXactMemberControlLock to guard accesses to the two sets of SLRU * buffers. For concurrency's sake, we avoid holding more than one of these @@ -105,50 +105,48 @@ static SlruCtlData MultiXactMemberCtlData; typedef struct MultiXactStateData { /* next-to-be-assigned MultiXactId */ - MultiXactId nextMXact; + MultiXactId nextMXact; /* next-to-be-assigned offset */ - MultiXactOffset nextOffset; + MultiXactOffset nextOffset; /* the Offset SLRU area was last truncated at this MultiXactId */ - MultiXactId lastTruncationPoint; + MultiXactId lastTruncationPoint; /* - * Per-backend data starts here. We have two arrays stored in - * the area immediately following the MultiXactStateData struct. - * Each is indexed by BackendId. (Note: valid BackendIds run from 1 to - * MaxBackends; element zero of each array is never used.) + * Per-backend data starts here. We have two arrays stored in the area + * immediately following the MultiXactStateData struct. Each is indexed by + * BackendId. (Note: valid BackendIds run from 1 to MaxBackends; element + * zero of each array is never used.) * - * OldestMemberMXactId[k] is the oldest MultiXactId each backend's - * current transaction(s) could possibly be a member of, or - * InvalidMultiXactId when the backend has no live transaction that - * could possibly be a member of a MultiXact. Each backend sets its - * entry to the current nextMXact counter just before first acquiring a - * shared lock in a given transaction, and clears it at transaction end. - * (This works because only during or after acquiring a shared lock - * could an XID possibly become a member of a MultiXact, and that - * MultiXact would have to be created during or after the lock - * acquisition.) + * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current + * transaction(s) could possibly be a member of, or InvalidMultiXactId + * when the backend has no live transaction that could possibly be a + * member of a MultiXact. Each backend sets its entry to the current + * nextMXact counter just before first acquiring a shared lock in a given + * transaction, and clears it at transaction end. (This works because only + * during or after acquiring a shared lock could an XID possibly become a + * member of a MultiXact, and that MultiXact would have to be created + * during or after the lock acquisition.) * - * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's - * current transaction(s) think is potentially live, or InvalidMultiXactId - * when not in a transaction or not in a transaction that's paid any - * attention to MultiXacts yet. This is computed when first needed in - * a given transaction, and cleared at transaction end. We can compute - * it as the minimum of the valid OldestMemberMXactId[] entries at the - * time we compute it (using nextMXact if none are valid). Each backend - * is required not to attempt to access any SLRU data for MultiXactIds - * older than its own OldestVisibleMXactId[] setting; this is necessary - * because the checkpointer could truncate away such data at any instant. + * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's current + * transaction(s) think is potentially live, or InvalidMultiXactId when + * not in a transaction or not in a transaction that's paid any attention + * to MultiXacts yet. 
This is computed when first needed in a given + * transaction, and cleared at transaction end. We can compute it as the + * minimum of the valid OldestMemberMXactId[] entries at the time we + * compute it (using nextMXact if none are valid). Each backend is + * required not to attempt to access any SLRU data for MultiXactIds older + * than its own OldestVisibleMXactId[] setting; this is necessary because + * the checkpointer could truncate away such data at any instant. * - * The checkpointer can compute the safe truncation point as the oldest - * valid value among all the OldestMemberMXactId[] and - * OldestVisibleMXactId[] entries, or nextMXact if none are valid. - * Clearly, it is not possible for any later-computed OldestVisibleMXactId - * value to be older than this, and so there is no risk of truncating - * data that is still needed. + * The checkpointer can compute the safe truncation point as the oldest valid + * value among all the OldestMemberMXactId[] and OldestVisibleMXactId[] + * entries, or nextMXact if none are valid. Clearly, it is not possible + * for any later-computed OldestVisibleMXactId value to be older than + * this, and so there is no risk of truncating data that is still needed. */ - MultiXactId perBackendXactIds[1]; /* VARIABLE LENGTH ARRAY */ + MultiXactId perBackendXactIds[1]; /* VARIABLE LENGTH ARRAY */ } MultiXactStateData; /* Pointers to the state data in shared memory */ @@ -176,13 +174,13 @@ static MultiXactId *OldestVisibleMXactId; typedef struct mXactCacheEnt { struct mXactCacheEnt *next; - MultiXactId multi; - int nxids; - TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */ + MultiXactId multi; + int nxids; + TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */ } mXactCacheEnt; -static mXactCacheEnt *MXactCache = NULL; -static MemoryContext MXactContext = NULL; +static mXactCacheEnt *MXactCache = NULL; +static MemoryContext MXactContext = NULL; #ifdef MULTIXACT_DEBUG @@ -201,14 +199,15 @@ static MemoryContext MXactContext = NULL; static void MultiXactIdSetOldestVisible(void); static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids); static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, - int nxids, TransactionId *xids); + int nxids, TransactionId *xids); static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset); /* MultiXact cache management */ static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids); -static int mXactCacheGetById(MultiXactId multi, TransactionId **xids); +static int mXactCacheGetById(MultiXactId multi, TransactionId **xids); static void mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids); -static int xidComparator(const void *arg1, const void *arg2); +static int xidComparator(const void *arg1, const void *arg2); + #ifdef MULTIXACT_DEBUG static char *mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids); #endif @@ -220,7 +219,7 @@ static bool MultiXactOffsetPagePrecedes(int page1, int page2); static bool MultiXactMemberPagePrecedes(int page1, int page2); static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2); static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, - MultiXactOffset offset2); + MultiXactOffset offset2); static void ExtendMultiXactOffset(MultiXactId multi); static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers); static void TruncateMultiXact(void); @@ -239,8 +238,8 @@ static void WriteMZeroPageXlogRec(int pageno, uint8 info); MultiXactId MultiXactIdCreate(TransactionId xid1, TransactionId xid2) { - 
MultiXactId newMulti; - TransactionId xids[2]; + MultiXactId newMulti; + TransactionId xids[2]; AssertArg(TransactionIdIsValid(xid1)); AssertArg(TransactionIdIsValid(xid2)); @@ -248,9 +247,9 @@ MultiXactIdCreate(TransactionId xid1, TransactionId xid2) Assert(!TransactionIdEquals(xid1, xid2)); /* - * Note: unlike MultiXactIdExpand, we don't bother to check that both - * XIDs are still running. In typical usage, xid2 will be our own XID - * and the caller just did a check on xid1, so it'd be wasted effort. + * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs + * are still running. In typical usage, xid2 will be our own XID and the + * caller just did a check on xid1, so it'd be wasted effort. */ xids[0] = xid1; @@ -281,12 +280,12 @@ MultiXactIdCreate(TransactionId xid1, TransactionId xid2) MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid) { - MultiXactId newMulti; - TransactionId *members; - TransactionId *newMembers; - int nmembers; - int i; - int j; + MultiXactId newMulti; + TransactionId *members; + TransactionId *newMembers; + int nmembers; + int i; + int j; AssertArg(MultiXactIdIsValid(multi)); AssertArg(TransactionIdIsValid(xid)); @@ -313,8 +312,8 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid) } /* - * If the TransactionId is already a member of the MultiXactId, - * just return the existing MultiXactId. + * If the TransactionId is already a member of the MultiXactId, just + * return the existing MultiXactId. */ for (i = 0; i < nmembers; i++) { @@ -329,9 +328,9 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid) /* * Determine which of the members of the MultiXactId are still running, - * and use them to create a new one. (Removing dead members is just - * an optimization, but a useful one. Note we have the same race - * condition here as above: j could be 0 at the end of the loop.) + * and use them to create a new one. (Removing dead members is just an + * optimization, but a useful one. Note we have the same race condition + * here as above: j could be 0 at the end of the loop.) */ newMembers = (TransactionId *) palloc(sizeof(TransactionId) * (nmembers + 1)); @@ -355,7 +354,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid) /* * MultiXactIdIsRunning - * Returns whether a MultiXactId is "running". + * Returns whether a MultiXactId is "running". * * We return true if at least one member of the given MultiXactId is still * running. Note that a "false" result is certain not to change, @@ -365,9 +364,9 @@ bool MultiXactIdIsRunning(MultiXactId multi) { TransactionId *members; - TransactionId myXid; - int nmembers; - int i; + TransactionId myXid; + int nmembers; + int i; debug_elog3(DEBUG2, "IsRunning %u?", multi); @@ -394,7 +393,7 @@ MultiXactIdIsRunning(MultiXactId multi) /* * This could be made faster by having another entry point in procarray.c, - * walking the PGPROC array only once for all the members. But in most + * walking the PGPROC array only once for all the members. But in most * cases nmembers should be small enough that it doesn't much matter. */ for (i = 0; i < nmembers; i++) @@ -436,19 +435,19 @@ MultiXactIdSetOldestMember(void) /* * You might think we don't need to acquire a lock here, since - * fetching and storing of TransactionIds is probably atomic, - * but in fact we do: suppose we pick up nextMXact and then - * lose the CPU for a long time. 
Someone else could advance - * nextMXact, and then another someone else could compute an - * OldestVisibleMXactId that would be after the value we are - * going to store when we get control back. Which would be wrong. + * fetching and storing of TransactionIds is probably atomic, but in + * fact we do: suppose we pick up nextMXact and then lose the CPU for + * a long time. Someone else could advance nextMXact, and then + * another someone else could compute an OldestVisibleMXactId that + * would be after the value we are going to store when we get control + * back. Which would be wrong. */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); /* * We have to beware of the possibility that nextMXact is in the - * wrapped-around state. We don't fix the counter itself here, - * but we must be sure to store a valid value in our array entry. + * wrapped-around state. We don't fix the counter itself here, but we + * must be sure to store a valid value in our array entry. */ nextMXact = MultiXactState->nextMXact; if (nextMXact < FirstMultiXactId) @@ -475,7 +474,7 @@ MultiXactIdSetOldestMember(void) * The value to set is the oldest of nextMXact and all the valid per-backend * OldestMemberMXactId[] entries. Because of the locking we do, we can be * certain that no subsequent call to MultiXactIdSetOldestMember can set - * an OldestMemberMXactId[] entry older than what we compute here. Therefore + * an OldestMemberMXactId[] entry older than what we compute here. Therefore * there is no live transaction, now or later, that can be a member of any * MultiXactId older than the OldestVisibleMXactId we compute here. */ @@ -485,14 +484,14 @@ MultiXactIdSetOldestVisible(void) if (!MultiXactIdIsValid(OldestVisibleMXactId[MyBackendId])) { MultiXactId oldestMXact; - int i; + int i; LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); /* * We have to beware of the possibility that nextMXact is in the - * wrapped-around state. We don't fix the counter itself here, - * but we must be sure to store a valid value in our array entry. + * wrapped-around state. We don't fix the counter itself here, but we + * must be sure to store a valid value in our array entry. */ oldestMXact = MultiXactState->nextMXact; if (oldestMXact < FirstMultiXactId) @@ -535,17 +534,17 @@ void MultiXactIdWait(MultiXactId multi) { TransactionId *members; - int nmembers; + int nmembers; nmembers = GetMultiXactIdMembers(multi, &members); if (nmembers >= 0) { - int i; + int i; for (i = 0; i < nmembers; i++) { - TransactionId member = members[i]; + TransactionId member = members[i]; debug_elog4(DEBUG2, "MultiXactIdWait: waiting for %d (%u)", i, member); @@ -564,19 +563,19 @@ MultiXactIdWait(MultiXactId multi) bool ConditionalMultiXactIdWait(MultiXactId multi) { - bool result = true; + bool result = true; TransactionId *members; - int nmembers; + int nmembers; nmembers = GetMultiXactIdMembers(multi, &members); if (nmembers >= 0) { - int i; + int i; for (i = 0; i < nmembers; i++) { - TransactionId member = members[i]; + TransactionId member = members[i]; debug_elog4(DEBUG2, "ConditionalMultiXactIdWait: trying %d (%u)", i, member); @@ -596,7 +595,7 @@ ConditionalMultiXactIdWait(MultiXactId multi) /* * CreateMultiXactId - * Make a new MultiXactId + * Make a new MultiXactId * * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the * given TransactionIds as members. Returns the newly created MultiXactId. 
@@ -606,7 +605,7 @@ ConditionalMultiXactIdWait(MultiXactId multi) static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids) { - MultiXactId multi; + MultiXactId multi; MultiXactOffset offset; XLogRecData rdata[2]; xl_multixact_create xlrec; @@ -641,15 +640,15 @@ CreateMultiXactId(int nxids, TransactionId *xids) /* * Make an XLOG entry describing the new MXID. * - * Note: we need not flush this XLOG entry to disk before proceeding. - * The only way for the MXID to be referenced from any data page is - * for heap_lock_tuple() to have put it there, and heap_lock_tuple() - * generates an XLOG record that must follow ours. The normal LSN - * interlock between the data page and that XLOG record will ensure - * that our XLOG record reaches disk first. If the SLRU members/offsets - * data reaches disk sooner than the XLOG record, we do not care because - * we'll overwrite it with zeroes unless the XLOG record is there too; - * see notes at top of this file. + * Note: we need not flush this XLOG entry to disk before proceeding. The + * only way for the MXID to be referenced from any data page is for + * heap_lock_tuple() to have put it there, and heap_lock_tuple() generates + * an XLOG record that must follow ours. The normal LSN interlock between + * the data page and that XLOG record will ensure that our XLOG record + * reaches disk first. If the SLRU members/offsets data reaches disk + * sooner than the XLOG record, we do not care because we'll overwrite it + * with zeroes unless the XLOG record is there too; see notes at top of + * this file. */ xlrec.mid = multi; xlrec.moff = offset; @@ -702,9 +701,9 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, /* * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction" * to complain about if there's any I/O error. This is kinda bogus, but - * since the errors will always give the full pathname, it should be - * clear enough that a MultiXactId is really involved. Perhaps someday - * we'll take the trouble to generalize the slru.c error reporting code. + * since the errors will always give the full pathname, it should be clear + * enough that a MultiXactId is really involved. Perhaps someday we'll + * take the trouble to generalize the slru.c error reporting code. */ slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, multi); offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno]; @@ -750,7 +749,7 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, * GetNewMultiXactId * Get the next MultiXactId. * - * Also, reserve the needed amount of space in the "members" area. The + * Also, reserve the needed amount of space in the "members" area. The * starting offset of the reserved space is returned in *offset. * * This may generate XLOG records for expansion of the offsets and/or members @@ -761,7 +760,7 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset) { - MultiXactId result; + MultiXactId result; debug_elog3(DEBUG2, "GetNew: for %d xids", nxids); @@ -785,8 +784,8 @@ GetNewMultiXactId(int nxids, MultiXactOffset *offset) * Advance counter. As in GetNewTransactionId(), this must not happen * until after ExtendMultiXactOffset has succeeded! * - * We don't care about MultiXactId wraparound here; it will be handled by - * the next iteration. But note that nextMXact may be InvalidMultiXactId + * We don't care about MultiXactId wraparound here; it will be handled by the + * next iteration. 
But note that nextMXact may be InvalidMultiXactId * after this routine exits, so anyone else looking at the variable must * be prepared to deal with that. */ @@ -809,7 +808,7 @@ GetNewMultiXactId(int nxids, MultiXactOffset *offset) /* * GetMultiXactIdMembers - * Returns the set of TransactionIds that make up a MultiXactId + * Returns the set of TransactionIds that make up a MultiXactId * * We return -1 if the MultiXactId is too old to possibly have any members * still running; in that case we have not actually looked them up, and @@ -822,13 +821,13 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids) int prev_pageno; int entryno; int slotno; - MultiXactOffset *offptr; - MultiXactOffset offset; + MultiXactOffset *offptr; + MultiXactOffset offset; int length; int i; - MultiXactId nextMXact; - MultiXactId tmpMXact; - MultiXactOffset nextOffset; + MultiXactId nextMXact; + MultiXactId tmpMXact; + MultiXactOffset nextOffset; TransactionId *ptr; debug_elog3(DEBUG2, "GetMembers: asked for %u", multi); @@ -850,13 +849,13 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids) /* * We check known limits on MultiXact before resorting to the SLRU area. * - * An ID older than our OldestVisibleMXactId[] entry can't possibly still - * be running, and we'd run the risk of trying to read already-truncated - * SLRU data if we did try to examine it. + * An ID older than our OldestVisibleMXactId[] entry can't possibly still be + * running, and we'd run the risk of trying to read already-truncated SLRU + * data if we did try to examine it. * - * Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is - * seen, it implies undetected ID wraparound has occurred. We just - * silently assume that such an ID is no longer running. + * Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is seen, + * it implies undetected ID wraparound has occurred. We just silently + * assume that such an ID is no longer running. * * Shared lock is enough here since we aren't modifying any global state. * Also, we can examine our own OldestVisibleMXactId without the lock, @@ -880,9 +879,9 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids) } /* - * Before releasing the lock, save the current counter values, because - * the target MultiXactId may be just one less than nextMXact. We will - * need to use nextOffset as the endpoint if so. + * Before releasing the lock, save the current counter values, because the + * target MultiXactId may be just one less than nextMXact. We will need + * to use nextOffset as the endpoint if so. */ nextMXact = MultiXactState->nextMXact; nextOffset = MultiXactState->nextOffset; @@ -902,11 +901,11 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids) /* * How many members do we need to read? If we are at the end of the - * assigned MultiXactIds, use the offset just saved above. Else we - * need to check the MultiXactId following ours. + * assigned MultiXactIds, use the offset just saved above. Else we need + * to check the MultiXactId following ours. * - * Use the same increment rule as GetNewMultiXactId(), that is, don't - * handle wraparound explicitly until needed. + * Use the same increment rule as GetNewMultiXactId(), that is, don't handle + * wraparound explicitly until needed. 
*/ tmpMXact = multi + 1; @@ -974,9 +973,9 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids) /* * mXactCacheGetBySet - * returns a MultiXactId from the cache based on the set of - * TransactionIds that compose it, or InvalidMultiXactId if - * none matches. + * returns a MultiXactId from the cache based on the set of + * TransactionIds that compose it, or InvalidMultiXactId if + * none matches. * * This is helpful, for example, if two transactions want to lock a huge * table. By using the cache, the second will use the same MultiXactId @@ -988,7 +987,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids) static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids) { - mXactCacheEnt *entry; + mXactCacheEnt *entry; debug_elog3(DEBUG2, "CacheGet: looking for %s", mxid_to_string(InvalidMultiXactId, nxids, xids)); @@ -1015,8 +1014,8 @@ mXactCacheGetBySet(int nxids, TransactionId *xids) /* * mXactCacheGetById - * returns the composing TransactionId set from the cache for a - * given MultiXactId, if present. + * returns the composing TransactionId set from the cache for a + * given MultiXactId, if present. * * If successful, *xids is set to the address of a palloc'd copy of the * TransactionId set. Return value is number of members, or -1 on failure. @@ -1024,7 +1023,7 @@ mXactCacheGetBySet(int nxids, TransactionId *xids) static int mXactCacheGetById(MultiXactId multi, TransactionId **xids) { - mXactCacheEnt *entry; + mXactCacheEnt *entry; debug_elog3(DEBUG2, "CacheGet: looking for %u", multi); @@ -1032,7 +1031,7 @@ mXactCacheGetById(MultiXactId multi, TransactionId **xids) { if (entry->multi == multi) { - TransactionId *ptr; + TransactionId *ptr; Size size; size = sizeof(TransactionId) * entry->nxids; @@ -1042,7 +1041,7 @@ mXactCacheGetById(MultiXactId multi, TransactionId **xids) memcpy(ptr, entry->xids, size); debug_elog3(DEBUG2, "CacheGet: found %s", - mxid_to_string(multi, entry->nxids, entry->xids)); + mxid_to_string(multi, entry->nxids, entry->xids)); return entry->nxids; } } @@ -1053,12 +1052,12 @@ mXactCacheGetById(MultiXactId multi, TransactionId **xids) /* * mXactCachePut - * Add a new MultiXactId and its composing set into the local cache. + * Add a new MultiXactId and its composing set into the local cache. */ static void mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids) { - mXactCacheEnt *entry; + mXactCacheEnt *entry; debug_elog3(DEBUG2, "CachePut: storing %s", mxid_to_string(multi, nxids, xids)); @@ -1092,7 +1091,7 @@ mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids) /* * xidComparator - * qsort comparison function for XIDs + * qsort comparison function for XIDs * * We don't need to use wraparound comparison for XIDs, and indeed must * not do so since that does not respect the triangle inequality! 
Any @@ -1101,8 +1100,8 @@ mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids) static int xidComparator(const void *arg1, const void *arg2) { - TransactionId xid1 = * (const TransactionId *) arg1; - TransactionId xid2 = * (const TransactionId *) arg2; + TransactionId xid1 = *(const TransactionId *) arg1; + TransactionId xid2 = *(const TransactionId *) arg2; if (xid1 > xid2) return 1; @@ -1115,8 +1114,9 @@ xidComparator(const void *arg1, const void *arg2) static char * mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids) { - char *str = palloc(15 * (nxids + 1) + 4); - int i; + char *str = palloc(15 * (nxids + 1) + 4); + int i; + snprintf(str, 47, "%u %d[%u", multi, nxids, xids[0]); for (i = 1; i < nxids; i++) @@ -1137,18 +1137,18 @@ void AtEOXact_MultiXact(void) { /* - * Reset our OldestMemberMXactId and OldestVisibleMXactId values, - * both of which should only be valid while within a transaction. + * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of + * which should only be valid while within a transaction. * - * We assume that storing a MultiXactId is atomic and so we need - * not take MultiXactGenLock to do this. + * We assume that storing a MultiXactId is atomic and so we need not take + * MultiXactGenLock to do this. */ OldestMemberMXactId[MyBackendId] = InvalidMultiXactId; OldestVisibleMXactId[MyBackendId] = InvalidMultiXactId; /* - * Discard the local MultiXactId cache. Since MXactContext was created - * as a child of TopTransactionContext, we needn't delete it explicitly. + * Discard the local MultiXactId cache. Since MXactContext was created as + * a child of TopTransactionContext, we needn't delete it explicitly. */ MXactContext = NULL; MXactCache = NULL; @@ -1156,7 +1156,7 @@ AtEOXact_MultiXact(void) /* * Initialization of shared memory for MultiXact. We use two SLRU areas, - * thus double memory. Also, reserve space for the shared MultiXactState + * thus double memory. Also, reserve space for the shared MultiXactState * struct and the per-backend MultiXactId arrays (two of those, too). */ Size @@ -1178,7 +1178,7 @@ MultiXactShmemSize(void) void MultiXactShmemInit(void) { - bool found; + bool found; debug_elog2(DEBUG2, "Shared Memory Init for MultiXact"); @@ -1205,8 +1205,8 @@ MultiXactShmemInit(void) Assert(found); /* - * Set up array pointers. Note that perBackendXactIds[0] is wasted - * space since we only use indexes 1..MaxBackends in each array. + * Set up array pointers. Note that perBackendXactIds[0] is wasted space + * since we only use indexes 1..MaxBackends in each array. */ OldestMemberMXactId = MultiXactState->perBackendXactIds; OldestVisibleMXactId = OldestMemberMXactId + MaxBackends; @@ -1214,7 +1214,7 @@ MultiXactShmemInit(void) /* * This func must be called ONCE on system install. It creates the initial - * MultiXact segments. (The MultiXacts directories are assumed to have been + * MultiXact segments. (The MultiXacts directories are assumed to have been * created by initdb, and MultiXactShmemInit must have been called already.) */ void @@ -1287,7 +1287,7 @@ ZeroMultiXactMemberPage(int pageno, bool writeXlog) * This must be called ONCE during postmaster or standalone-backend startup. * * StartupXLOG has already established nextMXact/nextOffset by calling - * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact. Note that we + * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact. Note that we * may already have replayed WAL data into the SLRU files. 
* * We don't need any locks here, really; the SLRU locks are taken @@ -1311,14 +1311,14 @@ StartupMultiXact(void) MultiXactOffsetCtl->shared->latest_page_number = pageno; /* - * Zero out the remainder of the current offsets page. See notes - * in StartupCLOG() for motivation. + * Zero out the remainder of the current offsets page. See notes in + * StartupCLOG() for motivation. */ entryno = MultiXactIdToOffsetEntry(multi); if (entryno != 0) { int slotno; - MultiXactOffset *offptr; + MultiXactOffset *offptr; slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, multi); offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno]; @@ -1341,14 +1341,14 @@ StartupMultiXact(void) MultiXactMemberCtl->shared->latest_page_number = pageno; /* - * Zero out the remainder of the current members page. See notes - * in StartupCLOG() for motivation. + * Zero out the remainder of the current members page. See notes in + * StartupCLOG() for motivation. */ entryno = MXOffsetToMemberEntry(offset); if (entryno != 0) { int slotno; - TransactionId *xidptr; + TransactionId *xidptr; slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, offset); xidptr = (TransactionId *) MultiXactMemberCtl->shared->page_buffer[slotno]; @@ -1499,14 +1499,14 @@ static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers) { /* - * It's possible that the members span more than one page of the - * members file, so we loop to ensure we consider each page. The - * coding is not optimal if the members span several pages, but - * that seems unusual enough to not worry much about. + * It's possible that the members span more than one page of the members + * file, so we loop to ensure we consider each page. The coding is not + * optimal if the members span several pages, but that seems unusual + * enough to not worry much about. */ while (nmembers > 0) { - int entryno; + int entryno; /* * Only zero when at first entry of a page. @@ -1514,7 +1514,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) entryno = MXOffsetToMemberEntry(offset); if (entryno == 0) { - int pageno; + int pageno; pageno = MXOffsetToMemberPage(offset); @@ -1536,7 +1536,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) * Remove all MultiXactOffset and MultiXactMember segments before the oldest * ones still of interest. * - * This is called only during checkpoints. We assume no more than one + * This is called only during checkpoints. We assume no more than one * backend does this at a time. * * XXX do we have any issues with needing to checkpoint here? @@ -1545,23 +1545,23 @@ static void TruncateMultiXact(void) { MultiXactId nextMXact; - MultiXactOffset nextOffset; + MultiXactOffset nextOffset; MultiXactId oldestMXact; - MultiXactOffset oldestOffset; + MultiXactOffset oldestOffset; int cutoffPage; int i; /* - * First, compute where we can safely truncate. Per notes above, - * this is the oldest valid value among all the OldestMemberMXactId[] and + * First, compute where we can safely truncate. Per notes above, this is + * the oldest valid value among all the OldestMemberMXactId[] and * OldestVisibleMXactId[] entries, or nextMXact if none are valid. */ LWLockAcquire(MultiXactGenLock, LW_SHARED); /* * We have to beware of the possibility that nextMXact is in the - * wrapped-around state. We don't fix the counter itself here, - * but we must be sure to use a valid value in our calculation. + * wrapped-around state. We don't fix the counter itself here, but we + * must be sure to use a valid value in our calculation. 
*/ nextMXact = MultiXactState->nextMXact; if (nextMXact < FirstMultiXactId) @@ -1597,9 +1597,9 @@ TruncateMultiXact(void) return; /* - * We need to determine where to truncate MultiXactMember. If we - * found a valid oldest MultiXactId, read its starting offset; - * otherwise we use the nextOffset value we saved above. + * We need to determine where to truncate MultiXactMember. If we found a + * valid oldest MultiXactId, read its starting offset; otherwise we use + * the nextOffset value we saved above. */ if (oldestMXact == nextMXact) oldestOffset = nextOffset; @@ -1608,7 +1608,7 @@ TruncateMultiXact(void) int pageno; int slotno; int entryno; - MultiXactOffset *offptr; + MultiXactOffset *offptr; LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE); @@ -1624,8 +1624,8 @@ TruncateMultiXact(void) } /* - * The cutoff point is the start of the segment containing oldestMXact. - * We pass the *page* containing oldestMXact to SimpleLruTruncate. + * The cutoff point is the start of the segment containing oldestMXact. We + * pass the *page* containing oldestMXact to SimpleLruTruncate. */ cutoffPage = MultiXactIdToOffsetPage(oldestMXact); @@ -1677,8 +1677,8 @@ MultiXactOffsetPagePrecedes(int page1, int page2) static bool MultiXactMemberPagePrecedes(int page1, int page2) { - MultiXactOffset offset1; - MultiXactOffset offset2; + MultiXactOffset offset1; + MultiXactOffset offset2; offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE; offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE; @@ -1695,7 +1695,7 @@ MultiXactMemberPagePrecedes(int page1, int page2) static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) { - int32 diff = (int32) (multi1 - multi2); + int32 diff = (int32) (multi1 - multi2); return (diff < 0); } @@ -1706,7 +1706,7 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) { - int32 diff = (int32) (offset1 - offset2); + int32 diff = (int32) (offset1 - offset2); return (diff < 0); } @@ -1783,9 +1783,9 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record) MultiXactAdvanceNextMXact(xlrec->mid + 1, xlrec->moff + xlrec->nxids); /* - * Make sure nextXid is beyond any XID mentioned in the record. - * This should be unnecessary, since any XID found here ought to - * have other evidence in the XLOG, but let's be safe. + * Make sure nextXid is beyond any XID mentioned in the record. This + * should be unnecessary, since any XID found here ought to have other + * evidence in the XLOG, but let's be safe. */ max_xid = record->xl_xid; for (i = 0; i < xlrec->nxids; i++) diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 67d9d3f54f..5891890b76 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -48,7 +48,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.27 2005/08/20 23:26:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.28 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -186,8 +186,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, Assert(found); /* - * Initialize the unshared control struct, including directory path. - * We assume caller set PagePrecedes. + * Initialize the unshared control struct, including directory path. 
We + * assume caller set PagePrecedes. */ ctl->shared = shared; ctl->do_fsync = true; /* default behavior */ @@ -351,11 +351,11 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); /* - * Check to see if someone else already did the write, or took the - * buffer away from us. If so, do nothing. NOTE: we really should - * never see WRITE_IN_PROGRESS here, since that state should only - * occur while the writer is holding the buffer lock. But accept it - * so that we have a recovery path if a writer aborts. + * Check to see if someone else already did the write, or took the buffer + * away from us. If so, do nothing. NOTE: we really should never see + * WRITE_IN_PROGRESS here, since that state should only occur while the + * writer is holding the buffer lock. But accept it so that we have a + * recovery path if a writer aborts. */ if (shared->page_number[slotno] != pageno || (shared->page_status[slotno] != SLRU_PAGE_DIRTY && @@ -368,15 +368,14 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) /* * Mark the slot write-busy. After this point, a transaction status - * update on this page will mark it dirty again. NB: we are assuming - * that read/write of the page status field is atomic, since we change - * the state while not holding control lock. However, we cannot set - * this state any sooner, or we'd possibly fool a previous writer into - * thinking he's successfully dumped the page when he hasn't. - * (Scenario: other writer starts, page is redirtied, we come along - * and set WRITE_IN_PROGRESS again, other writer completes and sets - * CLEAN because redirty info has been lost, then we think it's clean - * too.) + * update on this page will mark it dirty again. NB: we are assuming that + * read/write of the page status field is atomic, since we change the + * state while not holding control lock. However, we cannot set this + * state any sooner, or we'd possibly fool a previous writer into thinking + * he's successfully dumped the page when he hasn't. (Scenario: other + * writer starts, page is redirtied, we come along and set + * WRITE_IN_PROGRESS again, other writer completes and sets CLEAN because + * redirty info has been lost, then we think it's clean too.) */ shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS; @@ -436,8 +435,8 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) * In a crash-and-restart situation, it's possible for us to receive * commands to set the commit status of transactions whose bits are in * already-truncated segments of the commit log (see notes in - * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the - * case where the file doesn't exist, and return zeroes instead. + * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case + * where the file doesn't exist, and return zeroes instead. */ fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) @@ -528,17 +527,16 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata) { /* * If the file doesn't already exist, we should create it. It is - * possible for this to need to happen when writing a page that's - * not first in its segment; we assume the OS can cope with that. - * (Note: it might seem that it'd be okay to create files only - * when SimpleLruZeroPage is called for the first page of a - * segment. 
However, if after a crash and restart the REDO logic - * elects to replay the log from a checkpoint before the latest - * one, then it's possible that we will get commands to set - * transaction status of transactions that have already been - * truncated from the commit log. Easiest way to deal with that is - * to accept references to nonexistent files here and in - * SlruPhysicalReadPage.) + * possible for this to need to happen when writing a page that's not + * first in its segment; we assume the OS can cope with that. (Note: + * it might seem that it'd be okay to create files only when + * SimpleLruZeroPage is called for the first page of a segment. + * However, if after a crash and restart the REDO logic elects to + * replay the log from a checkpoint before the latest one, then it's + * possible that we will get commands to set transaction status of + * transactions that have already been truncated from the commit log. + * Easiest way to deal with that is to accept references to + * nonexistent files here and in SlruPhysicalReadPage.) */ SlruFileName(ctl, path, segno); fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); @@ -635,49 +633,49 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) case SLRU_OPEN_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), + errmsg("could not access status of transaction %u", xid), errdetail("could not open file \"%s\": %m", path))); break; case SLRU_CREATE_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), + errmsg("could not access status of transaction %u", xid), errdetail("could not create file \"%s\": %m", path))); break; case SLRU_SEEK_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), - errdetail("could not seek in file \"%s\" to offset %u: %m", - path, offset))); + errmsg("could not access status of transaction %u", xid), + errdetail("could not seek in file \"%s\" to offset %u: %m", + path, offset))); break; case SLRU_READ_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), - errdetail("could not read from file \"%s\" at offset %u: %m", - path, offset))); + errmsg("could not access status of transaction %u", xid), + errdetail("could not read from file \"%s\" at offset %u: %m", + path, offset))); break; case SLRU_WRITE_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), - errdetail("could not write to file \"%s\" at offset %u: %m", - path, offset))); + errmsg("could not access status of transaction %u", xid), + errdetail("could not write to file \"%s\" at offset %u: %m", + path, offset))); break; case SLRU_FSYNC_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), + errmsg("could not access status of transaction %u", xid), errdetail("could not fsync file \"%s\": %m", path))); break; case SLRU_CLOSE_FAILED: ereport(ERROR, (errcode_for_file_access(), - errmsg("could not access status of transaction %u", xid), + errmsg("could not access status of transaction %u", xid), errdetail("could not close file \"%s\": %m", path))); break; @@ -723,8 +721,8 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) } /* - * If we find any EMPTY slot, just select that one. Else locate - * the least-recently-used slot that isn't the latest page. + * If we find any EMPTY slot, just select that one. 
Else locate the + * least-recently-used slot that isn't the latest page. */ for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { @@ -745,10 +743,10 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) return bestslot; /* - * We need to do I/O. Normal case is that we have to write it - * out, but it's possible in the worst case to have selected a - * read-busy page. In that case we use SimpleLruReadPage to wait - * for the read to complete. + * We need to do I/O. Normal case is that we have to write it out, + * but it's possible in the worst case to have selected a read-busy + * page. In that case we use SimpleLruReadPage to wait for the read + * to complete. */ if (shared->page_status[bestslot] == SLRU_PAGE_READ_IN_PROGRESS) (void) SimpleLruReadPage(ctl, shared->page_number[bestslot], @@ -757,9 +755,9 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) SimpleLruWritePage(ctl, bestslot, NULL); /* - * Now loop back and try again. This is the easiest way of - * dealing with corner cases such as the victim page being - * re-dirtied while we wrote it. + * Now loop back and try again. This is the easiest way of dealing + * with corner cases such as the victim page being re-dirtied while we + * wrote it. */ } } @@ -789,9 +787,9 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint) SimpleLruWritePage(ctl, slotno, &fdata); /* - * When called during a checkpoint, we cannot assert that the slot - * is clean now, since another process might have re-dirtied it - * already. That's okay. + * When called during a checkpoint, we cannot assert that the slot is + * clean now, since another process might have re-dirtied it already. + * That's okay. */ Assert(checkpoint || shared->page_status[slotno] == SLRU_PAGE_EMPTY || @@ -841,10 +839,10 @@ SimpleLruTruncate(SlruCtl ctl, int cutoffPage) cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; /* - * Scan shared memory and remove any pages preceding the cutoff page, - * to ensure we won't rewrite them later. (Since this is normally - * called in or just after a checkpoint, any dirty pages should have - * been flushed already ... we're just being extra careful here.) + * Scan shared memory and remove any pages preceding the cutoff page, to + * ensure we won't rewrite them later. (Since this is normally called in + * or just after a checkpoint, any dirty pages should have been flushed + * already ... we're just being extra careful here.) */ LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); @@ -852,16 +850,16 @@ restart:; /* * While we are holding the lock, make an important safety check: the - * planned cutoff point must be <= the current endpoint page. - * Otherwise we have already wrapped around, and proceeding with the - * truncation would risk removing the current segment. + * planned cutoff point must be <= the current endpoint page. Otherwise we + * have already wrapped around, and proceeding with the truncation would + * risk removing the current segment. */ if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage)) { LWLockRelease(shared->ControlLock); ereport(LOG, - (errmsg("could not truncate directory \"%s\": apparent wraparound", - ctl->Dir))); + (errmsg("could not truncate directory \"%s\": apparent wraparound", + ctl->Dir))); return; } @@ -882,9 +880,9 @@ restart:; } /* - * Hmm, we have (or may have) I/O operations acting on the page, - * so we've got to wait for them to finish and then start again. - * This is the same logic as in SlruSelectLRUPage. 
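The SimpleLruTruncate hunks above describe two guards: the cutoff is rounded down to a segment boundary, and truncation is abandoned if the current endpoint page already precedes the cutoff (apparent wraparound). A minimal runnable sketch of that logic, with an invented PAGES_PER_SEGMENT and a plain comparator standing in for ctl->PagePrecedes:

#include <stdbool.h>
#include <stdio.h>

#define PAGES_PER_SEGMENT 32    /* illustrative; the real constant lives in slru.h */

typedef bool (*page_precedes_fn) (int page1, int page2);

static bool
simple_precedes(int page1, int page2)
{
    return page1 < page2;       /* good enough for a non-wrapping demo */
}

static bool
truncate_slru(int cutoff_page, int latest_page, page_precedes_fn precedes)
{
    /* Round the cutoff down to the start of its segment. */
    cutoff_page -= cutoff_page % PAGES_PER_SEGMENT;

    /*
     * Safety check: if the endpoint already precedes the cutoff we have
     * apparently wrapped around, and truncating could remove a segment
     * that is still in use, so refuse.
     */
    if (precedes(latest_page, cutoff_page))
    {
        fprintf(stderr, "apparent wraparound, not truncating\n");
        return false;
    }

    /* ... unlink each segment whose pages all precede cutoff_page ... */
    return true;
}

int
main(void)
{
    truncate_slru(70, 200, simple_precedes);    /* ok: cutoff rounds down to page 64 */
    truncate_slru(70, 10, simple_precedes);     /* refused: endpoint behind cutoff */
    return 0;
}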
+ * Hmm, we have (or may have) I/O operations acting on the page, so + * we've got to wait for them to finish and then start again. This is + * the same logic as in SlruSelectLRUPage. */ if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS) (void) SimpleLruReadPage(ctl, shared->page_number[slotno], diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 9b45035036..7671eb6a45 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -22,7 +22,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.10 2005/08/20 23:26:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.11 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -234,9 +234,8 @@ StartupSUBTRANS(TransactionId oldestActiveXID) /* * Since we don't expect pg_subtrans to be valid across crashes, we * initialize the currently-active page(s) to zeroes during startup. - * Whenever we advance into a new page, ExtendSUBTRANS will likewise - * zero the new page without regard to whatever was previously on - * disk. + * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero + * the new page without regard to whatever was previously on disk. */ LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); @@ -262,8 +261,8 @@ ShutdownSUBTRANS(void) /* * Flush dirty SUBTRANS pages to disk * - * This is not actually necessary from a correctness point of view. We do - * it merely as a debugging aid. + * This is not actually necessary from a correctness point of view. We do it + * merely as a debugging aid. */ SimpleLruFlush(SubTransCtl, false); } @@ -277,9 +276,9 @@ CheckPointSUBTRANS(void) /* * Flush dirty SUBTRANS pages to disk * - * This is not actually necessary from a correctness point of view. We do - * it merely to improve the odds that writing of dirty pages is done - * by the checkpoint process and not by backends. + * This is not actually necessary from a correctness point of view. We do it + * merely to improve the odds that writing of dirty pages is done by the + * checkpoint process and not by backends. */ SimpleLruFlush(SubTransCtl, true); } @@ -329,8 +328,8 @@ TruncateSUBTRANS(TransactionId oldestXact) int cutoffPage; /* - * The cutoff point is the start of the segment containing oldestXact. - * We pass the *page* containing oldestXact to SimpleLruTruncate. + * The cutoff point is the start of the segment containing oldestXact. We + * pass the *page* containing oldestXact to SimpleLruTruncate. */ cutoffPage = TransactionIdToPage(oldestXact); diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index 5fa6f82daf..5985252052 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.65 2005/06/17 22:32:42 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.66 2005/10/15 02:49:09 momjian Exp $ * * NOTES * This file contains the high level access-method interface to the @@ -54,8 +54,8 @@ TransactionLogFetch(TransactionId transactionId) XidStatus xidstatus; /* - * Before going to the commit log manager, check our single item cache - * to see if we didn't just check the transaction status a moment ago. 
+ * Before going to the commit log manager, check our single item cache to + * see if we didn't just check the transaction status a moment ago. */ if (TransactionIdEquals(transactionId, cachedFetchXid)) return cachedFetchXidStatus; @@ -78,8 +78,8 @@ TransactionLogFetch(TransactionId transactionId) xidstatus = TransactionIdGetStatus(transactionId); /* - * DO NOT cache status for unfinished or sub-committed transactions! - * We only cache status that is guaranteed not to change. + * DO NOT cache status for unfinished or sub-committed transactions! We + * only cache status that is guaranteed not to change. */ if (xidstatus != TRANSACTION_STATUS_IN_PROGRESS && xidstatus != TRANSACTION_STATUS_SUB_COMMITTED) @@ -169,18 +169,18 @@ TransactionIdDidCommit(TransactionId transactionId) return true; /* - * If it's marked subcommitted, we have to check the parent - * recursively. However, if it's older than TransactionXmin, we can't - * look at pg_subtrans; instead assume that the parent crashed without - * cleaning up its children. + * If it's marked subcommitted, we have to check the parent recursively. + * However, if it's older than TransactionXmin, we can't look at + * pg_subtrans; instead assume that the parent crashed without cleaning up + * its children. * - * Originally we Assert'ed that the result of SubTransGetParent was - * not zero. However with the introduction of prepared transactions, - * there can be a window just after database startup where we do not - * have complete knowledge in pg_subtrans of the transactions after - * TransactionXmin. StartupSUBTRANS() has ensured that any missing - * information will be zeroed. Since this case should not happen under - * normal conditions, it seems reasonable to emit a WARNING for it. + * Originally we Assert'ed that the result of SubTransGetParent was not zero. + * However with the introduction of prepared transactions, there can be a + * window just after database startup where we do not have complete + * knowledge in pg_subtrans of the transactions after TransactionXmin. + * StartupSUBTRANS() has ensured that any missing information will be + * zeroed. Since this case should not happen under normal conditions, it + * seems reasonable to emit a WARNING for it. */ if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED) { @@ -225,10 +225,10 @@ TransactionIdDidAbort(TransactionId transactionId) return true; /* - * If it's marked subcommitted, we have to check the parent - * recursively. However, if it's older than TransactionXmin, we can't - * look at pg_subtrans; instead assume that the parent crashed without - * cleaning up its children. + * If it's marked subcommitted, we have to check the parent recursively. + * However, if it's older than TransactionXmin, we can't look at + * pg_subtrans; instead assume that the parent crashed without cleaning up + * its children. 
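Two ideas from the transam.c hunks above are worth seeing in isolation: the one-entry status cache only remembers states that can never change again, and a sub-committed XID is resolved by walking up to its parent, assuming a crashed parent when none is recorded. A combined runnable sketch with hypothetical names; slow_lookup() and get_parent() stand in for TransactionIdGetStatus() and SubTransGetParent():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t txn_id;

typedef enum
{
    ST_IN_PROGRESS,
    ST_COMMITTED,
    ST_ABORTED,
    ST_SUB_COMMITTED
} txn_status;

/* Toy storage so the sketch runs: xid 10 is committed, 11 subcommitted under 10. */
static txn_status
slow_lookup(txn_id xid)
{
    if (xid == 10)
        return ST_COMMITTED;
    if (xid == 11)
        return ST_SUB_COMMITTED;
    return ST_ABORTED;
}

static txn_id
get_parent(txn_id xid)
{
    return (xid == 11) ? 10 : 0;    /* 0 means "parent unknown" */
}

/* One-entry cache: only final states are remembered. */
static txn_id cached_xid = 0;
static txn_status cached_status;

static txn_status
fetch_status(txn_id xid)
{
    txn_status st;

    if (xid != 0 && xid == cached_xid)
        return cached_status;

    st = slow_lookup(xid);

    /* Never cache unfinished or sub-committed transactions. */
    if (st != ST_IN_PROGRESS && st != ST_SUB_COMMITTED)
    {
        cached_xid = xid;
        cached_status = st;
    }
    return st;
}

static bool
did_commit(txn_id xid)
{
    txn_status st = fetch_status(xid);

    if (st == ST_COMMITTED)
        return true;
    if (st == ST_SUB_COMMITTED)
    {
        txn_id parent = get_parent(xid);

        if (parent == 0)
            return false;       /* assume the parent crashed */
        return did_commit(parent);
    }
    return false;
}

int
main(void)
{
    printf("%d %d\n", did_commit(11), did_commit(12));  /* prints 1 0 */
    return 0;
}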
*/ if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED) { diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 05590da14e..0ece348e18 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.14 2005/10/13 22:55:55 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.15 2005/10/15 02:49:09 momjian Exp $ * * NOTES * Each global transaction is associated with a global transaction @@ -64,7 +64,7 @@ #define TWOPHASE_DIR "pg_twophase" /* GUC variable, can't be changed after startup */ -int max_prepared_xacts = 5; +int max_prepared_xacts = 5; /* * This struct describes one global transaction that is in prepared state @@ -97,7 +97,7 @@ int max_prepared_xacts = 5; * entry will remain in prepXacts until recycled. We can detect recyclable * entries by checking for valid = false and locking_xid no longer active. * - * typedef struct GlobalTransactionData *GlobalTransaction appears in + * typedef struct GlobalTransactionData *GlobalTransaction appears in * twophase.h */ #define GIDSIZE 200 @@ -105,12 +105,12 @@ int max_prepared_xacts = 5; typedef struct GlobalTransactionData { PGPROC proc; /* dummy proc */ - TimestampTz prepared_at; /* time of preparation */ + TimestampTz prepared_at; /* time of preparation */ XLogRecPtr prepare_lsn; /* XLOG offset of prepare record */ Oid owner; /* ID of user that executed the xact */ TransactionId locking_xid; /* top-level XID of backend working on xact */ bool valid; /* TRUE if fully prepared */ - char gid[GIDSIZE]; /* The GID assigned to the prepared xact */ + char gid[GIDSIZE]; /* The GID assigned to the prepared xact */ } GlobalTransactionData; /* @@ -123,30 +123,30 @@ typedef struct TwoPhaseStateData SHMEM_OFFSET freeGXacts; /* Number of valid prepXacts entries. */ - int numPrepXacts; + int numPrepXacts; /* * There are max_prepared_xacts items in this array, but C wants a * fixed-size array. */ - GlobalTransaction prepXacts[1]; /* VARIABLE LENGTH ARRAY */ + GlobalTransaction prepXacts[1]; /* VARIABLE LENGTH ARRAY */ } TwoPhaseStateData; /* VARIABLE LENGTH STRUCT */ static TwoPhaseStateData *TwoPhaseState; static void RecordTransactionCommitPrepared(TransactionId xid, - int nchildren, - TransactionId *children, - int nrels, - RelFileNode *rels); + int nchildren, + TransactionId *children, + int nrels, + RelFileNode *rels); static void RecordTransactionAbortPrepared(TransactionId xid, - int nchildren, - TransactionId *children, - int nrels, - RelFileNode *rels); + int nchildren, + TransactionId *children, + int nrels, + RelFileNode *rels); static void ProcessRecords(char *bufptr, TransactionId xid, - const TwoPhaseCallback callbacks[]); + const TwoPhaseCallback callbacks[]); /* @@ -171,7 +171,7 @@ TwoPhaseShmemSize(void) void TwoPhaseShmemInit(void) { - bool found; + bool found; TwoPhaseState = ShmemInitStruct("Prepared Transaction Table", TwoPhaseShmemSize(), @@ -190,7 +190,7 @@ TwoPhaseShmemInit(void) */ gxacts = (GlobalTransaction) ((char *) TwoPhaseState + - MAXALIGN(offsetof(TwoPhaseStateData, prepXacts) + + MAXALIGN(offsetof(TwoPhaseStateData, prepXacts) + sizeof(GlobalTransaction) * max_prepared_xacts)); for (i = 0; i < max_prepared_xacts; i++) { @@ -205,7 +205,7 @@ TwoPhaseShmemInit(void) /* * MarkAsPreparing - * Reserve the GID for the given transaction. 
+ * Reserve the GID for the given transaction. * * Internally, this creates a gxact struct and puts it into the active array. * NOTE: this is also used when reloading a gxact after a crash; so avoid @@ -215,8 +215,8 @@ GlobalTransaction MarkAsPreparing(TransactionId xid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid) { - GlobalTransaction gxact; - int i; + GlobalTransaction gxact; + int i; if (strlen(gid) >= GIDSIZE) ereport(ERROR, @@ -227,10 +227,9 @@ MarkAsPreparing(TransactionId xid, const char *gid, LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); /* - * First, find and recycle any gxacts that failed during prepare. - * We do this partly to ensure we don't mistakenly say their GIDs - * are still reserved, and partly so we don't fail on out-of-slots - * unnecessarily. + * First, find and recycle any gxacts that failed during prepare. We do + * this partly to ensure we don't mistakenly say their GIDs are still + * reserved, and partly so we don't fail on out-of-slots unnecessarily. */ for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { @@ -360,13 +359,13 @@ MarkAsPrepared(GlobalTransaction gxact) static GlobalTransaction LockGXact(const char *gid, Oid user) { - int i; + int i; LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; /* Ignore not-yet-valid GIDs */ if (!gxact->valid) @@ -380,15 +379,15 @@ LockGXact(const char *gid, Oid user) if (TransactionIdIsActive(gxact->locking_xid)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("prepared transaction with identifier \"%s\" is busy", - gid))); + errmsg("prepared transaction with identifier \"%s\" is busy", + gid))); gxact->locking_xid = InvalidTransactionId; } if (user != gxact->owner && !superuser_arg(user)) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("permission denied to finish prepared transaction"), + errmsg("permission denied to finish prepared transaction"), errhint("Must be superuser or the user that prepared the transaction."))); /* OK for me to lock it */ @@ -403,8 +402,8 @@ LockGXact(const char *gid, Oid user) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), - errmsg("prepared transaction with identifier \"%s\" does not exist", - gid))); + errmsg("prepared transaction with identifier \"%s\" does not exist", + gid))); /* NOTREACHED */ return NULL; @@ -419,7 +418,7 @@ LockGXact(const char *gid, Oid user) static void RemoveGXact(GlobalTransaction gxact) { - int i; + int i; LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); @@ -449,7 +448,7 @@ RemoveGXact(GlobalTransaction gxact) /* * TransactionIdIsPrepared * True iff transaction associated with the identifier is prepared - * for two-phase commit + * for two-phase commit * * Note: only gxacts marked "valid" are considered; but notice we do not * check the locking status. 
@@ -459,14 +458,14 @@ RemoveGXact(GlobalTransaction gxact) static bool TransactionIdIsPrepared(TransactionId xid) { - bool result = false; - int i; + bool result = false; + int i; LWLockAcquire(TwoPhaseStateLock, LW_SHARED); for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; if (gxact->valid && gxact->proc.xid == xid) { @@ -496,8 +495,8 @@ static int GetPreparedTransactionList(GlobalTransaction *gxacts) { GlobalTransaction array; - int num; - int i; + int num; + int i; LWLockAcquire(TwoPhaseStateLock, LW_SHARED); @@ -526,13 +525,13 @@ GetPreparedTransactionList(GlobalTransaction *gxacts) typedef struct { GlobalTransaction array; - int ngxacts; - int currIdx; + int ngxacts; + int currIdx; } Working_State; /* * pg_prepared_xact - * Produce a view with one row per prepared transaction. + * Produce a view with one row per prepared transaction. * * This function is here so we don't have to export the * GlobalTransactionData struct definition. @@ -552,8 +551,7 @@ pg_prepared_xact(PG_FUNCTION_ARGS) funcctx = SRF_FIRSTCALL_INIT(); /* - * Switch to memory context appropriate for multiple function - * calls + * Switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); @@ -574,8 +572,8 @@ pg_prepared_xact(PG_FUNCTION_ARGS) funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* - * Collect all the 2PC status information that we will format and - * send out as a result set. + * Collect all the 2PC status information that we will format and send + * out as a result set. */ status = (Working_State *) palloc(sizeof(Working_State)); funcctx->user_fctx = (void *) status; @@ -644,7 +642,7 @@ TwoPhaseGetDummyProc(TransactionId xid) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; if (gxact->proc.xid == xid) { @@ -665,7 +663,7 @@ TwoPhaseGetDummyProc(TransactionId xid) } /************************************************************************/ -/* State file support */ +/* State file support */ /************************************************************************/ #define TwoPhaseFilePath(path, xid) \ @@ -674,14 +672,14 @@ TwoPhaseGetDummyProc(TransactionId xid) /* * 2PC state file format: * - * 1. TwoPhaseFileHeader - * 2. TransactionId[] (subtransactions) + * 1. TwoPhaseFileHeader + * 2. TransactionId[] (subtransactions) * 3. RelFileNode[] (files to be deleted at commit) * 4. RelFileNode[] (files to be deleted at abort) - * 5. TwoPhaseRecordOnDisk - * 6. ... - * 7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID) - * 8. CRC32 + * 5. TwoPhaseRecordOnDisk + * 6. ... + * 7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID) + * 8. CRC32 * * Each segment except the final CRC32 is MAXALIGN'd. 
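The state-file layout listed above (header, subxact XIDs, the two RelFileNode arrays, the 2PC records, then a trailing CRC32, with every segment except the CRC MAXALIGN'd) determines the file's total length. A small sketch of that size computation, using an invented 8-byte ALIGN8 in place of MAXALIGN and caller-supplied element sizes:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for MAXALIGN: round up to a multiple of 8. */
#define ALIGN8(len) (((size_t) (len) + 7) & ~(size_t) 7)

static size_t
state_file_size(size_t hdr_size, int nsubxacts, size_t xid_size,
                int ncommitrels, int nabortrels, size_t relfilenode_size,
                size_t records_size)
{
    size_t total = 0;

    total += ALIGN8(hdr_size);                          /* 1. file header */
    total += ALIGN8(nsubxacts * xid_size);              /* 2. subxact XIDs */
    total += ALIGN8(ncommitrels * relfilenode_size);    /* 3. delete-at-commit rels */
    total += ALIGN8(nabortrels * relfilenode_size);     /* 4. delete-at-abort rels */
    total += ALIGN8(records_size);                      /* 5-7. 2PC records incl. sentinel */
    total += sizeof(uint32_t);                          /* 8. CRC32, not aligned */
    return total;
}

int
main(void)
{
    /* e.g. a 220-byte header, three 4-byte subxacts, one 12-byte rel, no records */
    printf("%zu\n", state_file_size(220, 3, 4, 1, 0, 12, 0));
    return 0;
}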
*/ @@ -693,16 +691,16 @@ TwoPhaseGetDummyProc(TransactionId xid) typedef struct TwoPhaseFileHeader { - uint32 magic; /* format identifier */ - uint32 total_len; /* actual file length */ - TransactionId xid; /* original transaction XID */ - Oid database; /* OID of database it was in */ - TimestampTz prepared_at; /* time of preparation */ - Oid owner; /* user running the transaction */ - int32 nsubxacts; /* number of following subxact XIDs */ - int32 ncommitrels; /* number of delete-on-commit rels */ - int32 nabortrels; /* number of delete-on-abort rels */ - char gid[GIDSIZE]; /* GID for transaction */ + uint32 magic; /* format identifier */ + uint32 total_len; /* actual file length */ + TransactionId xid; /* original transaction XID */ + Oid database; /* OID of database it was in */ + TimestampTz prepared_at; /* time of preparation */ + Oid owner; /* user running the transaction */ + int32 nsubxacts; /* number of following subxact XIDs */ + int32 ncommitrels; /* number of delete-on-commit rels */ + int32 nabortrels; /* number of delete-on-abort rels */ + char gid[GIDSIZE]; /* GID for transaction */ } TwoPhaseFileHeader; /* @@ -713,9 +711,9 @@ typedef struct TwoPhaseFileHeader */ typedef struct TwoPhaseRecordOnDisk { - uint32 len; /* length of rmgr data */ - TwoPhaseRmgrId rmid; /* resource manager for this record */ - uint16 info; /* flag bits for use by rmgr */ + uint32 len; /* length of rmgr data */ + TwoPhaseRmgrId rmid; /* resource manager for this record */ + uint16 info; /* flag bits for use by rmgr */ } TwoPhaseRecordOnDisk; /* @@ -728,9 +726,9 @@ static struct xllist { XLogRecData *head; /* first data block in the chain */ XLogRecData *tail; /* last block in chain */ - uint32 bytes_free; /* free bytes left in tail block */ - uint32 total_len; /* total data bytes in chain */ -} records; + uint32 bytes_free; /* free bytes left in tail block */ + uint32 total_len; /* total data bytes in chain */ +} records; /* @@ -744,7 +742,7 @@ static struct xllist static void save_state_data(const void *data, uint32 len) { - uint32 padlen = MAXALIGN(len); + uint32 padlen = MAXALIGN(len); if (padlen > records.bytes_free) { @@ -772,7 +770,7 @@ save_state_data(const void *data, uint32 len) void StartPrepare(GlobalTransaction gxact) { - TransactionId xid = gxact->proc.xid; + TransactionId xid = gxact->proc.xid; TwoPhaseFileHeader hdr; TransactionId *children; RelFileNode *commitrels; @@ -833,13 +831,13 @@ StartPrepare(GlobalTransaction gxact) void EndPrepare(GlobalTransaction gxact) { - TransactionId xid = gxact->proc.xid; + TransactionId xid = gxact->proc.xid; TwoPhaseFileHeader *hdr; - char path[MAXPGPATH]; - XLogRecData *record; - pg_crc32 statefile_crc; - pg_crc32 bogus_crc; - int fd; + char path[MAXPGPATH]; + XLogRecData *record; + pg_crc32 statefile_crc; + pg_crc32 bogus_crc; + int fd; /* Add the end sentinel to the list of 2PC records */ RegisterTwoPhaseRecord(TWOPHASE_RM_END_ID, 0, @@ -853,10 +851,10 @@ EndPrepare(GlobalTransaction gxact) /* * Create the 2PC state file. * - * Note: because we use BasicOpenFile(), we are responsible for ensuring - * the FD gets closed in any error exit path. Once we get into the - * critical section, though, it doesn't matter since any failure causes - * PANIC anyway. + * Note: because we use BasicOpenFile(), we are responsible for ensuring the + * FD gets closed in any error exit path. Once we get into the critical + * section, though, it doesn't matter since any failure causes PANIC + * anyway. 
*/ TwoPhaseFilePath(path, xid); @@ -887,11 +885,10 @@ EndPrepare(GlobalTransaction gxact) FIN_CRC32(statefile_crc); /* - * Write a deliberately bogus CRC to the state file; this is just - * paranoia to catch the case where four more bytes will run us out of - * disk space. + * Write a deliberately bogus CRC to the state file; this is just paranoia + * to catch the case where four more bytes will run us out of disk space. */ - bogus_crc = ~ statefile_crc; + bogus_crc = ~statefile_crc; if ((write(fd, &bogus_crc, sizeof(pg_crc32))) != sizeof(pg_crc32)) { @@ -914,11 +911,11 @@ EndPrepare(GlobalTransaction gxact) * The state file isn't valid yet, because we haven't written the correct * CRC yet. Before we do that, insert entry in WAL and flush it to disk. * - * Between the time we have written the WAL entry and the time we write - * out the correct state file CRC, we have an inconsistency: the xact is - * prepared according to WAL but not according to our on-disk state. - * We use a critical section to force a PANIC if we are unable to complete - * the write --- then, WAL replay should repair the inconsistency. The + * Between the time we have written the WAL entry and the time we write out + * the correct state file CRC, we have an inconsistency: the xact is + * prepared according to WAL but not according to our on-disk state. We + * use a critical section to force a PANIC if we are unable to complete + * the write --- then, WAL replay should repair the inconsistency. The * odds of a PANIC actually occurring should be very tiny given that we * were able to write the bogus CRC above. * @@ -956,16 +953,16 @@ EndPrepare(GlobalTransaction gxact) errmsg("could not close twophase state file: %m"))); /* - * Mark the prepared transaction as valid. As soon as xact.c marks - * MyProc as not running our XID (which it will do immediately after - * this function returns), others can commit/rollback the xact. + * Mark the prepared transaction as valid. As soon as xact.c marks MyProc + * as not running our XID (which it will do immediately after this + * function returns), others can commit/rollback the xact. * * NB: a side effect of this is to make a dummy ProcArray entry for the * prepared XID. This must happen before we clear the XID from MyProc, * else there is a window where the XID is not running according to - * TransactionIdInProgress, and onlookers would be entitled to assume - * the xact crashed. Instead we have a window where the same XID - * appears twice in ProcArray, which is OK. + * TransactionIdInProgress, and onlookers would be entitled to assume the + * xact crashed. Instead we have a window where the same XID appears + * twice in ProcArray, which is OK. */ MarkAsPrepared(gxact); @@ -1011,9 +1008,10 @@ ReadTwoPhaseFile(TransactionId xid) char *buf; TwoPhaseFileHeader *hdr; int fd; - struct stat stat; + struct stat stat; uint32 crc_offset; - pg_crc32 calc_crc, file_crc; + pg_crc32 calc_crc, + file_crc; TwoPhaseFilePath(path, xid); @@ -1028,9 +1026,8 @@ ReadTwoPhaseFile(TransactionId xid) } /* - * Check file length. We can determine a lower bound pretty easily. - * We set an upper bound mainly to avoid palloc() failure on a corrupt - * file. + * Check file length. We can determine a lower bound pretty easily. We + * set an upper bound mainly to avoid palloc() failure on a corrupt file. 
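EndPrepare's trick above (write the complemented CRC first, so that running out of disk space is detected before the WAL record goes in, then seek back and overwrite it with the real value once WAL is safely flushed) can be shown with plain POSIX calls. A compilable sketch; insert_and_flush_wal() is a hypothetical stand-in for the XLogInsert/XLogFlush step, and error handling is reduced to returning -1:

#include <stdint.h>
#include <unistd.h>

/* Hypothetical stand-in for writing and flushing the PREPARE WAL record. */
extern void insert_and_flush_wal(void);

static int
finish_state_file(int fd, uint32_t statefile_crc)
{
    uint32_t bogus_crc = ~statefile_crc;

    /* Reserve the CRC's four bytes; fails now if the disk is full. */
    if (write(fd, &bogus_crc, sizeof(bogus_crc)) != (ssize_t) sizeof(bogus_crc))
        return -1;

    insert_and_flush_wal();

    /* Now make the file valid: back up and write the real CRC. */
    if (lseek(fd, -(off_t) sizeof(statefile_crc), SEEK_CUR) < 0)
        return -1;
    if (write(fd, &statefile_crc, sizeof(statefile_crc)) != (ssize_t) sizeof(statefile_crc))
        return -1;
    if (fsync(fd) != 0)
        return -1;
    return close(fd);
}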
*/ if (fstat(fd, &stat)) { @@ -1107,17 +1104,17 @@ FinishPreparedTransaction(const char *gid, bool isCommit) { GlobalTransaction gxact; TransactionId xid; - char *buf; - char *bufptr; + char *buf; + char *bufptr; TwoPhaseFileHeader *hdr; TransactionId *children; RelFileNode *commitrels; RelFileNode *abortrels; - int i; + int i; /* - * Validate the GID, and lock the GXACT to ensure that two backends - * do not try to commit the same GID at once. + * Validate the GID, and lock the GXACT to ensure that two backends do not + * try to commit the same GID at once. */ gxact = LockGXact(gid, GetUserId()); xid = gxact->proc.xid; @@ -1148,10 +1145,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit) /* * The order of operations here is critical: make the XLOG entry for * commit or abort, then mark the transaction committed or aborted in - * pg_clog, then remove its PGPROC from the global ProcArray (which - * means TransactionIdIsInProgress will stop saying the prepared xact - * is in progress), then run the post-commit or post-abort callbacks. - * The callbacks will release the locks the transaction held. + * pg_clog, then remove its PGPROC from the global ProcArray (which means + * TransactionIdIsInProgress will stop saying the prepared xact is in + * progress), then run the post-commit or post-abort callbacks. The + * callbacks will release the locks the transaction held. */ if (isCommit) RecordTransactionCommitPrepared(xid, @@ -1165,18 +1162,18 @@ FinishPreparedTransaction(const char *gid, bool isCommit) ProcArrayRemove(&gxact->proc); /* - * In case we fail while running the callbacks, mark the gxact invalid - * so no one else will try to commit/rollback, and so it can be recycled - * properly later. It is still locked by our XID so it won't go away yet. + * In case we fail while running the callbacks, mark the gxact invalid so + * no one else will try to commit/rollback, and so it can be recycled + * properly later. It is still locked by our XID so it won't go away yet. * * (We assume it's safe to do this without taking TwoPhaseStateLock.) */ gxact->valid = false; /* - * We have to remove any files that were supposed to be dropped. - * For consistency with the regular xact.c code paths, must do this - * before releasing locks, so do it before running the callbacks. + * We have to remove any files that were supposed to be dropped. For + * consistency with the regular xact.c code paths, must do this before + * releasing locks, so do it before running the callbacks. * * NB: this code knows that we couldn't be dropping any temp rels ... 
*/ @@ -1228,8 +1225,8 @@ ProcessRecords(char *bufptr, TransactionId xid, bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk)); if (callbacks[record->rmid] != NULL) - callbacks[record->rmid](xid, record->info, - (void *) bufptr, record->len); + callbacks[record->rmid] (xid, record->info, + (void *) bufptr, record->len); bufptr += MAXALIGN(record->len); } @@ -1244,15 +1241,15 @@ ProcessRecords(char *bufptr, TransactionId xid, void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning) { - char path[MAXPGPATH]; + char path[MAXPGPATH]; TwoPhaseFilePath(path, xid); if (unlink(path)) if (errno != ENOENT || giveWarning) ereport(WARNING, (errcode_for_file_access(), - errmsg("could not remove two-phase state file \"%s\": %m", - path))); + errmsg("could not remove two-phase state file \"%s\": %m", + path))); } /* @@ -1300,8 +1297,8 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len) } /* - * We must fsync the file because the end-of-replay checkpoint will - * not do so, there being no GXACT in shared memory yet to tell it to. + * We must fsync the file because the end-of-replay checkpoint will not do + * so, there being no GXACT in shared memory yet to tell it to. */ if (pg_fsync(fd) != 0) { @@ -1343,15 +1340,15 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) int i; /* - * We don't want to hold the TwoPhaseStateLock while doing I/O, - * so we grab it just long enough to make a list of the XIDs that - * require fsyncing, and then do the I/O afterwards. + * We don't want to hold the TwoPhaseStateLock while doing I/O, so we grab + * it just long enough to make a list of the XIDs that require fsyncing, + * and then do the I/O afterwards. * - * This approach creates a race condition: someone else could delete - * a GXACT between the time we release TwoPhaseStateLock and the time - * we try to open its state file. We handle this by special-casing - * ENOENT failures: if we see that, we verify that the GXACT is no - * longer valid, and if so ignore the failure. + * This approach creates a race condition: someone else could delete a GXACT + * between the time we release TwoPhaseStateLock and the time we try to + * open its state file. We handle this by special-casing ENOENT failures: + * if we see that, we verify that the GXACT is no longer valid, and if so + * ignore the failure. */ if (max_prepared_xacts <= 0) return; /* nothing to do */ @@ -1362,9 +1359,9 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; - if (gxact->valid && + if (gxact->valid && XLByteLE(gxact->prepare_lsn, redo_horizon)) xids[nxids++] = gxact->proc.xid; } @@ -1374,7 +1371,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) for (i = 0; i < nxids; i++) { TransactionId xid = xids[i]; - int fd; + int fd; TwoPhaseFilePath(path, xid); @@ -1424,7 +1421,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) * * We throw away any prepared xacts with main XID beyond nextXid --- if any * are present, it suggests that the DBA has done a PITR recovery to an - * earlier point in time without cleaning out pg_twophase. We dare not + * earlier point in time without cleaning out pg_twophase. We dare not * try to recover such prepared xacts since they likely depend on database * state that doesn't exist now. 
* @@ -1442,7 +1439,7 @@ PrescanPreparedTransactions(void) { TransactionId origNextXid = ShmemVariableCache->nextXid; TransactionId result = origNextXid; - DIR *cldir; + DIR *cldir; struct dirent *clde; cldir = AllocateDir(TWOPHASE_DIR); @@ -1452,10 +1449,10 @@ PrescanPreparedTransactions(void) strspn(clde->d_name, "0123456789ABCDEF") == 8) { TransactionId xid; - char *buf; - TwoPhaseFileHeader *hdr; + char *buf; + TwoPhaseFileHeader *hdr; TransactionId *subxids; - int i; + int i; xid = (TransactionId) strtoul(clde->d_name, NULL, 16); @@ -1541,8 +1538,8 @@ PrescanPreparedTransactions(void) void RecoverPreparedTransactions(void) { - char dir[MAXPGPATH]; - DIR *cldir; + char dir[MAXPGPATH]; + DIR *cldir; struct dirent *clde; snprintf(dir, MAXPGPATH, "%s", TWOPHASE_DIR); @@ -1554,12 +1551,12 @@ RecoverPreparedTransactions(void) strspn(clde->d_name, "0123456789ABCDEF") == 8) { TransactionId xid; - char *buf; - char *bufptr; - TwoPhaseFileHeader *hdr; + char *buf; + char *bufptr; + TwoPhaseFileHeader *hdr; TransactionId *subxids; - GlobalTransaction gxact; - int i; + GlobalTransaction gxact; + int i; xid = (TransactionId) strtoul(clde->d_name, NULL, 16); @@ -1598,8 +1595,8 @@ RecoverPreparedTransactions(void) /* * Reconstruct subtrans state for the transaction --- needed - * because pg_subtrans is not preserved over a restart. Note - * that we are linking all the subtransactions directly to the + * because pg_subtrans is not preserved over a restart. Note that + * we are linking all the subtransactions directly to the * top-level XID; there may originally have been a more complex * hierarchy, but there's no need to restore that exactly. */ @@ -1609,12 +1606,12 @@ RecoverPreparedTransactions(void) /* * Recreate its GXACT and dummy PGPROC * - * Note: since we don't have the PREPARE record's WAL location - * at hand, we leave prepare_lsn zeroes. This means the GXACT - * will be fsync'd on every future checkpoint. We assume this + * Note: since we don't have the PREPARE record's WAL location at + * hand, we leave prepare_lsn zeroes. This means the GXACT will + * be fsync'd on every future checkpoint. We assume this * situation is infrequent enough that the performance cost is - * negligible (especially since we know the state file has - * already been fsynced). + * negligible (especially since we know the state file has already + * been fsynced). */ gxact = MarkAsPreparing(xid, hdr->gid, hdr->prepared_at, @@ -1773,12 +1770,11 @@ RecordTransactionAbortPrepared(TransactionId xid, XLogFlush(recptr); /* - * Mark the transaction aborted in clog. This is not absolutely - * necessary but we may as well do it while we are here. + * Mark the transaction aborted in clog. This is not absolutely necessary + * but we may as well do it while we are here. 
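Both recovery passes above identify state files purely by name: exactly eight uppercase hex digits, which parse back into the prepared transaction's XID. The same filter as a standalone program (the directory name is illustrative):

#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
    DIR        *dir = opendir("pg_twophase");
    struct dirent *de;

    if (dir == NULL)
        return 1;
    while ((de = readdir(dir)) != NULL)
    {
        /* Accept only names that are exactly 8 uppercase hex digits. */
        if (strlen(de->d_name) == 8 &&
            strspn(de->d_name, "0123456789ABCDEF") == 8)
        {
            uint32_t xid = (uint32_t) strtoul(de->d_name, NULL, 16);

            printf("state file for transaction %u\n", (unsigned) xid);
        }
    }
    closedir(dir);
    return 0;
}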
*/ TransactionIdAbort(xid); TransactionIdAbortTree(nchildren, children); END_CRIT_SECTION(); } - diff --git a/src/backend/access/transam/twophase_rmgr.c b/src/backend/access/transam/twophase_rmgr.c index e78f8b2fbb..eab442404f 100644 --- a/src/backend/access/transam/twophase_rmgr.c +++ b/src/backend/access/transam/twophase_rmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.1 2005/06/17 22:32:42 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.2 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -21,29 +21,29 @@ #include "utils/inval.h" -const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] = +const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] = { - NULL, /* END ID */ - lock_twophase_recover, /* Lock */ - NULL, /* Inval */ - NULL, /* flat file update */ - NULL /* notify/listen */ + NULL, /* END ID */ + lock_twophase_recover, /* Lock */ + NULL, /* Inval */ + NULL, /* flat file update */ + NULL /* notify/listen */ }; -const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] = +const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] = { - NULL, /* END ID */ - lock_twophase_postcommit, /* Lock */ - inval_twophase_postcommit, /* Inval */ - flatfile_twophase_postcommit, /* flat file update */ - notify_twophase_postcommit /* notify/listen */ + NULL, /* END ID */ + lock_twophase_postcommit, /* Lock */ + inval_twophase_postcommit, /* Inval */ + flatfile_twophase_postcommit, /* flat file update */ + notify_twophase_postcommit /* notify/listen */ }; -const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] = +const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] = { - NULL, /* END ID */ - lock_twophase_postabort, /* Lock */ - NULL, /* Inval */ - NULL, /* flat file update */ - NULL /* notify/listen */ + NULL, /* END ID */ + lock_twophase_postabort, /* Lock */ + NULL, /* Inval */ + NULL, /* flat file update */ + NULL /* notify/listen */ }; diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 99d9213af0..bff646afb6 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -6,7 +6,7 @@ * Copyright (c) 2000-2005, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.66 2005/08/22 16:59:47 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.67 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -49,21 +49,21 @@ GetNewTransactionId(bool isSubXact) xid = ShmemVariableCache->nextXid; /* - * Check to see if it's safe to assign another XID. This protects - * against catastrophic data loss due to XID wraparound. The basic - * rules are: warn if we're past xidWarnLimit, and refuse to execute - * transactions if we're past xidStopLimit, unless we are running in - * a standalone backend (which gives an escape hatch to the DBA who - * ignored all those warnings). + * Check to see if it's safe to assign another XID. This protects against + * catastrophic data loss due to XID wraparound. 
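The twophase_rmgr.c tables above are plain arrays of function pointers indexed by resource-manager ID, with NULL meaning "nothing to do for this event"; ProcessRecords walks the saved records and calls the matching slot. A self-contained sketch of that dispatch pattern (the IDs and callback names here are invented):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum
{
    RM_END_ID,                  /* end-of-records sentinel */
    RM_LOCK_ID,
    RM_INVAL_ID,
    RM_MAX_ID = RM_INVAL_ID
};

typedef void (*two_phase_cb) (uint32_t xid, uint16_t info,
                              void *recdata, uint32_t len);

static void
lock_postcommit(uint32_t xid, uint16_t info, void *recdata, uint32_t len)
{
    printf("xid %u: releasing locks (%u bytes of record data)\n",
           (unsigned) xid, (unsigned) len);
}

/* One slot per resource manager; NULL slots are simply skipped. */
static const two_phase_cb postcommit_callbacks[RM_MAX_ID + 1] = {
    NULL,                       /* END sentinel */
    lock_postcommit,            /* Lock */
    NULL                        /* Inval */
};

static void
dispatch(int rmid, uint32_t xid, uint16_t info, void *recdata, uint32_t len)
{
    if (rmid >= 0 && rmid <= RM_MAX_ID && postcommit_callbacks[rmid] != NULL)
        postcommit_callbacks[rmid] (xid, info, recdata, len);
}

int
main(void)
{
    dispatch(RM_LOCK_ID, 42, 0, NULL, 0);   /* prints a message */
    dispatch(RM_INVAL_ID, 42, 0, NULL, 0);  /* NULL slot, no-op */
    return 0;
}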
The basic rules are: + * warn if we're past xidWarnLimit, and refuse to execute transactions if + * we're past xidStopLimit, unless we are running in a standalone backend + * (which gives an escape hatch to the DBA who ignored all those + * warnings). * - * Test is coded to fall out as fast as possible during normal operation, - * ie, when the warn limit is set and we haven't violated it. + * Test is coded to fall out as fast as possible during normal operation, ie, + * when the warn limit is set and we haven't violated it. */ if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidWarnLimit) && TransactionIdIsValid(ShmemVariableCache->xidWarnLimit)) { if (IsUnderPostmaster && - TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidStopLimit)) + TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidStopLimit)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("database is not accepting queries to avoid wraparound data loss in database \"%s\"", @@ -72,20 +72,19 @@ GetNewTransactionId(bool isSubXact) NameStr(ShmemVariableCache->limit_datname)))); else ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %u transactions", - NameStr(ShmemVariableCache->limit_datname), - ShmemVariableCache->xidWrapLimit - xid), - errhint("To avoid a database shutdown, execute a full-database VACUUM in \"%s\".", - NameStr(ShmemVariableCache->limit_datname)))); + (errmsg("database \"%s\" must be vacuumed within %u transactions", + NameStr(ShmemVariableCache->limit_datname), + ShmemVariableCache->xidWrapLimit - xid), + errhint("To avoid a database shutdown, execute a full-database VACUUM in \"%s\".", + NameStr(ShmemVariableCache->limit_datname)))); } /* * If we are allocating the first XID of a new page of the commit log, - * zero out that commit-log page before returning. We must do this - * while holding XidGenLock, else another xact could acquire and - * commit a later XID before we zero the page. Fortunately, a page of - * the commit log holds 32K or more transactions, so we don't have to - * do this very often. + * zero out that commit-log page before returning. We must do this while + * holding XidGenLock, else another xact could acquire and commit a later + * XID before we zero the page. Fortunately, a page of the commit log + * holds 32K or more transactions, so we don't have to do this very often. * * Extend pg_subtrans too. */ @@ -93,45 +92,43 @@ GetNewTransactionId(bool isSubXact) ExtendSUBTRANS(xid); /* - * Now advance the nextXid counter. This must not happen until after - * we have successfully completed ExtendCLOG() --- if that routine - * fails, we want the next incoming transaction to try it again. We - * cannot assign more XIDs until there is CLOG space for them. + * Now advance the nextXid counter. This must not happen until after we + * have successfully completed ExtendCLOG() --- if that routine fails, we + * want the next incoming transaction to try it again. We cannot assign + * more XIDs until there is CLOG space for them. */ TransactionIdAdvance(ShmemVariableCache->nextXid); /* - * We must store the new XID into the shared PGPROC array before - * releasing XidGenLock. This ensures that when GetSnapshotData calls + * We must store the new XID into the shared PGPROC array before releasing + * XidGenLock. This ensures that when GetSnapshotData calls * ReadNewTransactionId, all active XIDs before the returned value of - * nextXid are already present in PGPROC. Else we have a race - * condition. + * nextXid are already present in PGPROC. 
Else we have a race condition. * * XXX by storing xid into MyProc without acquiring ProcArrayLock, we are * relying on fetch/store of an xid to be atomic, else other backends * might see a partially-set xid here. But holding both locks at once - * would be a nasty concurrency hit (and in fact could cause a - * deadlock against GetSnapshotData). So for now, assume atomicity. - * Note that readers of PGPROC xid field should be careful to fetch - * the value only once, rather than assume they can read it multiple - * times and get the same answer each time. + * would be a nasty concurrency hit (and in fact could cause a deadlock + * against GetSnapshotData). So for now, assume atomicity. Note that + * readers of PGPROC xid field should be careful to fetch the value only + * once, rather than assume they can read it multiple times and get the + * same answer each time. * * The same comments apply to the subxact xid count and overflow fields. * - * A solution to the atomic-store problem would be to give each PGPROC - * its own spinlock used only for fetching/storing that PGPROC's xid - * and related fields. + * A solution to the atomic-store problem would be to give each PGPROC its + * own spinlock used only for fetching/storing that PGPROC's xid and + * related fields. * * If there's no room to fit a subtransaction XID into PGPROC, set the * cache-overflowed flag instead. This forces readers to look in - * pg_subtrans to map subtransaction XIDs up to top-level XIDs. There - * is a race-condition window, in that the new XID will not appear as - * running until its parent link has been placed into pg_subtrans. - * However, that will happen before anyone could possibly have a - * reason to inquire about the status of the XID, so it seems OK. - * (Snapshots taken during this window *will* include the parent XID, - * so they will deliver the correct answer later on when someone does - * have a reason to inquire.) + * pg_subtrans to map subtransaction XIDs up to top-level XIDs. There is a + * race-condition window, in that the new XID will not appear as running + * until its parent link has been placed into pg_subtrans. However, that + * will happen before anyone could possibly have a reason to inquire about + * the status of the XID, so it seems OK. (Snapshots taken during this + * window *will* include the parent XID, so they will deliver the correct + * answer later on when someone does have a reason to inquire.) */ if (MyProc != NULL) { @@ -197,27 +194,26 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, xidWrapLimit += FirstNormalTransactionId; /* - * We'll refuse to continue assigning XIDs in interactive mode once - * we get within 1M transactions of data loss. This leaves lots - * of room for the DBA to fool around fixing things in a standalone - * backend, while not being significant compared to total XID space. - * (Note that since vacuuming requires one transaction per table - * cleaned, we had better be sure there's lots of XIDs left...) + * We'll refuse to continue assigning XIDs in interactive mode once we get + * within 1M transactions of data loss. This leaves lots of room for the + * DBA to fool around fixing things in a standalone backend, while not + * being significant compared to total XID space. (Note that since + * vacuuming requires one transaction per table cleaned, we had better be + * sure there's lots of XIDs left...) 
*/ xidStopLimit = xidWrapLimit - 1000000; if (xidStopLimit < FirstNormalTransactionId) xidStopLimit -= FirstNormalTransactionId; /* - * We'll start complaining loudly when we get within 10M transactions - * of the stop point. This is kind of arbitrary, but if you let your - * gas gauge get down to 1% of full, would you be looking for the - * next gas station? We need to be fairly liberal about this number - * because there are lots of scenarios where most transactions are - * done by automatic clients that won't pay attention to warnings. - * (No, we're not gonna make this configurable. If you know enough to - * configure it, you know enough to not get in this kind of trouble in - * the first place.) + * We'll start complaining loudly when we get within 10M transactions of + * the stop point. This is kind of arbitrary, but if you let your gas + * gauge get down to 1% of full, would you be looking for the next gas + * station? We need to be fairly liberal about this number because there + * are lots of scenarios where most transactions are done by automatic + * clients that won't pay attention to warnings. (No, we're not gonna make + * this configurable. If you know enough to configure it, you know enough + * to not get in this kind of trouble in the first place.) */ xidWarnLimit = xidStopLimit - 10000000; if (xidWarnLimit < FirstNormalTransactionId) @@ -234,16 +230,16 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, /* Log the info */ ereport(LOG, - (errmsg("transaction ID wrap limit is %u, limited by database \"%s\"", - xidWrapLimit, NameStr(*oldest_datname)))); + (errmsg("transaction ID wrap limit is %u, limited by database \"%s\"", + xidWrapLimit, NameStr(*oldest_datname)))); /* Give an immediate warning if past the wrap warn point */ if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit)) ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %u transactions", - NameStr(*oldest_datname), - xidWrapLimit - curXid), - errhint("To avoid a database shutdown, execute a full-database VACUUM in \"%s\".", - NameStr(*oldest_datname)))); + (errmsg("database \"%s\" must be vacuumed within %u transactions", + NameStr(*oldest_datname), + xidWrapLimit - curXid), + errhint("To avoid a database shutdown, execute a full-database VACUUM in \"%s\".", + NameStr(*oldest_datname)))); } @@ -272,11 +268,11 @@ GetNewObjectId(void) * right after a wrap occurs, so as to avoid a possibly large number of * iterations in GetNewOid.) Note we are relying on unsigned comparison. * - * During initdb, we start the OID generator at FirstBootstrapObjectId, - * so we only enforce wrapping to that point when in bootstrap or - * standalone mode. The first time through this routine after normal - * postmaster start, the counter will be forced up to FirstNormalObjectId. - * This mechanism leaves the OIDs between FirstBootstrapObjectId and + * During initdb, we start the OID generator at FirstBootstrapObjectId, so we + * only enforce wrapping to that point when in bootstrap or standalone + * mode. The first time through this routine after normal postmaster + * start, the counter will be forced up to FirstNormalObjectId. This + * mechanism leaves the OIDs between FirstBootstrapObjectId and * FirstNormalObjectId available for automatic assignment during initdb, * while ensuring they will never conflict with user-assigned OIDs. 
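As a worked example of the limit arithmetic in the SetTransactionIdLimit hunks above: the wrap limit sits roughly half the 32-bit XID space past the oldest frozen XID, new XIDs are refused one million short of that, and warnings start ten million earlier still. A runnable sketch that mirrors the reserved-XID adjustments shown in the hunk (FIRST_NORMAL_XID is an assumed stand-in for FirstNormalTransactionId):

#include <stdint.h>
#include <stdio.h>

#define FIRST_NORMAL_XID 3      /* assumed: XIDs below this are reserved */

int
main(void)
{
    uint32_t oldest_datfrozenxid = 500000000u;

    /* Wrap limit: about 2^31 XIDs beyond the oldest frozen XID. */
    uint32_t xidWrapLimit = oldest_datfrozenxid + (UINT32_MAX >> 1);

    if (xidWrapLimit < FIRST_NORMAL_XID)
        xidWrapLimit += FIRST_NORMAL_XID;

    /* Stop handing out XIDs one million before the wrap point... */
    uint32_t xidStopLimit = xidWrapLimit - 1000000;

    if (xidStopLimit < FIRST_NORMAL_XID)
        xidStopLimit -= FIRST_NORMAL_XID;

    /* ...and start warning ten million before that. */
    uint32_t xidWarnLimit = xidStopLimit - 10000000;

    if (xidWarnLimit < FIRST_NORMAL_XID)
        xidWarnLimit -= FIRST_NORMAL_XID;

    printf("wrap=%u stop=%u warn=%u\n",
           (unsigned) xidWrapLimit, (unsigned) xidStopLimit, (unsigned) xidWarnLimit);
    /* prints wrap=2647483647 stop=2646483647 warn=2636483647 */
    return 0;
}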
*/ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index eabcb117cc..ea19e07564 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.214 2005/08/20 23:45:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.215 2005/10/15 02:49:09 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -110,15 +110,14 @@ typedef enum TBlockState */ typedef struct TransactionStateData { - TransactionId transactionId; /* my XID, or Invalid if none */ + TransactionId transactionId; /* my XID, or Invalid if none */ SubTransactionId subTransactionId; /* my subxact ID */ char *name; /* savepoint name, if any */ int savepointLevel; /* savepoint level */ TransState state; /* low-level state */ TBlockState blockState; /* high-level state */ int nestingLevel; /* nest depth */ - MemoryContext curTransactionContext; /* my xact-lifetime - * context */ + MemoryContext curTransactionContext; /* my xact-lifetime context */ ResourceOwner curTransactionOwner; /* my query resources */ List *childXids; /* subcommitted child XIDs */ Oid currentUser; /* subxact start current_user */ @@ -219,8 +218,8 @@ static void AtStart_Memory(void); static void AtStart_ResourceOwner(void); static void CallXactCallbacks(XactEvent event); static void CallSubXactCallbacks(SubXactEvent event, - SubTransactionId mySubid, - SubTransactionId parentSubid); + SubTransactionId mySubid, + SubTransactionId parentSubid); static void CleanupTransaction(void); static void CommitTransaction(void); static void RecordTransactionAbort(void); @@ -349,18 +348,18 @@ AssignSubTransactionId(TransactionState s) /* * Generate a new Xid and record it in PG_PROC and pg_subtrans. * - * NB: we must make the subtrans entry BEFORE the Xid appears anywhere - * in shared storage other than PG_PROC; because if there's no room for - * it in PG_PROC, the subtrans entry is needed to ensure that other - * backends see the Xid as "running". See GetNewTransactionId. + * NB: we must make the subtrans entry BEFORE the Xid appears anywhere in + * shared storage other than PG_PROC; because if there's no room for it in + * PG_PROC, the subtrans entry is needed to ensure that other backends see + * the Xid as "running". See GetNewTransactionId. */ s->transactionId = GetNewTransactionId(true); SubTransSetParent(s->transactionId, s->parent->transactionId); /* - * Acquire lock on the transaction XID. (We assume this cannot block.) - * We have to be sure that the lock is assigned to the transaction's + * Acquire lock on the transaction XID. (We assume this cannot block.) We + * have to be sure that the lock is assigned to the transaction's * ResourceOwner. */ currentOwner = CurrentResourceOwner; @@ -453,22 +452,22 @@ TransactionIdIsCurrentTransactionId(TransactionId xid) /* * We always say that BootstrapTransactionId is "not my transaction ID" - * even when it is (ie, during bootstrap). Along with the fact that + * even when it is (ie, during bootstrap). Along with the fact that * transam.c always treats BootstrapTransactionId as already committed, - * this causes the tqual.c routines to see all tuples as committed, - * which is what we need during bootstrap. (Bootstrap mode only inserts - * tuples, it never updates or deletes them, so all tuples can be presumed - * good immediately.) 
+ * this causes the tqual.c routines to see all tuples as committed, which + * is what we need during bootstrap. (Bootstrap mode only inserts tuples, + * it never updates or deletes them, so all tuples can be presumed good + * immediately.) */ if (xid == BootstrapTransactionId) return false; /* - * We will return true for the Xid of the current subtransaction, any - * of its subcommitted children, any of its parents, or any of their - * previously subcommitted children. However, a transaction being - * aborted is no longer "current", even though it may still have an - * entry on the state stack. + * We will return true for the Xid of the current subtransaction, any of + * its subcommitted children, any of its parents, or any of their + * previously subcommitted children. However, a transaction being aborted + * is no longer "current", even though it may still have an entry on the + * state stack. */ for (s = CurrentTransactionState; s != NULL; s = s->parent) { @@ -498,12 +497,12 @@ void CommandCounterIncrement(void) { currentCommandId += 1; - if (currentCommandId == FirstCommandId) /* check for overflow */ + if (currentCommandId == FirstCommandId) /* check for overflow */ { currentCommandId -= 1; ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("cannot have more than 2^32-1 commands in a transaction"))); + errmsg("cannot have more than 2^32-1 commands in a transaction"))); } /* Propagate new command ID into static snapshots, if set */ @@ -607,16 +606,15 @@ AtSubStart_Memory(void) Assert(CurTransactionContext != NULL); /* - * Create a CurTransactionContext, which will be used to hold data - * that survives subtransaction commit but disappears on - * subtransaction abort. We make it a child of the immediate parent's - * CurTransactionContext. + * Create a CurTransactionContext, which will be used to hold data that + * survives subtransaction commit but disappears on subtransaction abort. + * We make it a child of the immediate parent's CurTransactionContext. */ CurTransactionContext = AllocSetContextCreate(CurTransactionContext, "CurTransactionContext", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); s->curTransactionContext = CurTransactionContext; /* Make the CurTransactionContext active. */ @@ -634,8 +632,8 @@ AtSubStart_ResourceOwner(void) Assert(s->parent != NULL); /* - * Create a resource owner for the subtransaction. We make it a child - * of the immediate parent's resource owner. + * Create a resource owner for the subtransaction. We make it a child of + * the immediate parent's resource owner. */ s->curTransactionOwner = ResourceOwnerCreate(s->parent->curTransactionOwner, @@ -666,11 +664,10 @@ RecordTransactionCommit(void) nchildren = xactGetCommittedChildren(&children); /* - * If we made neither any XLOG entries nor any temp-rel updates, and - * have no files to be deleted, we can omit recording the transaction - * commit at all. (This test includes the effects of subtransactions, - * so the presence of committed subxacts need not alone force a - * write.) + * If we made neither any XLOG entries nor any temp-rel updates, and have + * no files to be deleted, we can omit recording the transaction commit at + * all. (This test includes the effects of subtransactions, so the + * presence of committed subxacts need not alone force a write.) 
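TransactionIdIsCurrentTransactionId, reindented above, answers its question by walking the transaction-state stack from the innermost subtransaction outward, checking each level's own XID and its list of already-subcommitted children. A stripped-down sketch of that walk; the struct and the flat child array are simplified stand-ins for the real TransactionStateData and childXids list:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t txn_id;

typedef struct txn_state
{
    txn_id      xid;            /* this (sub)transaction's XID, 0 if none */
    txn_id     *child_xids;     /* already-subcommitted children */
    int         nchildren;
    struct txn_state *parent;   /* NULL at the top level */
} txn_state;

static bool
is_current_xid(const txn_state *current, txn_id xid)
{
    const txn_state *s;

    for (s = current; s != NULL; s = s->parent)
    {
        int i;

        if (s->xid == 0)
            continue;           /* this level never acquired an XID */
        if (s->xid == xid)
            return true;
        for (i = 0; i < s->nchildren; i++)
            if (s->child_xids[i] == xid)
                return true;
    }
    return false;
}

int
main(void)
{
    txn_id      kids[] = {12, 13};
    txn_state   top = {10, NULL, 0, NULL};
    txn_state   sub = {14, kids, 2, &top};

    return is_current_xid(&sub, 13) ? 0 : 1;    /* 13 is a subcommitted child */
}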
*/ if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate || nrels > 0) { @@ -684,18 +681,17 @@ RecordTransactionCommit(void) START_CRIT_SECTION(); /* - * If our transaction made any transaction-controlled XLOG - * entries, we need to lock out checkpoint start between writing - * our XLOG record and updating pg_clog. Otherwise it is possible - * for the checkpoint to set REDO after the XLOG record but fail - * to flush the pg_clog update to disk, leading to loss of the - * transaction commit if we crash a little later. Slightly klugy - * fix for problem discovered 2004-08-10. + * If our transaction made any transaction-controlled XLOG entries, we + * need to lock out checkpoint start between writing our XLOG record + * and updating pg_clog. Otherwise it is possible for the checkpoint + * to set REDO after the XLOG record but fail to flush the pg_clog + * update to disk, leading to loss of the transaction commit if we + * crash a little later. Slightly klugy fix for problem discovered + * 2004-08-10. * - * (If it made no transaction-controlled XLOG entries, its XID - * appears nowhere in permanent storage, so no one else will ever - * care if it committed; so it doesn't matter if we lose the - * commit flag.) + * (If it made no transaction-controlled XLOG entries, its XID appears + * nowhere in permanent storage, so no one else will ever care if it + * committed; so it doesn't matter if we lose the commit flag.) * * Note we only need a shared lock. */ @@ -704,8 +700,8 @@ RecordTransactionCommit(void) LWLockAcquire(CheckpointStartLock, LW_SHARED); /* - * We only need to log the commit in XLOG if the transaction made - * any transaction-controlled XLOG entries or will delete files. + * We only need to log the commit in XLOG if the transaction made any + * transaction-controlled XLOG entries or will delete files. */ if (madeTCentries || nrels > 0) { @@ -748,26 +744,26 @@ RecordTransactionCommit(void) } /* - * We must flush our XLOG entries to disk if we made any XLOG - * entries, whether in or out of transaction control. For - * example, if we reported a nextval() result to the client, this - * ensures that any XLOG record generated by nextval will hit the - * disk before we report the transaction committed. + * We must flush our XLOG entries to disk if we made any XLOG entries, + * whether in or out of transaction control. For example, if we + * reported a nextval() result to the client, this ensures that any + * XLOG record generated by nextval will hit the disk before we report + * the transaction committed. * - * Note: if we generated a commit record above, MyXactMadeXLogEntry - * will certainly be set now. + * Note: if we generated a commit record above, MyXactMadeXLogEntry will + * certainly be set now. */ if (MyXactMadeXLogEntry) { /* * Sleep before flush! So we can flush more than one commit - * records per single fsync. (The idea is some other backend - * may do the XLogFlush while we're sleeping. This needs work - * still, because on most Unixen, the minimum select() delay - * is 10msec or more, which is way too long.) + * records per single fsync. (The idea is some other backend may + * do the XLogFlush while we're sleeping. This needs work still, + * because on most Unixen, the minimum select() delay is 10msec or + * more, which is way too long.) 
* - * We do not sleep if enableFsync is not turned on, nor if there - * are fewer than CommitSiblings other backends with active + * We do not sleep if enableFsync is not turned on, nor if there are + * fewer than CommitSiblings other backends with active * transactions. */ if (CommitDelay > 0 && enableFsync && @@ -778,14 +774,13 @@ RecordTransactionCommit(void) } /* - * We must mark the transaction committed in clog if its XID - * appears either in permanent rels or in local temporary rels. We - * test this by seeing if we made transaction-controlled entries - * *OR* local-rel tuple updates. Note that if we made only the - * latter, we have not emitted an XLOG record for our commit, and - * so in the event of a crash the clog update might be lost. This - * is okay because no one else will ever care whether we - * committed. + * We must mark the transaction committed in clog if its XID appears + * either in permanent rels or in local temporary rels. We test this + * by seeing if we made transaction-controlled entries *OR* local-rel + * tuple updates. Note that if we made only the latter, we have not + * emitted an XLOG record for our commit, and so in the event of a + * crash the clog update might be lost. This is okay because no one + * else will ever care whether we committed. */ if (madeTCentries || MyXactMadeTempRelUpdate) { @@ -833,9 +828,8 @@ static void AtCommit_Memory(void) { /* - * Now that we're "out" of a transaction, have the system allocate - * things in the top memory context instead of per-transaction - * contexts. + * Now that we're "out" of a transaction, have the system allocate things + * in the top memory context instead of per-transaction contexts. */ MemoryContextSwitchTo(TopMemoryContext); @@ -870,9 +864,9 @@ AtSubCommit_Memory(void) /* * Ordinarily we cannot throw away the child's CurTransactionContext, - * since the data it contains will be needed at upper commit. However, - * if there isn't actually anything in it, we can throw it away. This - * avoids a small memory leak in the common case of "trivial" subxacts. + * since the data it contains will be needed at upper commit. However, if + * there isn't actually anything in it, we can throw it away. This avoids + * a small memory leak in the common case of "trivial" subxacts. */ if (MemoryContextIsEmpty(s->curTransactionContext)) { @@ -908,9 +902,10 @@ AtSubCommit_childXids(void) { s->parent->childXids = list_concat(s->parent->childXids, s->childXids); + /* - * list_concat doesn't free the list header for the second list; - * do so here to avoid memory leakage (kluge) + * list_concat doesn't free the list header for the second list; do so + * here to avoid memory leakage (kluge) */ pfree(s->childXids); s->childXids = NIL; @@ -929,14 +924,14 @@ RecordSubTransactionCommit(void) * We do not log the subcommit in XLOG; it doesn't matter until the * top-level transaction commits. * - * We must mark the subtransaction subcommitted in clog if its XID - * appears either in permanent rels or in local temporary rels. We - * test this by seeing if we made transaction-controlled entries *OR* - * local-rel tuple updates. (The test here actually covers the entire - * transaction tree so far, so it may mark subtransactions that don't - * really need it, but it's probably not worth being tenser. Note that - * if a prior subtransaction dirtied these variables, then - * RecordTransactionCommit will have to do the full pushup anyway...) 
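The commit-delay discussion above (sleep briefly before the flush so that several commit records can share one fsync, but only when fsync is enabled and enough sibling backends have active transactions) can be sketched as follows. The names are placeholders, usleep stands in for the backend's delay primitive, and the real code counts active backends under its own locking rules:

#include <stdbool.h>
#include <unistd.h>

/*
 * Optionally wait a little before flushing WAL, hoping another backend
 * performs the flush for us. Skip the wait when fsync is off or when too
 * few other backends have active transactions to make sharing likely.
 */
static void
maybe_delay_before_flush(int commit_delay_usec, bool fsync_enabled,
                         int active_sibling_backends, int commit_siblings)
{
    if (commit_delay_usec > 0 && fsync_enabled &&
        active_sibling_backends >= commit_siblings)
        usleep((useconds_t) commit_delay_usec);
}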
+ * We must mark the subtransaction subcommitted in clog if its XID appears + * either in permanent rels or in local temporary rels. We test this by + * seeing if we made transaction-controlled entries *OR* local-rel tuple + * updates. (The test here actually covers the entire transaction tree so + * far, so it may mark subtransactions that don't really need it, but it's + * probably not worth being tenser. Note that if a prior subtransaction + * dirtied these variables, then RecordTransactionCommit will have to do + * the full pushup anyway...) */ if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) { @@ -974,9 +969,9 @@ RecordTransactionAbort(void) /* * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, and are not going to delete any files, we can - * omit recording the transaction abort at all. No one will ever care - * that it aborted. (These tests cover our whole transaction tree.) + * temp-rel updates, and are not going to delete any files, we can omit + * recording the transaction abort at all. No one will ever care that it + * aborted. (These tests cover our whole transaction tree.) */ if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) { @@ -992,16 +987,16 @@ RecordTransactionAbort(void) START_CRIT_SECTION(); /* - * We only need to log the abort in XLOG if the transaction made - * any transaction-controlled XLOG entries or will delete files. - * (If it made no transaction-controlled XLOG entries, its XID - * appears nowhere in permanent storage, so no one else will ever - * care if it committed.) + * We only need to log the abort in XLOG if the transaction made any + * transaction-controlled XLOG entries or will delete files. (If it + * made no transaction-controlled XLOG entries, its XID appears + * nowhere in permanent storage, so no one else will ever care if it + * committed.) * - * We do not flush XLOG to disk unless deleting files, since the - * default assumption after a crash would be that we aborted, - * anyway. For the same reason, we don't need to worry about - * interlocking against checkpoint start. + * We do not flush XLOG to disk unless deleting files, since the default + * assumption after a crash would be that we aborted, anyway. For the + * same reason, we don't need to worry about interlocking against + * checkpoint start. */ if (MyLastRecPtr.xrecoff != 0 || nrels > 0) { @@ -1047,8 +1042,8 @@ RecordTransactionAbort(void) * Mark the transaction aborted in clog. This is not absolutely * necessary but we may as well do it while we are here. * - * The ordering here isn't critical but it seems best to mark the - * parent first. This assures an atomic transition of all the + * The ordering here isn't critical but it seems best to mark the parent + * first. This assures an atomic transition of all the * subtransactions to aborted state from the point of view of * concurrent TransactionIdDidAbort calls. */ @@ -1078,8 +1073,8 @@ AtAbort_Memory(void) { /* * Make sure we are in a valid context (not a child of - * TopTransactionContext...). Note that it is possible for this code - * to be called when we aren't in a transaction at all; go directly to + * TopTransactionContext...). Note that it is possible for this code to + * be called when we aren't in a transaction at all; go directly to * TopMemoryContext in that case. 
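The RecordTransactionAbort hunk above notes that the parent XID is marked aborted before its subcommitted children, so that concurrent TransactionIdDidAbort callers see the whole tree change state atomically. A sketch of that ordering with a stand-in clog setter (the real call updates pg_clog pages):

#include <stdint.h>

typedef uint32_t TransactionId;

/* Stand-in for the clog status update. */
static void set_aborted_in_clog(TransactionId xid) { (void) xid; }

static void
record_abort_tree(TransactionId parent_xid,
                  const TransactionId *children, int nchildren)
{
    int i;

    /*
     * Mark the parent first: a subcommitted child is resolved by looking at
     * its parent, so once the parent reads as aborted the whole tree is
     * already interpreted as aborted, and observers never see a half-updated
     * state.
     */
    set_aborted_in_clog(parent_xid);
    for (i = 0; i < nchildren; i++)
        set_aborted_in_clog(children[i]);
}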
*/ if (TopTransactionContext != NULL) @@ -1087,8 +1082,8 @@ AtAbort_Memory(void) MemoryContextSwitchTo(TopTransactionContext); /* - * We do not want to destroy the transaction's global state yet, - * so we can't free any memory here. + * We do not want to destroy the transaction's global state yet, so we + * can't free any memory here. */ } else @@ -1114,8 +1109,8 @@ static void AtAbort_ResourceOwner(void) { /* - * Make sure we have a valid ResourceOwner, if possible (else it - * will be NULL, which is OK) + * Make sure we have a valid ResourceOwner, if possible (else it will be + * NULL, which is OK) */ CurrentResourceOwner = TopTransactionResourceOwner; } @@ -1143,7 +1138,7 @@ AtSubAbort_childXids(void) /* * We keep the child-XID lists in TopTransactionContext (see - * AtSubCommit_childXids). This means we'd better free the list + * AtSubCommit_childXids). This means we'd better free the list * explicitly at abort to avoid leakage. */ list_free(s->childXids); @@ -1168,11 +1163,11 @@ RecordSubTransactionAbort(void) /* * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, and are not going to delete any files, we can - * omit recording the transaction abort at all. No one will ever care - * that it aborted. (These tests cover our whole transaction tree, - * and therefore may mark subxacts that don't really need it, but it's - * probably not worth being tenser.) + * temp-rel updates, and are not going to delete any files, we can omit + * recording the transaction abort at all. No one will ever care that it + * aborted. (These tests cover our whole transaction tree, and therefore + * may mark subxacts that don't really need it, but it's probably not + * worth being tenser.) * * In this case we needn't worry about marking subcommitted children as * aborted, because they didn't mark themselves as subcommitted in the @@ -1183,8 +1178,8 @@ RecordSubTransactionAbort(void) START_CRIT_SECTION(); /* - * We only need to log the abort in XLOG if the transaction made - * any transaction-controlled XLOG entries or will delete files. + * We only need to log the abort in XLOG if the transaction made any + * transaction-controlled XLOG entries or will delete files. */ if (MyLastRecPtr.xrecoff != 0 || nrels > 0) { @@ -1238,11 +1233,10 @@ RecordSubTransactionAbort(void) } /* - * We can immediately remove failed XIDs from PGPROC's cache of - * running child XIDs. It's easiest to do it here while we have the - * child XID array at hand, even though in the main-transaction case - * the equivalent work happens just after return from - * RecordTransactionAbort. + * We can immediately remove failed XIDs from PGPROC's cache of running + * child XIDs. It's easiest to do it here while we have the child XID + * array at hand, even though in the main-transaction case the equivalent + * work happens just after return from RecordTransactionAbort. */ XidCacheRemoveRunningXids(xid, nchildren, children); @@ -1265,9 +1259,8 @@ static void AtCleanup_Memory(void) { /* - * Now that we're "out" of a transaction, have the system allocate - * things in the top memory context instead of per-transaction - * contexts. + * Now that we're "out" of a transaction, have the system allocate things + * in the top memory context instead of per-transaction contexts. */ MemoryContextSwitchTo(TopMemoryContext); @@ -1304,9 +1297,9 @@ AtSubCleanup_Memory(void) CurTransactionContext = s->parent->curTransactionContext; /* - * Delete the subxact local memory contexts. 
Its CurTransactionContext - * can go too (note this also kills CurTransactionContexts from any - * children of the subxact). + * Delete the subxact local memory contexts. Its CurTransactionContext can + * go too (note this also kills CurTransactionContexts from any children + * of the subxact). */ if (s->curTransactionContext) MemoryContextDelete(s->curTransactionContext); @@ -1344,11 +1337,10 @@ StartTransaction(void) * start processing */ s->state = TRANS_START; - s->transactionId = InvalidTransactionId; /* until assigned */ + s->transactionId = InvalidTransactionId; /* until assigned */ /* - * Make sure we've freed any old snapshot, and reset xact state - * variables + * Make sure we've freed any old snapshot, and reset xact state variables */ FreeXactSnapshot(); XactIsoLevel = DefaultXactIsoLevel; @@ -1386,10 +1378,10 @@ StartTransaction(void) s->childXids = NIL; /* - * You might expect to see "s->currentUser = GetUserId();" here, but - * you won't because it doesn't work during startup; the userid isn't - * set yet during a backend's first transaction start. We only use - * the currentUser field in sub-transaction state structs. + * You might expect to see "s->currentUser = GetUserId();" here, but you + * won't because it doesn't work during startup; the userid isn't set yet + * during a backend's first transaction start. We only use the + * currentUser field in sub-transaction state structs. * * prevXactReadOnly is also valid only in sub-transactions. */ @@ -1432,13 +1424,12 @@ CommitTransaction(void) Assert(s->parent == NULL); /* - * Do pre-commit processing (most of this stuff requires database - * access, and in fact could still cause an error...) + * Do pre-commit processing (most of this stuff requires database access, + * and in fact could still cause an error...) * - * It is possible for CommitHoldablePortals to invoke functions that - * queue deferred triggers, and it's also possible that triggers create - * holdable cursors. So we have to loop until there's nothing left to - * do. + * It is possible for CommitHoldablePortals to invoke functions that queue + * deferred triggers, and it's also possible that triggers create holdable + * cursors. So we have to loop until there's nothing left to do. */ for (;;) { @@ -1525,19 +1516,19 @@ CommitTransaction(void) } /* - * This is all post-commit cleanup. Note that if an error is raised - * here, it's too late to abort the transaction. This should be just + * This is all post-commit cleanup. Note that if an error is raised here, + * it's too late to abort the transaction. This should be just * noncritical resource releasing. * - * The ordering of operations is not entirely random. The idea is: - * release resources visible to other backends (eg, files, buffer - * pins); then release locks; then release backend-local resources. We - * want to release locks at the point where any backend waiting for us - * will see our transaction as being fully cleaned up. + * The ordering of operations is not entirely random. The idea is: release + * resources visible to other backends (eg, files, buffer pins); then + * release locks; then release backend-local resources. We want to release + * locks at the point where any backend waiting for us will see our + * transaction as being fully cleaned up. * - * Resources that can be associated with individual queries are handled - * by the ResourceOwner mechanism. The other calls here are for - * backend-wide state. 
+ * Resources that can be associated with individual queries are handled by + * the ResourceOwner mechanism. The other calls here are for backend-wide + * state. */ CallXactCallbacks(XACT_EVENT_COMMIT); @@ -1553,12 +1544,11 @@ CommitTransaction(void) AtEOXact_RelationCache(true); /* - * Make catalog changes visible to all backends. This has to happen - * after relcache references are dropped (see comments for - * AtEOXact_RelationCache), but before locks are released (if anyone - * is waiting for lock on a relation we've modified, we want them to - * know about the catalog change before they start using the - * relation). + * Make catalog changes visible to all backends. This has to happen after + * relcache references are dropped (see comments for + * AtEOXact_RelationCache), but before locks are released (if anyone is + * waiting for lock on a relation we've modified, we want them to know + * about the catalog change before they start using the relation). */ AtEOXact_Inval(true); @@ -1621,10 +1611,10 @@ CommitTransaction(void) static void PrepareTransaction(void) { - TransactionState s = CurrentTransactionState; - TransactionId xid = GetCurrentTransactionId(); - GlobalTransaction gxact; - TimestampTz prepared_at; + TransactionState s = CurrentTransactionState; + TransactionId xid = GetCurrentTransactionId(); + GlobalTransaction gxact; + TimestampTz prepared_at; ShowTransactionState("PrepareTransaction"); @@ -1637,13 +1627,12 @@ PrepareTransaction(void) Assert(s->parent == NULL); /* - * Do pre-commit processing (most of this stuff requires database - * access, and in fact could still cause an error...) + * Do pre-commit processing (most of this stuff requires database access, + * and in fact could still cause an error...) * - * It is possible for PrepareHoldablePortals to invoke functions that - * queue deferred triggers, and it's also possible that triggers create - * holdable cursors. So we have to loop until there's nothing left to - * do. + * It is possible for PrepareHoldablePortals to invoke functions that queue + * deferred triggers, and it's also possible that triggers create holdable + * cursors. So we have to loop until there's nothing left to do. */ for (;;) { @@ -1693,8 +1682,8 @@ PrepareTransaction(void) BufmgrCommit(); /* - * Reserve the GID for this transaction. This could fail if the - * requested GID is invalid or already in use. + * Reserve the GID for this transaction. This could fail if the requested + * GID is invalid or already in use. */ gxact = MarkAsPreparing(xid, prepareGID, prepared_at, GetUserId(), MyDatabaseId); @@ -1707,14 +1696,14 @@ PrepareTransaction(void) * want transaction abort to be able to clean up. (In particular, the * AtPrepare routines may error out if they find cases they cannot * handle.) State cleanup should happen in the PostPrepare routines - * below. However, some modules can go ahead and clear state here - * because they wouldn't do anything with it during abort anyway. + * below. However, some modules can go ahead and clear state here because + * they wouldn't do anything with it during abort anyway. * * Note: because the 2PC state file records will be replayed in the same - * order they are made, the order of these calls has to match the order - * in which we want things to happen during COMMIT PREPARED or - * ROLLBACK PREPARED; in particular, pay attention to whether things - * should happen before or after releasing the transaction's locks. 
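The post-commit comments above prescribe an ordering for the noncritical cleanup: first release resources other backends can see (files, buffer pins), then release locks, and only then release backend-local state, so that anyone who is granted one of our locks finds the transaction fully cleaned up. Schematically, with placeholder step functions that each stand for a group of real backend calls:

/* Placeholder cleanup steps; each stands for several real backend calls. */
static void release_shared_visible_resources(void) { /* files, buffer pins */ }
static void release_locks(void)                    { /* lock manager */ }
static void release_backend_local_state(void)      { /* caches, memory */ }

static void
post_commit_cleanup(void)
{
    /*
     * Order matters: by the time a waiter acquires one of our locks, it must
     * not be able to observe any lingering shared-visible resources of ours.
     */
    release_shared_visible_resources();
    release_locks();
    release_backend_local_state();
}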
+ * order they are made, the order of these calls has to match the order in + * which we want things to happen during COMMIT PREPARED or ROLLBACK + * PREPARED; in particular, pay attention to whether things should happen + * before or after releasing the transaction's locks. */ StartPrepare(gxact); @@ -1726,15 +1715,14 @@ PrepareTransaction(void) /* * Here is where we really truly prepare. * - * We have to record transaction prepares even if we didn't - * make any updates, because the transaction manager might - * get confused if we lose a global transaction. + * We have to record transaction prepares even if we didn't make any updates, + * because the transaction manager might get confused if we lose a global + * transaction. */ EndPrepare(gxact); /* - * Now we clean up backend-internal state and release internal - * resources. + * Now we clean up backend-internal state and release internal resources. */ /* Break the chain of back-links in the XLOG records I output */ @@ -1743,9 +1731,9 @@ PrepareTransaction(void) MyXactMadeTempRelUpdate = false; /* - * Let others know about no transaction in progress by me. This has - * to be done *after* the prepared transaction has been marked valid, - * else someone may think it is unlocked and recyclable. + * Let others know about no transaction in progress by me. This has to be + * done *after* the prepared transaction has been marked valid, else + * someone may think it is unlocked and recyclable. */ /* Lock ProcArrayLock because that's what GetSnapshotData uses. */ @@ -1762,7 +1750,7 @@ PrepareTransaction(void) /* * This is all post-transaction cleanup. Note that if an error is raised * here, it's too late to abort the transaction. This should be just - * noncritical resource releasing. See notes in CommitTransaction. + * noncritical resource releasing. See notes in CommitTransaction. */ CallXactCallbacks(XACT_EVENT_PREPARE); @@ -1819,8 +1807,8 @@ PrepareTransaction(void) s->childXids = NIL; /* - * done with 1st phase commit processing, set current transaction - * state back to default + * done with 1st phase commit processing, set current transaction state + * back to default */ s->state = TRANS_DEFAULT; @@ -1842,8 +1830,8 @@ AbortTransaction(void) /* * Release any LW locks we might be holding as quickly as possible. * (Regular locks, however, must be held till we finish aborting.) - * Releasing LW locks is critical since we might try to grab them - * again while cleaning up! + * Releasing LW locks is critical since we might try to grab them again + * while cleaning up! */ LWLockReleaseAll(); @@ -1852,8 +1840,8 @@ AbortTransaction(void) UnlockBuffers(); /* - * Also clean up any open wait for lock, since the lock manager will - * choke if we try to wait for another lock before doing this. + * Also clean up any open wait for lock, since the lock manager will choke + * if we try to wait for another lock before doing this. */ LockWaitCancel(); @@ -1866,8 +1854,8 @@ AbortTransaction(void) Assert(s->parent == NULL); /* - * set the current transaction state information appropriately during - * the abort processing + * set the current transaction state information appropriately during the + * abort processing */ s->state = TRANS_ABORT; @@ -1876,15 +1864,14 @@ AbortTransaction(void) AtAbort_ResourceOwner(); /* - * Reset user id which might have been changed transiently. We cannot - * use s->currentUser, since it may not be set yet; instead rely on - * internal state of miscinit.c. + * Reset user id which might have been changed transiently. 
We cannot use + * s->currentUser, since it may not be set yet; instead rely on internal + * state of miscinit.c. * - * (Note: it is not necessary to restore session authorization here - * because that can only be changed via GUC, and GUC will take care of - * rolling it back if need be. However, an error within a SECURITY - * DEFINER function could send control here with the wrong current - * userid.) + * (Note: it is not necessary to restore session authorization here because + * that can only be changed via GUC, and GUC will take care of rolling it + * back if need be. However, an error within a SECURITY DEFINER function + * could send control here with the wrong current userid.) */ AtAbort_UserId(); @@ -1898,15 +1885,15 @@ AbortTransaction(void) AtEOXact_UpdateFlatFiles(false); /* - * Advertise the fact that we aborted in pg_clog (assuming that we - * got as far as assigning an XID to advertise). + * Advertise the fact that we aborted in pg_clog (assuming that we got as + * far as assigning an XID to advertise). */ if (TransactionIdIsValid(s->transactionId)) RecordTransactionAbort(); /* - * Let others know about no transaction in progress by me. Note that - * this must be done _before_ releasing locks we hold and _after_ + * Let others know about no transaction in progress by me. Note that this + * must be done _before_ releasing locks we hold and _after_ * RecordTransactionAbort. */ if (MyProc != NULL) @@ -2012,8 +1999,8 @@ StartTransactionCommand(void) switch (s->blockState) { /* - * if we aren't in a transaction block, we just do our usual - * start transaction. + * if we aren't in a transaction block, we just do our usual start + * transaction. */ case TBLOCK_DEFAULT: StartTransaction(); @@ -2021,23 +2008,23 @@ StartTransactionCommand(void) break; /* - * We are somewhere in a transaction block or subtransaction - * and about to start a new command. For now we do nothing, - * but someday we may do command-local resource initialization. - * (Note that any needed CommandCounterIncrement was done by - * the previous CommitTransactionCommand.) + * We are somewhere in a transaction block or subtransaction and + * about to start a new command. For now we do nothing, but + * someday we may do command-local resource initialization. (Note + * that any needed CommandCounterIncrement was done by the + * previous CommitTransactionCommand.) */ case TBLOCK_INPROGRESS: case TBLOCK_SUBINPROGRESS: break; /* - * Here we are in a failed transaction block (one of - * the commands caused an abort) so we do nothing but remain in - * the abort state. Eventually we will get a ROLLBACK command - * which will get us out of this state. (It is up to other - * code to ensure that no commands other than ROLLBACK will be - * processed in these states.) + * Here we are in a failed transaction block (one of the commands + * caused an abort) so we do nothing but remain in the abort + * state. Eventually we will get a ROLLBACK command which will + * get us out of this state. (It is up to other code to ensure + * that no commands other than ROLLBACK will be processed in these + * states.) */ case TBLOCK_ABORT: case TBLOCK_SUBABORT: @@ -2099,10 +2086,10 @@ CommitTransactionCommand(void) break; /* - * We are completing a "BEGIN TRANSACTION" command, so we - * change to the "transaction block in progress" state and - * return. (We assume the BEGIN did nothing to the database, - * so we need no CommandCounterIncrement.) 
+ * We are completing a "BEGIN TRANSACTION" command, so we change + * to the "transaction block in progress" state and return. (We + * assume the BEGIN did nothing to the database, so we need no + * CommandCounterIncrement.) */ case TBLOCK_BEGIN: s->blockState = TBLOCK_INPROGRESS; @@ -2110,8 +2097,8 @@ CommitTransactionCommand(void) /* * This is the case when we have finished executing a command - * someplace within a transaction block. We increment the - * command counter and return. + * someplace within a transaction block. We increment the command + * counter and return. */ case TBLOCK_INPROGRESS: case TBLOCK_SUBINPROGRESS: @@ -2119,8 +2106,8 @@ CommitTransactionCommand(void) break; /* - * We are completing a "COMMIT" command. Do it and return to - * the idle state. + * We are completing a "COMMIT" command. Do it and return to the + * idle state. */ case TBLOCK_END: CommitTransaction(); @@ -2128,17 +2115,17 @@ CommitTransactionCommand(void) break; /* - * Here we are in the middle of a transaction block but one of - * the commands caused an abort so we do nothing but remain in - * the abort state. Eventually we will get a ROLLBACK comand. + * Here we are in the middle of a transaction block but one of the + * commands caused an abort so we do nothing but remain in the + * abort state. Eventually we will get a ROLLBACK comand. */ case TBLOCK_ABORT: case TBLOCK_SUBABORT: break; /* - * Here we were in an aborted transaction block and we just - * got the ROLLBACK command from the user, so clean up the + * Here we were in an aborted transaction block and we just got + * the ROLLBACK command from the user, so clean up the * already-aborted transaction and return to the idle state. */ case TBLOCK_ABORT_END: @@ -2147,9 +2134,9 @@ CommitTransactionCommand(void) break; /* - * Here we were in a perfectly good transaction block but the - * user told us to ROLLBACK anyway. We have to abort the - * transaction and then clean up. + * Here we were in a perfectly good transaction block but the user + * told us to ROLLBACK anyway. We have to abort the transaction + * and then clean up. */ case TBLOCK_ABORT_PENDING: AbortTransaction(); @@ -2169,8 +2156,8 @@ CommitTransactionCommand(void) /* * We were just issued a SAVEPOINT inside a transaction block. * Start a subtransaction. (DefineSavepoint already did - * PushTransaction, so as to have someplace to put the - * SUBBEGIN state.) + * PushTransaction, so as to have someplace to put the SUBBEGIN + * state.) */ case TBLOCK_SUBBEGIN: StartSubTransaction(); @@ -2259,8 +2246,8 @@ CommitTransactionCommand(void) break; /* - * Same as above, but the subtransaction had already failed, - * so we don't need AbortSubTransaction. + * Same as above, but the subtransaction had already failed, so we + * don't need AbortSubTransaction. */ case TBLOCK_SUBABORT_RESTART: { @@ -2320,8 +2307,8 @@ AbortCurrentTransaction(void) break; /* - * if we aren't in a transaction block, we just do the basic - * abort & cleanup transaction. + * if we aren't in a transaction block, we just do the basic abort + * & cleanup transaction. */ case TBLOCK_STARTED: AbortTransaction(); @@ -2330,11 +2317,11 @@ AbortCurrentTransaction(void) break; /* - * If we are in TBLOCK_BEGIN it means something screwed up - * right after reading "BEGIN TRANSACTION". We assume that - * the user will interpret the error as meaning the BEGIN - * failed to get him into a transaction block, so we should - * abort and return to idle state. 
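The CommitTransactionCommand hunks above walk a small state machine keyed on blockState. A stripped-down sketch of a few of those transitions; the enum and helper names here are illustrative, not the backend's definitions:

typedef enum
{
    TB_DEFAULT,       /* idle, not in a transaction block */
    TB_BEGIN,         /* BEGIN just received */
    TB_INPROGRESS,    /* live transaction block */
    TB_END,           /* COMMIT received */
    TB_ABORT,         /* failed block, waiting for ROLLBACK */
    TB_ABORT_END,     /* ROLLBACK received after a failure */
    TB_ABORT_PENDING  /* ROLLBACK received in a live block */
} BlockState;

/* Stand-ins for the real transaction entry points. */
static void do_commit(void)            {}
static void do_abort(void)             {}
static void do_cleanup(void)           {}
static void bump_command_counter(void) {}

static BlockState
commit_transaction_command(BlockState bs)
{
    switch (bs)
    {
        case TB_BEGIN:          /* BEGIN completed: enter the block */
            return TB_INPROGRESS;
        case TB_INPROGRESS:     /* ordinary statement inside the block */
            bump_command_counter();
            return TB_INPROGRESS;
        case TB_END:            /* COMMIT: finish and go idle */
            do_commit();
            return TB_DEFAULT;
        case TB_ABORT:          /* failed block: stay put until ROLLBACK */
            return TB_ABORT;
        case TB_ABORT_END:      /* ROLLBACK after failure: just clean up */
            do_cleanup();
            return TB_DEFAULT;
        case TB_ABORT_PENDING:  /* ROLLBACK of a live block: abort, clean up */
            do_abort();
            do_cleanup();
            return TB_DEFAULT;
        default:
            return bs;
    }
}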
+ * If we are in TBLOCK_BEGIN it means something screwed up right + * after reading "BEGIN TRANSACTION". We assume that the user + * will interpret the error as meaning the BEGIN failed to get him + * into a transaction block, so we should abort and return to idle + * state. */ case TBLOCK_BEGIN: AbortTransaction(); @@ -2354,9 +2341,9 @@ AbortCurrentTransaction(void) break; /* - * Here, we failed while trying to COMMIT. Clean up the - * transaction and return to idle state (we do not want to - * stay in the transaction). + * Here, we failed while trying to COMMIT. Clean up the + * transaction and return to idle state (we do not want to stay in + * the transaction). */ case TBLOCK_END: AbortTransaction(); @@ -2365,9 +2352,9 @@ AbortCurrentTransaction(void) break; /* - * Here, we are already in an aborted transaction state and - * are waiting for a ROLLBACK, but for some reason we failed - * again! So we just remain in the abort state. + * Here, we are already in an aborted transaction state and are + * waiting for a ROLLBACK, but for some reason we failed again! + * So we just remain in the abort state. */ case TBLOCK_ABORT: case TBLOCK_SUBABORT: @@ -2375,8 +2362,8 @@ AbortCurrentTransaction(void) /* * We are in a failed transaction and we got the ROLLBACK command. - * We have already aborted, we just need to cleanup and go to - * idle state. + * We have already aborted, we just need to cleanup and go to idle + * state. */ case TBLOCK_ABORT_END: CleanupTransaction(); @@ -2395,8 +2382,8 @@ AbortCurrentTransaction(void) /* * Here, we failed while trying to PREPARE. Clean up the - * transaction and return to idle state (we do not want to - * stay in the transaction). + * transaction and return to idle state (we do not want to stay in + * the transaction). */ case TBLOCK_PREPARE: AbortTransaction(); @@ -2406,8 +2393,8 @@ AbortCurrentTransaction(void) /* * We got an error inside a subtransaction. Abort just the - * subtransaction, and go to the persistent SUBABORT state - * until we get ROLLBACK. + * subtransaction, and go to the persistent SUBABORT state until + * we get ROLLBACK. */ case TBLOCK_SUBINPROGRESS: AbortSubTransaction(); @@ -2416,7 +2403,7 @@ AbortCurrentTransaction(void) /* * If we failed while trying to create a subtransaction, clean up - * the broken subtransaction and abort the parent. The same + * the broken subtransaction and abort the parent. The same * applies if we get a failure while ending a subtransaction. */ case TBLOCK_SUBBEGIN: @@ -2479,15 +2466,15 @@ PreventTransactionChain(void *stmtNode, const char *stmtType) stmtType))); /* - * Are we inside a function call? If the statement's parameter block - * was allocated in QueryContext, assume it is an interactive command. + * Are we inside a function call? If the statement's parameter block was + * allocated in QueryContext, assume it is an interactive command. * Otherwise assume it is coming from a function. */ if (!MemoryContextContains(QueryContext, stmtNode)) ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), /* translator: %s represents an SQL statement name */ - errmsg("%s cannot be executed from a function", stmtType))); + errmsg("%s cannot be executed from a function", stmtType))); /* If we got past IsTransactionBlock test, should be in default state */ if (CurrentTransactionState->blockState != TBLOCK_DEFAULT && @@ -2529,8 +2516,8 @@ RequireTransactionChain(void *stmtNode, const char *stmtType) return; /* - * Are we inside a function call? 
If the statement's parameter block - * was allocated in QueryContext, assume it is an interactive command. + * Are we inside a function call? If the statement's parameter block was + * allocated in QueryContext, assume it is an interactive command. * Otherwise assume it is coming from a function. */ if (!MemoryContextContains(QueryContext, stmtNode)) @@ -2556,8 +2543,8 @@ bool IsInTransactionChain(void *stmtNode) { /* - * Return true on same conditions that would make - * PreventTransactionChain error out + * Return true on same conditions that would make PreventTransactionChain + * error out */ if (IsTransactionBlock()) return true; @@ -2705,8 +2692,7 @@ BeginTransactionBlock(void) switch (s->blockState) { /* - * We are not inside a transaction block, so allow one to - * begin. + * We are not inside a transaction block, so allow one to begin. */ case TBLOCK_STARTED: s->blockState = TBLOCK_BEGIN; @@ -2721,7 +2707,7 @@ BeginTransactionBlock(void) case TBLOCK_SUBABORT: ereport(WARNING, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), - errmsg("there is already a transaction in progress"))); + errmsg("there is already a transaction in progress"))); break; /* These cases are invalid. */ @@ -2759,7 +2745,7 @@ bool PrepareTransactionBlock(char *gid) { TransactionState s; - bool result; + bool result; /* Set up to commit the current transaction */ result = EndTransactionBlock(); @@ -2832,8 +2818,8 @@ EndTransactionBlock(void) break; /* - * We are in a live subtransaction block. Set up to subcommit - * all open subtransactions and then commit the main transaction. + * We are in a live subtransaction block. Set up to subcommit all + * open subtransactions and then commit the main transaction. */ case TBLOCK_SUBINPROGRESS: while (s->parent != NULL) @@ -2854,9 +2840,9 @@ EndTransactionBlock(void) break; /* - * Here we are inside an aborted subtransaction. Treat the - * COMMIT as ROLLBACK: set up to abort everything and exit - * the main transaction. + * Here we are inside an aborted subtransaction. Treat the COMMIT + * as ROLLBACK: set up to abort everything and exit the main + * transaction. */ case TBLOCK_SUBABORT: while (s->parent != NULL) @@ -2927,9 +2913,9 @@ UserAbortTransactionBlock(void) switch (s->blockState) { /* - * We are inside a transaction block and we got a ROLLBACK - * command from the user, so tell CommitTransactionCommand - * to abort and exit the transaction block. + * We are inside a transaction block and we got a ROLLBACK command + * from the user, so tell CommitTransactionCommand to abort and + * exit the transaction block. */ case TBLOCK_INPROGRESS: s->blockState = TBLOCK_ABORT_PENDING; @@ -2937,17 +2923,17 @@ UserAbortTransactionBlock(void) /* * We are inside a failed transaction block and we got a ROLLBACK - * command from the user. Abort processing is already done, - * so CommitTransactionCommand just has to cleanup and go back - * to idle state. + * command from the user. Abort processing is already done, so + * CommitTransactionCommand just has to cleanup and go back to + * idle state. */ case TBLOCK_ABORT: s->blockState = TBLOCK_ABORT_END; break; /* - * We are inside a subtransaction. Mark everything - * up to top level as exitable. + * We are inside a subtransaction. Mark everything up to top + * level as exitable. */ case TBLOCK_SUBINPROGRESS: case TBLOCK_SUBABORT: @@ -2972,8 +2958,8 @@ UserAbortTransactionBlock(void) break; /* - * The user issued ABORT when not inside a transaction. Issue - * a WARNING and go to abort state. 
The upcoming call to + * The user issued ABORT when not inside a transaction. Issue a + * WARNING and go to abort state. The upcoming call to * CommitTransactionCommand() will then put us back into the * default state. */ @@ -3021,8 +3007,8 @@ DefineSavepoint(char *name) s = CurrentTransactionState; /* changed by push */ /* - * Savepoint names, like the TransactionState block itself, - * live in TopTransactionContext. + * Savepoint names, like the TransactionState block itself, live + * in TopTransactionContext. */ if (name) s->name = MemoryContextStrdup(TopTransactionContext, name); @@ -3078,8 +3064,8 @@ ReleaseSavepoint(List *options) break; /* - * We are in a non-aborted subtransaction. This is the only - * valid case. + * We are in a non-aborted subtransaction. This is the only valid + * case. */ case TBLOCK_SUBINPROGRESS: break; @@ -3134,8 +3120,8 @@ ReleaseSavepoint(List *options) /* * Mark "commit pending" all subtransactions up to the target - * subtransaction. The actual commits will happen when control gets - * to CommitTransactionCommand. + * subtransaction. The actual commits will happen when control gets to + * CommitTransactionCommand. */ xact = CurrentTransactionState; for (;;) @@ -3232,8 +3218,8 @@ RollbackToSavepoint(List *options) /* * Mark "abort pending" all subtransactions up to the target - * subtransaction. The actual aborts will happen when control gets - * to CommitTransactionCommand. + * subtransaction. The actual aborts will happen when control gets to + * CommitTransactionCommand. */ xact = CurrentTransactionState; for (;;) @@ -3284,8 +3270,8 @@ BeginInternalSubTransaction(char *name) s = CurrentTransactionState; /* changed by push */ /* - * Savepoint names, like the TransactionState block itself, - * live in TopTransactionContext. + * Savepoint names, like the TransactionState block itself, live + * in TopTransactionContext. */ if (name) s->name = MemoryContextStrdup(TopTransactionContext, name); @@ -3333,7 +3319,7 @@ ReleaseCurrentSubTransaction(void) Assert(s->state == TRANS_INPROGRESS); MemoryContextSwitchTo(CurTransactionContext); CommitSubTransaction(); - s = CurrentTransactionState; /* changed by pop */ + s = CurrentTransactionState; /* changed by pop */ Assert(s->state == TRANS_INPROGRESS); } @@ -3433,8 +3419,7 @@ AbortOutOfAnyTransaction(void) break; /* - * In a subtransaction, so clean it up and abort parent - * too + * In a subtransaction, so clean it up and abort parent too */ case TBLOCK_SUBBEGIN: case TBLOCK_SUBINPROGRESS: @@ -3667,9 +3652,9 @@ CommitSubTransaction(void) s->parent->subTransactionId); /* - * We need to restore the upper transaction's read-only state, in case - * the upper is read-write while the child is read-only; GUC will - * incorrectly think it should leave the child state in place. + * We need to restore the upper transaction's read-only state, in case the + * upper is read-write while the child is read-only; GUC will incorrectly + * think it should leave the child state in place. */ XactReadOnly = s->prevXactReadOnly; @@ -3706,8 +3691,8 @@ AbortSubTransaction(void) /* * Release any LW locks we might be holding as quickly as possible. * (Regular locks, however, must be held till we finish aborting.) - * Releasing LW locks is critical since we might try to grab them - * again while cleaning up! + * Releasing LW locks is critical since we might try to grab them again + * while cleaning up! * * FIXME This may be incorrect --- Are there some locks we should keep? * Buffer locks, for example? I don't think so but I'm not sure. 
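The ReleaseSavepoint hunk above marks everything from the current subtransaction up to the target savepoint as "commit pending", leaving the actual commits to CommitTransactionCommand. A self-contained sketch of one way to do that walk over a simplified parent-linked stack; the struct and field names are invented for illustration:

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef struct XactFrame
{
    const char       *name;           /* savepoint name, or NULL */
    bool              commit_pending; /* marked for later subcommit */
    struct XactFrame *parent;         /* enclosing (sub)transaction */
} XactFrame;

/*
 * Mark every frame from 'current' up to (and including) the frame named
 * 'target' as commit-pending. Returns false if no such savepoint exists.
 */
static bool
release_savepoint(XactFrame *current, const char *target)
{
    XactFrame *t;

    /* Find the target first, so we fail cleanly without changing anything. */
    for (t = current; t != NULL; t = t->parent)
        if (t->name != NULL && strcmp(t->name, target) == 0)
            break;
    if (t == NULL)
        return false;

    /* Now mark the whole chain; the real commits happen later. */
    for (;;)
    {
        current->commit_pending = true;
        if (current == t)
            break;
        current = current->parent;
    }
    return true;
}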
@@ -3726,8 +3711,8 @@ AbortSubTransaction(void) AtSubAbort_ResourceOwner(); /* - * We can skip all this stuff if the subxact failed before creating - * a ResourceOwner... + * We can skip all this stuff if the subxact failed before creating a + * ResourceOwner... */ if (s->curTransactionOwner) { @@ -3777,25 +3762,23 @@ AbortSubTransaction(void) } /* - * Reset user id which might have been changed transiently. Here we - * want to restore to the userid that was current at subxact entry. - * (As in AbortTransaction, we need not worry about the session - * userid.) + * Reset user id which might have been changed transiently. Here we want + * to restore to the userid that was current at subxact entry. (As in + * AbortTransaction, we need not worry about the session userid.) * - * Must do this after AtEOXact_GUC to handle the case where we entered - * the subxact inside a SECURITY DEFINER function (hence current and - * session userids were different) and then session auth was changed - * inside the subxact. GUC will reset both current and session - * userids to the entry-time session userid. This is right in every - * other scenario so it seems simplest to let GUC do that and fix it - * here. + * Must do this after AtEOXact_GUC to handle the case where we entered the + * subxact inside a SECURITY DEFINER function (hence current and session + * userids were different) and then session auth was changed inside the + * subxact. GUC will reset both current and session userids to the + * entry-time session userid. This is right in every other scenario so it + * seems simplest to let GUC do that and fix it here. */ SetUserId(s->currentUser); /* - * Restore the upper transaction's read-only state, too. This should - * be redundant with GUC's cleanup but we may as well do it for - * consistency with the commit case. + * Restore the upper transaction's read-only state, too. This should be + * redundant with GUC's cleanup but we may as well do it for consistency + * with the commit case. */ XactReadOnly = s->prevXactReadOnly; @@ -3846,11 +3829,11 @@ PushTransaction(void) { TransactionState p = CurrentTransactionState; TransactionState s; - Oid currentUser; + Oid currentUser; /* - * At present, GetUserId cannot fail, but let's not assume that. Get - * the ID before entering the critical code sequence. + * At present, GetUserId cannot fail, but let's not assume that. Get the + * ID before entering the critical code sequence. */ currentUser = GetUserId(); @@ -3860,6 +3843,7 @@ PushTransaction(void) s = (TransactionState) MemoryContextAllocZero(TopTransactionContext, sizeof(TransactionStateData)); + /* * Assign a subtransaction ID, watching out for counter wraparound. */ @@ -3872,11 +3856,12 @@ PushTransaction(void) (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("cannot have more than 2^32-1 subtransactions in a transaction"))); } + /* * We can now stack a minimally valid subtransaction without fear of * failure. */ - s->transactionId = InvalidTransactionId; /* until assigned */ + s->transactionId = InvalidTransactionId; /* until assigned */ s->subTransactionId = currentSubTransactionId; s->parent = p; s->nestingLevel = p->nestingLevel + 1; @@ -3889,10 +3874,10 @@ PushTransaction(void) CurrentTransactionState = s; /* - * AbortSubTransaction and CleanupSubTransaction have to be able to - * cope with the subtransaction from here on out; in particular they - * should not assume that it necessarily has a transaction context, - * resource owner, or XID. 
+ * AbortSubTransaction and CleanupSubTransaction have to be able to cope + * with the subtransaction from here on out; in particular they should not + * assume that it necessarily has a transaction context, resource owner, + * or XID. */ } @@ -3959,7 +3944,7 @@ ShowTransactionStateRec(TransactionState s) /* use ereport to suppress computation if msg will not be printed */ ereport(DEBUG3, (errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/subid/cid: %u/%u/%u, nestlvl: %d, children: %s", - PointerIsValid(s->name) ? s->name : "unnamed", + PointerIsValid(s->name) ? s->name : "unnamed", BlockStateAsString(s->blockState), TransStateAsString(s->state), (unsigned int) s->transactionId, @@ -4215,7 +4200,7 @@ xact_desc_commit(char *buf, xl_xact_commit *xlrec) if (xlrec->nsubxacts > 0) { TransactionId *xacts = (TransactionId *) - &xlrec->xnodes[xlrec->nrels]; + &xlrec->xnodes[xlrec->nrels]; sprintf(buf + strlen(buf), "; subxacts:"); for (i = 0; i < xlrec->nsubxacts; i++) @@ -4246,7 +4231,7 @@ xact_desc_abort(char *buf, xl_xact_abort *xlrec) if (xlrec->nsubxacts > 0) { TransactionId *xacts = (TransactionId *) - &xlrec->xnodes[xlrec->nrels]; + &xlrec->xnodes[xlrec->nrels]; sprintf(buf + strlen(buf), "; subxacts:"); for (i = 0; i < xlrec->nsubxacts; i++) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 878d7e21ef..7a37c656dc 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.219 2005/10/03 00:28:41 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.220 2005/10/15 02:49:10 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -51,7 +51,7 @@ /* * Because O_DIRECT bypasses the kernel buffers, and because we never * read those buffers except during crash recovery, it is a win to use - * it in all cases where we sync on each write(). We could allow O_DIRECT + * it in all cases where we sync on each write(). We could allow O_DIRECT * with fsync(), but because skipping the kernel buffer forces writes out * quickly, it seems best just to use it for O_SYNC. It is hard to imagine * how fsync() could be a win for O_DIRECT compared to O_SYNC and O_DIRECT. @@ -85,14 +85,14 @@ #if O_DSYNC != BARE_OPEN_SYNC_FLAG #define OPEN_DATASYNC_FLAG (O_DSYNC | PG_O_DIRECT) #endif -#else /* !defined(OPEN_SYNC_FLAG) */ +#else /* !defined(OPEN_SYNC_FLAG) */ /* Win32 only has O_DSYNC */ #define OPEN_DATASYNC_FLAG (O_DSYNC | PG_O_DIRECT) #endif #endif #if defined(OPEN_DATASYNC_FLAG) -#define DEFAULT_SYNC_METHOD_STR "open_datasync" +#define DEFAULT_SYNC_METHOD_STR "open_datasync" #define DEFAULT_SYNC_METHOD SYNC_METHOD_OPEN #define DEFAULT_SYNC_FLAGBIT OPEN_DATASYNC_FLAG #elif defined(HAVE_FDATASYNC) @@ -154,7 +154,7 @@ bool XLOG_DEBUG = false; /* these are derived from XLOG_sync_method by assign_xlog_sync_method */ -int sync_method = DEFAULT_SYNC_METHOD; +int sync_method = DEFAULT_SYNC_METHOD; static int open_sync_bit = DEFAULT_SYNC_FLAGBIT; #define XLOG_SYNC_BIT (enableFsync ? open_sync_bit : 0) @@ -368,10 +368,9 @@ typedef struct XLogCtlData XLogCtlWrite Write; /* - * These values do not change after startup, although the pointed-to - * pages and xlblocks values certainly do. 
Permission to read/write - * the pages and xlblocks values depends on WALInsertLock and - * WALWriteLock. + * These values do not change after startup, although the pointed-to pages + * and xlblocks values certainly do. Permission to read/write the pages + * and xlblocks values depends on WALInsertLock and WALWriteLock. */ char *pages; /* buffers for unwritten XLOG pages */ XLogRecPtr *xlblocks; /* 1st byte ptr-s + BLCKSZ */ @@ -449,8 +448,8 @@ static char *readRecordBuf = NULL; static uint32 readRecordBufSize = 0; /* State information for XLOG reading */ -static XLogRecPtr ReadRecPtr; /* start of last record read */ -static XLogRecPtr EndRecPtr; /* end+1 of last record read */ +static XLogRecPtr ReadRecPtr; /* start of last record read */ +static XLogRecPtr EndRecPtr; /* end+1 of last record read */ static XLogRecord *nextRecord = NULL; static TimeLineID lastPageTLI = 0; @@ -467,7 +466,7 @@ static void exitArchiveRecovery(TimeLineID endTLI, static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); static bool XLogCheckBuffer(XLogRecData *rdata, - XLogRecPtr *lsn, BkpBlock *bkpb); + XLogRecPtr *lsn, BkpBlock *bkpb); static bool AdvanceXLInsertBuffer(void); static void XLogWrite(XLogwrtRqst WriteRqst, bool flexible); static int XLogFileInit(uint32 log, uint32 seg, @@ -481,7 +480,7 @@ static bool RestoreArchivedFile(char *path, const char *xlogfname, const char *recovername, off_t expectedSize); static int PreallocXlogFiles(XLogRecPtr endptr); static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr, - int *nsegsremoved, int *nsegsrecycled); + int *nsegsremoved, int *nsegsrecycled); static void RemoveOldBackupHistory(void); static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode); static bool ValidXLOGHeader(XLogPageHeader hdr, int emode); @@ -554,36 +553,34 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) } /* - * In bootstrap mode, we don't actually log anything but XLOG - * resources; return a phony record pointer. + * In bootstrap mode, we don't actually log anything but XLOG resources; + * return a phony record pointer. */ if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID) { RecPtr.xlogid = 0; - RecPtr.xrecoff = SizeOfXLogLongPHD; /* start of 1st chkpt - * record */ + RecPtr.xrecoff = SizeOfXLogLongPHD; /* start of 1st chkpt record */ return (RecPtr); } /* * Here we scan the rdata chain, determine which buffers must be backed * up, and compute the CRC values for the data. Note that the record - * header isn't added into the CRC initially since we don't know the - * final length or info bits quite yet. Thus, the CRC will represent - * the CRC of the whole record in the order "rdata, then backup blocks, - * then record header". + * header isn't added into the CRC initially since we don't know the final + * length or info bits quite yet. Thus, the CRC will represent the CRC of + * the whole record in the order "rdata, then backup blocks, then record + * header". * - * We may have to loop back to here if a race condition is detected - * below. We could prevent the race by doing all this work while - * holding the insert lock, but it seems better to avoid doing CRC - * calculations while holding the lock. This means we have to be - * careful about modifying the rdata chain until we know we aren't - * going to loop back again. The only change we allow ourselves to - * make earlier is to set rdt->data = NULL in chain items we have - * decided we will have to back up the whole buffer for. 
This is OK - * because we will certainly decide the same thing again for those - * items if we do it over; doing it here saves an extra pass over the - * chain later. + * We may have to loop back to here if a race condition is detected below. We + * could prevent the race by doing all this work while holding the insert + * lock, but it seems better to avoid doing CRC calculations while holding + * the lock. This means we have to be careful about modifying the rdata + * chain until we know we aren't going to loop back again. The only + * change we allow ourselves to make earlier is to set rdt->data = NULL in + * chain items we have decided we will have to back up the whole buffer + * for. This is OK because we will certainly decide the same thing again + * for those items if we do it over; doing it here saves an extra pass + * over the chain later. */ begin:; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) @@ -680,12 +677,12 @@ begin:; } /* - * NOTE: the test for len == 0 here is somewhat fishy, since in theory - * all of the rmgr data might have been suppressed in favor of backup - * blocks. Currently, all callers of XLogInsert provide at least some - * not-in-a-buffer data and so len == 0 should never happen, but that - * may not be true forever. If you need to remove the len == 0 check, - * also remove the check for xl_len == 0 in ReadRecord, below. + * NOTE: the test for len == 0 here is somewhat fishy, since in theory all + * of the rmgr data might have been suppressed in favor of backup blocks. + * Currently, all callers of XLogInsert provide at least some + * not-in-a-buffer data and so len == 0 should never happen, but that may + * not be true forever. If you need to remove the len == 0 check, also + * remove the check for xl_len == 0 in ReadRecord, below. */ if (len == 0) elog(PANIC, "invalid xlog record length %u", len); @@ -718,9 +715,9 @@ begin:; * Since the amount of data we write here is completely optional * anyway, tell XLogWrite it can be "flexible" and stop at a * convenient boundary. This allows writes triggered by this - * mechanism to synchronize with the cache boundaries, so that - * in a long transaction we'll basically dump alternating halves - * of the buffer array. + * mechanism to synchronize with the cache boundaries, so that in + * a long transaction we'll basically dump alternating halves of + * the buffer array. */ LogwrtResult = XLogCtl->Write.LogwrtResult; if (XLByteLT(LogwrtResult.Write, LogwrtRqst.Write)) @@ -733,10 +730,9 @@ begin:; LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); /* - * Check to see if my RedoRecPtr is out of date. If so, may have to - * go back and recompute everything. This can only happen just after - * a checkpoint, so it's better to be slow in this case and fast - * otherwise. + * Check to see if my RedoRecPtr is out of date. If so, may have to go + * back and recompute everything. This can only happen just after a + * checkpoint, so it's better to be slow in this case and fast otherwise. */ if (!XLByteEQ(RedoRecPtr, Insert->RedoRecPtr)) { @@ -751,8 +747,8 @@ begin:; XLByteLE(dtbuf_lsn[i], RedoRecPtr)) { /* - * Oops, this buffer now needs to be backed up, but we - * didn't think so above. Start over. + * Oops, this buffer now needs to be backed up, but we didn't + * think so above. Start over. */ LWLockRelease(WALInsertLock); END_CRIT_SECTION(); @@ -762,15 +758,14 @@ begin:; } /* - * Make additional rdata chain entries for the backup blocks, so that - * we don't need to special-case them in the write loop. 
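The XLogInsert comments above describe computing the record CRC over the chained data first and folding the fixed-size header in last, so the final value covers "rdata, then backup blocks, then record header". Below is a generic streaming CRC-32 over such a chain; it is not PostgreSQL's CRC implementation, just an illustration of accumulating over scattered buffers and finishing with the header:

#include <stddef.h>
#include <stdint.h>

typedef struct ChainItem
{
    const void       *data;
    size_t            len;
    struct ChainItem *next;
} ChainItem;

/* Plain reflected CRC-32 (polynomial 0xEDB88320), bit-at-a-time. */
static uint32_t
crc32_update(uint32_t crc, const void *data, size_t len)
{
    const unsigned char *p = (const unsigned char *) data;
    size_t i;
    int bit;

    for (i = 0; i < len; i++)
    {
        crc ^= p[i];
        for (bit = 0; bit < 8; bit++)
            crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
    }
    return crc;
}

/* Accumulate over the chain, then over the header, mirroring the order above. */
static uint32_t
record_crc(const ChainItem *chain, const void *header, size_t header_len)
{
    uint32_t crc = 0xFFFFFFFFu;
    const ChainItem *item;

    for (item = chain; item != NULL; item = item->next)
        crc = crc32_update(crc, item->data, item->len);
    crc = crc32_update(crc, header, header_len);

    return crc ^ 0xFFFFFFFFu;   /* finalize */
}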
Note that we - * have now irrevocably changed the input rdata chain. At the exit of - * this loop, write_len includes the backup block data. + * Make additional rdata chain entries for the backup blocks, so that we + * don't need to special-case them in the write loop. Note that we have + * now irrevocably changed the input rdata chain. At the exit of this + * loop, write_len includes the backup block data. * - * Also set the appropriate info bits to show which buffers were backed - * up. The i'th XLR_SET_BKP_BLOCK bit corresponds to the i'th - * distinct buffer value (ignoring InvalidBuffer) appearing in the - * rdata chain. + * Also set the appropriate info bits to show which buffers were backed up. + * The i'th XLR_SET_BKP_BLOCK bit corresponds to the i'th distinct buffer + * value (ignoring InvalidBuffer) appearing in the rdata chain. */ write_len = len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) @@ -822,8 +817,7 @@ begin:; /* * If there isn't enough space on the current XLOG page for a record - * header, advance to the next page (leaving the unused space as - * zeroes). + * header, advance to the next page (leaving the unused space as zeroes). */ updrqst = false; freespace = INSERT_FREESPACE(Insert); @@ -925,15 +919,15 @@ begin:; freespace = INSERT_FREESPACE(Insert); /* - * The recptr I return is the beginning of the *next* record. This - * will be stored as LSN for changed data pages... + * The recptr I return is the beginning of the *next* record. This will be + * stored as LSN for changed data pages... */ INSERT_RECPTR(RecPtr, Insert, curridx); /* Need to update shared LogwrtRqst if some block was filled up */ if (freespace < SizeOfXLogRecord) - updrqst = true; /* curridx is filled and available for - * writing out */ + updrqst = true; /* curridx is filled and available for writing + * out */ else curridx = PrevBufIdx(curridx); WriteRqst = XLogCtl->xlblocks[curridx]; @@ -975,9 +969,9 @@ XLogCheckBuffer(XLogRecData *rdata, page = (PageHeader) BufferGetBlock(rdata->buffer); /* - * XXX We assume page LSN is first data on *every* page that can be - * passed to XLogInsert, whether it otherwise has the standard page - * layout or not. + * XXX We assume page LSN is first data on *every* page that can be passed + * to XLogInsert, whether it otherwise has the standard page layout or + * not. */ *lsn = page->pd_lsn; @@ -1163,9 +1157,9 @@ AdvanceXLInsertBuffer(void) LogwrtResult = Insert->LogwrtResult; /* - * Get ending-offset of the buffer page we need to replace (this may - * be zero if the buffer hasn't been used yet). Fall through if it's - * already written out. + * Get ending-offset of the buffer page we need to replace (this may be + * zero if the buffer hasn't been used yet). Fall through if it's already + * written out. */ OldPageRqstPtr = XLogCtl->xlblocks[nextidx]; if (!XLByteLE(OldPageRqstPtr, LogwrtResult.Write)) @@ -1208,9 +1202,8 @@ AdvanceXLInsertBuffer(void) else { /* - * Have to write buffers while holding insert lock. This - * is not good, so only write as much as we absolutely - * must. + * Have to write buffers while holding insert lock. This is + * not good, so only write as much as we absolutely must. */ WriteRqst.Write = OldPageRqstPtr; WriteRqst.Flush.xlogid = 0; @@ -1223,8 +1216,8 @@ AdvanceXLInsertBuffer(void) } /* - * Now the next buffer slot is free and we can set it up to be the - * next output page. + * Now the next buffer slot is free and we can set it up to be the next + * output page. 
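When the next buffer slot becomes the next output page, as described above, the recycled buffer is re-zeroed and given a fresh page header, and the first page of a segment gets the long header form carrying the system identifier and segment size. A simplified, self-contained version of that initialization; the struct layout and constants are illustrative, not the on-disk XLogPageHeaderData:

#include <stdint.h>
#include <string.h>

#define PAGE_SZ          8192u
#define SEG_SZ           (16u * 1024u * 1024u)   /* assumed 16MB segments */
#define PAGE_MAGIC       0xD05Bu                 /* placeholder magic */
#define INFO_LONG_HEADER 0x0002u

typedef struct PageHeader
{
    uint16_t magic;
    uint16_t info;
    uint32_t timeline;
    uint64_t pageaddr;      /* byte address of this page in the WAL stream */
} PageHeader;

typedef struct LongPageHeader
{
    PageHeader std;
    uint64_t   sysid;       /* system identifier */
    uint32_t   seg_size;
} LongPageHeader;

/*
 * Initialize a recycled WAL buffer page. Returns the offset at which record
 * data may start, i.e. just past whichever header form was written.
 */
static size_t
init_wal_page(char *page, uint64_t pageaddr, uint32_t timeline, uint64_t sysid)
{
    PageHeader *hdr = (PageHeader *) page;

    memset(page, 0, PAGE_SZ);       /* stale bytes must read back as zeroes */
    hdr->magic = PAGE_MAGIC;
    hdr->timeline = timeline;
    hdr->pageaddr = pageaddr;

    if (pageaddr % SEG_SZ == 0)     /* first page of a segment: long header */
    {
        LongPageHeader *lhdr = (LongPageHeader *) page;

        lhdr->sysid = sysid;
        lhdr->seg_size = SEG_SZ;
        hdr->info |= INFO_LONG_HEADER;
        return sizeof(LongPageHeader);
    }
    return sizeof(PageHeader);
}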
*/ NewPageEndPtr = XLogCtl->xlblocks[Insert->curridx]; if (NewPageEndPtr.xrecoff >= XLogFileSize) @@ -1237,24 +1230,27 @@ AdvanceXLInsertBuffer(void) NewPageEndPtr.xrecoff += BLCKSZ; XLogCtl->xlblocks[nextidx] = NewPageEndPtr; NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) BLCKSZ); + Insert->curridx = nextidx; Insert->currpage = NewPage; - Insert->currpos = ((char *) NewPage) + SizeOfXLogShortPHD; + + Insert->currpos = ((char *) NewPage) +SizeOfXLogShortPHD; /* - * Be sure to re-zero the buffer so that bytes beyond what we've - * written will look like zeroes and not valid XLOG records... + * Be sure to re-zero the buffer so that bytes beyond what we've written + * will look like zeroes and not valid XLOG records... */ MemSet((char *) NewPage, 0, BLCKSZ); /* * Fill the new page's header */ - NewPage->xlp_magic = XLOG_PAGE_MAGIC; + NewPage ->xlp_magic = XLOG_PAGE_MAGIC; + /* NewPage->xlp_info = 0; */ /* done by memset */ - NewPage->xlp_tli = ThisTimeLineID; - NewPage->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid; - NewPage->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ; + NewPage ->xlp_tli = ThisTimeLineID; + NewPage ->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid; + NewPage ->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ; /* * If first page of an XLOG segment file, make it a long header. @@ -1265,8 +1261,9 @@ AdvanceXLInsertBuffer(void) NewLongPage->xlp_sysid = ControlFile->system_identifier; NewLongPage->xlp_seg_size = XLogSegSize; - NewPage->xlp_info |= XLP_LONG_HEADER; - Insert->currpos = ((char *) NewPage) + SizeOfXLogLongPHD; + NewPage ->xlp_info |= XLP_LONG_HEADER; + + Insert->currpos = ((char *) NewPage) +SizeOfXLogLongPHD; } return update_needed; @@ -1298,19 +1295,18 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) Assert(CritSectionCount > 0); /* - * Update local LogwrtResult (caller probably did this already, - * but...) + * Update local LogwrtResult (caller probably did this already, but...) */ LogwrtResult = Write->LogwrtResult; /* * Since successive pages in the xlog cache are consecutively allocated, * we can usually gather multiple pages together and issue just one - * write() call. npages is the number of pages we have determined can - * be written together; startidx is the cache block index of the first - * one, and startoffset is the file offset at which it should go. - * The latter two variables are only valid when npages > 0, but we must - * initialize all of them to keep the compiler quiet. + * write() call. npages is the number of pages we have determined can be + * written together; startidx is the cache block index of the first one, + * and startoffset is the file offset at which it should go. The latter + * two variables are only valid when npages > 0, but we must initialize + * all of them to keep the compiler quiet. */ npages = 0; startidx = 0; @@ -1320,18 +1316,17 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) * Within the loop, curridx is the cache block index of the page to * consider writing. We advance Write->curridx only after successfully * writing pages. (Right now, this refinement is useless since we are - * going to PANIC if any error occurs anyway; but someday it may come - * in useful.) + * going to PANIC if any error occurs anyway; but someday it may come in + * useful.) */ curridx = Write->curridx; while (XLByteLT(LogwrtResult.Write, WriteRqst.Write)) { /* - * Make sure we're not ahead of the insert process. 
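
The XLByteLT/XLByteLE/XLByteEQ tests used throughout these hunks ("Make sure we're not ahead of the insert process", the flush checks, and so on) compare WAL positions kept as two 32-bit halves. A self-contained sketch of that comparison style, with names that only approximate the real xlogdefs.h macros:

#include <stdint.h>
#include <stdio.h>

/* A WAL position is a 64-bit address kept as two 32-bit halves. */
typedef struct
{
    uint32_t    xlogid;     /* high-order half ("log file" number) */
    uint32_t    xrecoff;    /* byte offset within that log file */
} RecPtr;

/* Compare positions the way the XLByteLT/XLByteLE/XLByteEQ macros do. */
#define BYTE_LT(a, b)  ((a).xlogid < (b).xlogid || \
                        ((a).xlogid == (b).xlogid && (a).xrecoff < (b).xrecoff))
#define BYTE_LE(a, b)  ((a).xlogid < (b).xlogid || \
                        ((a).xlogid == (b).xlogid && (a).xrecoff <= (b).xrecoff))
#define BYTE_EQ(a, b)  ((a).xlogid == (b).xlogid && (a).xrecoff == (b).xrecoff)

int
main(void)
{
    RecPtr      written = {0x2, 0x00A00000};
    RecPtr      requested = {0x2, 0x00A02000};

    if (BYTE_LT(written, requested))
        printf("need to write more WAL before the request is satisfied\n");
    if (!BYTE_EQ(written, requested))
        printf("positions differ\n");
    return 0;
}
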
This could - * happen if we're passed a bogus WriteRqst.Write that is past the - * end of the last page that's been initialized by - * AdvanceXLInsertBuffer. + * Make sure we're not ahead of the insert process. This could happen + * if we're passed a bogus WriteRqst.Write that is past the end of the + * last page that's been initialized by AdvanceXLInsertBuffer. */ if (!XLByteLT(LogwrtResult.Write, XLogCtl->xlblocks[curridx])) elog(PANIC, "xlog write request %X/%X is past end of log %X/%X", @@ -1355,8 +1350,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) if (close(openLogFile)) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not close log file %u, segment %u: %m", - openLogId, openLogSeg))); + errmsg("could not close log file %u, segment %u: %m", + openLogId, openLogSeg))); openLogFile = -1; } XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg); @@ -1379,13 +1374,13 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) UpdateControlFile(); /* - * Signal bgwriter to start a checkpoint if it's been - * too long since the last one. (We look at local copy of - * RedoRecPtr which might be a little out of date, but - * should be close enough for this purpose.) + * Signal bgwriter to start a checkpoint if it's been too long + * since the last one. (We look at local copy of RedoRecPtr + * which might be a little out of date, but should be close + * enough for this purpose.) * - * A straight computation of segment number could overflow - * 32 bits. Rather than assuming we have working 64-bit + * A straight computation of segment number could overflow 32 + * bits. Rather than assuming we have working 64-bit * arithmetic, we compare the highest-order bits separately, * and force a checkpoint immediately when they change. */ @@ -1434,10 +1429,10 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) npages++; /* - * Dump the set if this will be the last loop iteration, or if - * we are at the last page of the cache area (since the next page - * won't be contiguous in memory), or if we are at the end of the - * logfile segment. + * Dump the set if this will be the last loop iteration, or if we are + * at the last page of the cache area (since the next page won't be + * contiguous in memory), or if we are at the end of the logfile + * segment. */ finishing_seg = !ispartialpage && (startoffset + npages * BLCKSZ) >= XLogSegSize; @@ -1496,7 +1491,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) if (finishing_seg) { issue_xlog_fsync(); - LogwrtResult.Flush = LogwrtResult.Write; /* end of page */ + LogwrtResult.Flush = LogwrtResult.Write; /* end of page */ if (XLogArchivingActive()) XLogArchiveNotifySeg(openLogId, openLogSeg); @@ -1526,20 +1521,20 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) XLByteLT(LogwrtResult.Flush, LogwrtResult.Write)) { /* - * Could get here without iterating above loop, in which case we - * might have no open file or the wrong one. However, we do not - * need to fsync more than one file. + * Could get here without iterating above loop, in which case we might + * have no open file or the wrong one. However, we do not need to + * fsync more than one file. 
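
The npages/startidx/startoffset bookkeeping in XLogWrite — accumulate consecutive dirty cache pages and dump them with a single write(), flushing early when the run reaches the end of the cache array because the next page would not be contiguous in memory — can be sketched as a stand-alone demo. The file name and page counts here are made up:

#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#define BLK     8192
#define NCACHE  8                       /* cache pages, contiguous in memory */

static char cache[NCACHE][BLK];

int
main(void)
{
    int         fd = open("walwrite.demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
    int         curridx = 0;
    int         startidx = 0;
    int         npages = 0;
    off_t       startoffset = 0;
    int         pages_to_write = 5;     /* pretend this much is dirty */

    if (fd < 0)
        return 1;
    memset(cache, 0, sizeof(cache));

    while (pages_to_write-- > 0)
    {
        if (npages == 0)
        {
            /* first page of a new contiguous run */
            startidx = curridx;
            startoffset = (off_t) curridx * BLK;
        }
        npages++;

        /*
         * Dump the accumulated run if we are at the last cache page (the
         * next one wraps around and is not contiguous) or nothing more is
         * dirty; otherwise keep accumulating so one write() suffices.
         */
        if (curridx == NCACHE - 1 || pages_to_write == 0)
        {
            if (lseek(fd, startoffset, SEEK_SET) < 0 ||
                write(fd, cache[startidx], (size_t) npages * BLK) !=
                (ssize_t) ((size_t) npages * BLK))
            {
                close(fd);
                return 1;
            }
            npages = 0;
        }
        curridx = (curridx + 1) % NCACHE;
    }
    close(fd);
    return 0;
}
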
*/ if (sync_method != SYNC_METHOD_OPEN) { if (openLogFile >= 0 && - !XLByteInPrevSeg(LogwrtResult.Write, openLogId, openLogSeg)) + !XLByteInPrevSeg(LogwrtResult.Write, openLogId, openLogSeg)) { if (close(openLogFile)) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not close log file %u, segment %u: %m", - openLogId, openLogSeg))); + errmsg("could not close log file %u, segment %u: %m", + openLogId, openLogSeg))); openLogFile = -1; } if (openLogFile < 0) @@ -1557,8 +1552,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) * Update shared-memory status * * We make sure that the shared 'request' values do not fall behind the - * 'result' values. This is not absolutely essential, but it saves - * some code in a couple of places. + * 'result' values. This is not absolutely essential, but it saves some + * code in a couple of places. */ { /* use volatile pointer to prevent code rearrangement */ @@ -1608,11 +1603,10 @@ XLogFlush(XLogRecPtr record) /* * Since fsync is usually a horribly expensive operation, we try to - * piggyback as much data as we can on each fsync: if we see any more - * data entered into the xlog buffer, we'll write and fsync that too, - * so that the final value of LogwrtResult.Flush is as large as - * possible. This gives us some chance of avoiding another fsync - * immediately after. + * piggyback as much data as we can on each fsync: if we see any more data + * entered into the xlog buffer, we'll write and fsync that too, so that + * the final value of LogwrtResult.Flush is as large as possible. This + * gives us some chance of avoiding another fsync immediately after. */ /* initialize to given target; may increase below */ @@ -1669,31 +1663,29 @@ XLogFlush(XLogRecPtr record) /* * If we still haven't flushed to the request point then we have a - * problem; most likely, the requested flush point is past end of - * XLOG. This has been seen to occur when a disk page has a corrupted - * LSN. + * problem; most likely, the requested flush point is past end of XLOG. + * This has been seen to occur when a disk page has a corrupted LSN. * - * Formerly we treated this as a PANIC condition, but that hurts the - * system's robustness rather than helping it: we do not want to take - * down the whole system due to corruption on one data page. In - * particular, if the bad page is encountered again during recovery - * then we would be unable to restart the database at all! (This - * scenario has actually happened in the field several times with 7.1 - * releases. Note that we cannot get here while InRedo is true, but if - * the bad page is brought in and marked dirty during recovery then - * CreateCheckPoint will try to flush it at the end of recovery.) + * Formerly we treated this as a PANIC condition, but that hurts the system's + * robustness rather than helping it: we do not want to take down the + * whole system due to corruption on one data page. In particular, if the + * bad page is encountered again during recovery then we would be unable + * to restart the database at all! (This scenario has actually happened + * in the field several times with 7.1 releases. Note that we cannot get + * here while InRedo is true, but if the bad page is brought in and marked + * dirty during recovery then CreateCheckPoint will try to flush it at the + * end of recovery.) * - * The current approach is to ERROR under normal conditions, but only - * WARNING during recovery, so that the system can be brought up even - * if there's a corrupt LSN. 
Note that for calls from xact.c, the - * ERROR will be promoted to PANIC since xact.c calls this routine - * inside a critical section. However, calls from bufmgr.c are not - * within critical sections and so we will not force a restart for a - * bad LSN on a data page. + * The current approach is to ERROR under normal conditions, but only WARNING + * during recovery, so that the system can be brought up even if there's a + * corrupt LSN. Note that for calls from xact.c, the ERROR will be + * promoted to PANIC since xact.c calls this routine inside a critical + * section. However, calls from bufmgr.c are not within critical sections + * and so we will not force a restart for a bad LSN on a data page. */ if (XLByteLT(LogwrtResult.Flush, record)) elog(InRecovery ? WARNING : ERROR, - "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X", + "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X", record.xlogid, record.xrecoff, LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff); } @@ -1734,8 +1726,7 @@ XLogFileInit(uint32 log, uint32 seg, XLogFilePath(path, ThisTimeLineID, log, seg); /* - * Try to use existent file (checkpoint maker may have created it - * already) + * Try to use existent file (checkpoint maker may have created it already) */ if (*use_existent) { @@ -1754,10 +1745,10 @@ XLogFileInit(uint32 log, uint32 seg, } /* - * Initialize an empty (all zeroes) segment. NOTE: it is possible - * that another process is doing the same thing. If so, we will end - * up pre-creating an extra log segment. That seems OK, and better - * than holding the lock throughout this lengthy process. + * Initialize an empty (all zeroes) segment. NOTE: it is possible that + * another process is doing the same thing. If so, we will end up + * pre-creating an extra log segment. That seems OK, and better than + * holding the lock throughout this lengthy process. */ snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid()); @@ -1772,13 +1763,13 @@ XLogFileInit(uint32 log, uint32 seg, errmsg("could not create file \"%s\": %m", tmppath))); /* - * Zero-fill the file. We have to do this the hard way to ensure that - * all the file space has really been allocated --- on platforms that - * allow "holes" in files, just seeking to the end doesn't allocate - * intermediate space. This way, we know that we have all the space - * and (after the fsync below) that all the indirect blocks are down - * on disk. Therefore, fdatasync(2) or O_DSYNC will be sufficient to - * sync future writes to the log file. + * Zero-fill the file. We have to do this the hard way to ensure that all + * the file space has really been allocated --- on platforms that allow + * "holes" in files, just seeking to the end doesn't allocate intermediate + * space. This way, we know that we have all the space and (after the + * fsync below) that all the indirect blocks are down on disk. Therefore, + * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the + * log file. 
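
The zero-fill strategy just described (really write every block so the space is allocated, then fsync, unlinking the temp file on failure) looks roughly like this as a stand-alone program; the file name and segment size are illustrative assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define BLK         8192
#define SEG_SIZE    (16 * 1024 * 1024)      /* assumed 16 MB segment size */

int
main(void)
{
    static char zbuffer[BLK];               /* static => already zeroed */
    int         fd;
    long        nbytes;

    fd = open("segment.demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
    if (fd < 0)
    {
        perror("open");
        return 1;
    }
    /*
     * Write every block: seeking to the end would just leave a hole on
     * filesystems that support sparse files, so the space would not
     * actually be allocated.
     */
    for (nbytes = 0; nbytes < SEG_SIZE; nbytes += BLK)
    {
        if (write(fd, zbuffer, BLK) != BLK)
        {
            perror("write");
            unlink("segment.demo");         /* release the disk space again */
            close(fd);
            return 1;
        }
    }
    if (fsync(fd) != 0 || close(fd) != 0)
    {
        perror("fsync/close");
        return 1;
    }
    return 0;
}
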
*/ MemSet(zbuffer, 0, sizeof(zbuffer)); for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer)) @@ -1789,8 +1780,7 @@ XLogFileInit(uint32 log, uint32 seg, int save_errno = errno; /* - * If we fail to make the file, delete it to release disk - * space + * If we fail to make the file, delete it to release disk space */ unlink(tmppath); /* if write didn't set errno, assume problem is no disk space */ @@ -1798,7 +1788,7 @@ XLogFileInit(uint32 log, uint32 seg, ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); + errmsg("could not write to file \"%s\": %m", tmppath))); } } @@ -1816,9 +1806,9 @@ XLogFileInit(uint32 log, uint32 seg, * Now move the segment into place with its final name. * * If caller didn't want to use a pre-existing file, get rid of any - * pre-existing file. Otherwise, cope with possibility that someone - * else has created the file while we were filling ours: if so, use - * ours to pre-create a future log segment. + * pre-existing file. Otherwise, cope with possibility that someone else + * has created the file while we were filling ours: if so, use ours to + * pre-create a future log segment. */ installed_log = log; installed_seg = seg; @@ -1840,8 +1830,8 @@ XLogFileInit(uint32 log, uint32 seg, if (fd < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" (log file %u, segment %u): %m", - path, log, seg))); + errmsg("could not open file \"%s\" (log file %u, segment %u): %m", + path, log, seg))); return (fd); } @@ -1908,7 +1898,7 @@ XLogFileCopy(uint32 log, uint32 seg, errmsg("could not read file \"%s\": %m", path))); else ereport(ERROR, - (errmsg("not enough data in file \"%s\"", path))); + (errmsg("not enough data in file \"%s\"", path))); } errno = 0; if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer)) @@ -1916,8 +1906,7 @@ XLogFileCopy(uint32 log, uint32 seg, int save_errno = errno; /* - * If we fail to make the file, delete it to release disk - * space + * If we fail to make the file, delete it to release disk space */ unlink(tmppath); /* if write didn't set errno, assume problem is no disk space */ @@ -1925,7 +1914,7 @@ XLogFileCopy(uint32 log, uint32 seg, ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); + errmsg("could not write to file \"%s\": %m", tmppath))); } } @@ -2057,8 +2046,8 @@ XLogFileOpen(uint32 log, uint32 seg) if (fd < 0) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not open file \"%s\" (log file %u, segment %u): %m", - path, log, seg))); + errmsg("could not open file \"%s\" (log file %u, segment %u): %m", + path, log, seg))); return fd; } @@ -2075,14 +2064,14 @@ XLogFileRead(uint32 log, uint32 seg, int emode) int fd; /* - * Loop looking for a suitable timeline ID: we might need to read any - * of the timelines listed in expectedTLIs. + * Loop looking for a suitable timeline ID: we might need to read any of + * the timelines listed in expectedTLIs. * * We expect curFileTLI on entry to be the TLI of the preceding file in - * sequence, or 0 if there was no predecessor. We do not allow - * curFileTLI to go backwards; this prevents us from picking up the - * wrong file when a parent timeline extends to higher segment numbers - * than the child we want to read. + * sequence, or 0 if there was no predecessor. 
We do not allow curFileTLI + * to go backwards; this prevents us from picking up the wrong file when a + * parent timeline extends to higher segment numbers than the child we + * want to read. */ foreach(cell, expectedTLIs) { @@ -2111,8 +2100,8 @@ XLogFileRead(uint32 log, uint32 seg, int emode) if (errno != ENOENT) /* unexpected failure? */ ereport(PANIC, (errcode_for_file_access(), - errmsg("could not open file \"%s\" (log file %u, segment %u): %m", - path, log, seg))); + errmsg("could not open file \"%s\" (log file %u, segment %u): %m", + path, log, seg))); } /* Couldn't find it. For simplicity, complain about front timeline */ @@ -2120,8 +2109,8 @@ XLogFileRead(uint32 log, uint32 seg, int emode) errno = ENOENT; ereport(emode, (errcode_for_file_access(), - errmsg("could not open file \"%s\" (log file %u, segment %u): %m", - path, log, seg))); + errmsg("could not open file \"%s\" (log file %u, segment %u): %m", + path, log, seg))); return -1; } @@ -2152,29 +2141,27 @@ RestoreArchivedFile(char *path, const char *xlogfname, struct stat stat_buf; /* - * When doing archive recovery, we always prefer an archived log file - * even if a file of the same name exists in XLOGDIR. The reason is - * that the file in XLOGDIR could be an old, un-filled or - * partly-filled version that was copied and restored as part of - * backing up $PGDATA. + * When doing archive recovery, we always prefer an archived log file even + * if a file of the same name exists in XLOGDIR. The reason is that the + * file in XLOGDIR could be an old, un-filled or partly-filled version + * that was copied and restored as part of backing up $PGDATA. * * We could try to optimize this slightly by checking the local copy - * lastchange timestamp against the archived copy, but we have no API - * to do this, nor can we guarantee that the lastchange timestamp was - * preserved correctly when we copied to archive. Our aim is - * robustness, so we elect not to do this. + * lastchange timestamp against the archived copy, but we have no API to + * do this, nor can we guarantee that the lastchange timestamp was + * preserved correctly when we copied to archive. Our aim is robustness, + * so we elect not to do this. * - * If we cannot obtain the log file from the archive, however, we will - * try to use the XLOGDIR file if it exists. This is so that we can - * make use of log segments that weren't yet transferred to the - * archive. + * If we cannot obtain the log file from the archive, however, we will try to + * use the XLOGDIR file if it exists. This is so that we can make use of + * log segments that weren't yet transferred to the archive. * - * Notice that we don't actually overwrite any files when we copy back - * from archive because the recoveryRestoreCommand may inadvertently - * restore inappropriate xlogs, or they may be corrupt, so we may wish - * to fallback to the segments remaining in current XLOGDIR later. The - * copy-from-archive filename is always the same, ensuring that we - * don't run out of disk space on long recoveries. + * Notice that we don't actually overwrite any files when we copy back from + * archive because the recoveryRestoreCommand may inadvertently restore + * inappropriate xlogs, or they may be corrupt, so we may wish to fallback + * to the segments remaining in current XLOGDIR later. The + * copy-from-archive filename is always the same, ensuring that we don't + * run out of disk space on long recoveries. 
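
RestoreArchivedFile ultimately hands the archive a shell command built from recoveryRestoreCommand. A simplified sketch of the usual %f/%p substitution and invocation, with a hypothetical template and file names, and with quoting and the %% escape left out:

#include <stddef.h>
#include <stdlib.h>

/* Append s to buf (capacity bufsize), tracking the current length in *len. */
static void
append(char *buf, size_t bufsize, size_t *len, const char *s)
{
    while (*s && *len < bufsize - 1)
        buf[(*len)++] = *s++;
    buf[*len] = '\0';
}

/*
 * Build a command from a restore_command template, substituting %f (the
 * requested WAL file name) and %p (the local path to restore into), then
 * run it.  A nonzero return conventionally means the restore failed.
 */
static int
run_restore_command(const char *template_cmd,
                    const char *xlogfname, const char *recoverypath)
{
    char        cmd[1024];
    size_t      len = 0;
    const char *src;

    cmd[0] = '\0';
    for (src = template_cmd; *src; src++)
    {
        if (src[0] == '%' && src[1] == 'f')
        {
            append(cmd, sizeof(cmd), &len, xlogfname);
            src++;
        }
        else if (src[0] == '%' && src[1] == 'p')
        {
            append(cmd, sizeof(cmd), &len, recoverypath);
            src++;
        }
        else
        {
            char        one[2] = {*src, '\0'};

            append(cmd, sizeof(cmd), &len, one);
        }
    }
    return system(cmd);
}

int
main(void)
{
    /* hypothetical template and names, for illustration only */
    return run_restore_command("cp /mnt/archive/%f \"%p\"",
                               "000000010000000000000003",
                               "pg_xlog/RECOVERYXLOG");
}
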
*/ snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername); @@ -2259,11 +2246,11 @@ RestoreArchivedFile(char *path, const char *xlogfname, * command apparently succeeded, but let's make sure the file is * really there now and has the correct size. * - * XXX I made wrong-size a fatal error to ensure the DBA would notice - * it, but is that too strong? We could try to plow ahead with a - * local copy of the file ... but the problem is that there - * probably isn't one, and we'd incorrectly conclude we've reached - * the end of WAL and we're done recovering ... + * XXX I made wrong-size a fatal error to ensure the DBA would notice it, + * but is that too strong? We could try to plow ahead with a local + * copy of the file ... but the problem is that there probably isn't + * one, and we'd incorrectly conclude we've reached the end of WAL and + * we're done recovering ... */ if (stat(xlogpath, &stat_buf) == 0) { @@ -2296,18 +2283,17 @@ RestoreArchivedFile(char *path, const char *xlogfname, /* * remember, we rollforward UNTIL the restore fails so failure here is * just part of the process... that makes it difficult to determine - * whether the restore failed because there isn't an archive to - * restore, or because the administrator has specified the restore - * program incorrectly. We have to assume the former. + * whether the restore failed because there isn't an archive to restore, + * or because the administrator has specified the restore program + * incorrectly. We have to assume the former. */ ereport(DEBUG2, - (errmsg("could not restore file \"%s\" from archive: return code %d", - xlogfname, rc))); + (errmsg("could not restore file \"%s\" from archive: return code %d", + xlogfname, rc))); /* - * if an archived file is not available, there might still be a - * version of this file in XLOGDIR, so return that as the filename to - * open. + * if an archived file is not available, there might still be a version of + * this file in XLOGDIR, so return that as the filename to open. * * In many recovery scenarios we expect this to fail also, but if so that * just means we've reached the end of WAL. @@ -2375,8 +2361,8 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr, if (xldir == NULL) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open transaction log directory \"%s\": %m", - XLOGDIR))); + errmsg("could not open transaction log directory \"%s\": %m", + XLOGDIR))); XLogFileName(lastoff, ThisTimeLineID, log, seg); @@ -2384,14 +2370,14 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr, { /* * We ignore the timeline part of the XLOG segment identifiers in - * deciding whether a segment is still needed. This ensures that - * we won't prematurely remove a segment from a parent timeline. - * We could probably be a little more proactive about removing - * segments of non-parent timelines, but that would be a whole lot - * more complicated. + * deciding whether a segment is still needed. This ensures that we + * won't prematurely remove a segment from a parent timeline. We could + * probably be a little more proactive about removing segments of + * non-parent timelines, but that would be a whole lot more + * complicated. * - * We use the alphanumeric sorting property of the filenames to - * decide which ones are earlier than the lastoff segment. + * We use the alphanumeric sorting property of the filenames to decide + * which ones are earlier than the lastoff segment. 
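
The "alphanumeric sorting property" relied on above works because segment file names are fixed-width hex digits, so strcmp order matches numeric order; skipping the first 8 characters ignores the timeline part, as the comment says. A small stand-alone check along those lines (the +8 offset is an assumption about the name layout, not shown in this diff):

#include <stdio.h>
#include <string.h>

/*
 * WAL segment names are 24 hex digits: 8 for timeline, 8 for log id,
 * 8 for segment.  Compare only bytes 8..23 so the timeline is ignored.
 */
static int
is_old_segment(const char *d_name, const char *lastoff)
{
    if (strlen(d_name) != 24 || strspn(d_name, "0123456789ABCDEF") != 24)
        return 0;               /* not a WAL segment name at all */
    return strcmp(d_name + 8, lastoff + 8) <= 0;
}

int
main(void)
{
    const char *lastoff = "00000001000000000000000A";

    printf("%d\n", is_old_segment("000000010000000000000003", lastoff)); /* 1 */
    printf("%d\n", is_old_segment("00000002000000000000000B", lastoff)); /* 0 */
    printf("%d\n", is_old_segment("backup_label", lastoff));             /* 0 */
    return 0;
}
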
*/ if (strlen(xlde->d_name) == 24 && strspn(xlde->d_name, "0123456789ABCDEF") == 24 && @@ -2409,16 +2395,16 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr, snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name); /* - * Before deleting the file, see if it can be recycled as - * a future log segment. + * Before deleting the file, see if it can be recycled as a + * future log segment. */ if (InstallXLogFileSegment(&endlogId, &endlogSeg, path, true, &max_advance, true)) { ereport(DEBUG2, - (errmsg("recycled transaction log file \"%s\"", - xlde->d_name))); + (errmsg("recycled transaction log file \"%s\"", + xlde->d_name))); (*nsegsrecycled)++; /* Needn't recheck that slot on future iterations */ if (max_advance > 0) @@ -2431,8 +2417,8 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr, { /* No need for any more future segments... */ ereport(DEBUG2, - (errmsg("removing transaction log file \"%s\"", - xlde->d_name))); + (errmsg("removing transaction log file \"%s\"", + xlde->d_name))); unlink(path); (*nsegsremoved)++; } @@ -2459,8 +2445,8 @@ RemoveOldBackupHistory(void) if (xldir == NULL) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open transaction log directory \"%s\": %m", - XLOGDIR))); + errmsg("could not open transaction log directory \"%s\": %m", + XLOGDIR))); while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL) { @@ -2473,8 +2459,8 @@ RemoveOldBackupHistory(void) if (!XLogArchivingActive() || XLogArchiveIsDone(xlde->d_name)) { ereport(DEBUG2, - (errmsg("removing transaction log backup history file \"%s\"", - xlde->d_name))); + (errmsg("removing transaction log backup history file \"%s\"", + xlde->d_name))); snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name); unlink(path); XLogArchiveCleanup(xlde->d_name); @@ -2576,7 +2562,7 @@ RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode) blk = (char *) XLogRecGetData(record) + len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { - uint32 blen; + uint32 blen; if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) continue; @@ -2611,8 +2597,8 @@ RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode) if (!EQ_CRC32(record->xl_crc, crc)) { ereport(emode, - (errmsg("incorrect resource manager data checksum in record at %X/%X", - recptr.xlogid, recptr.xrecoff))); + (errmsg("incorrect resource manager data checksum in record at %X/%X", + recptr.xlogid, recptr.xrecoff))); return false; } @@ -2647,12 +2633,11 @@ ReadRecord(XLogRecPtr *RecPtr, int emode) if (readBuf == NULL) { /* - * First time through, permanently allocate readBuf. We do it - * this way, rather than just making a static array, for two - * reasons: (1) no need to waste the storage in most - * instantiations of the backend; (2) a static char array isn't - * guaranteed to have any particular alignment, whereas malloc() - * will provide MAXALIGN'd storage. + * First time through, permanently allocate readBuf. We do it this + * way, rather than just making a static array, for two reasons: (1) + * no need to waste the storage in most instantiations of the backend; + * (2) a static char array isn't guaranteed to have any particular + * alignment, whereas malloc() will provide MAXALIGN'd storage. */ readBuf = (char *) malloc(BLCKSZ); Assert(readBuf != NULL); @@ -2685,11 +2670,11 @@ ReadRecord(XLogRecPtr *RecPtr, int emode) RecPtr->xlogid, RecPtr->xrecoff))); /* - * Since we are going to a random position in WAL, forget any - * prior state about what timeline we were in, and allow it to be - * any timeline in expectedTLIs. 
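
RecordIsValid accumulates one CRC over the rmgr data and each backup block in turn, then compares the result with the record's xl_crc. A self-contained illustration of chained CRC accumulation over a list of data chunks; this uses a bitwise CRC-32 stand-in rather than the server's table-driven implementation:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* A chunk chain in the style of XLogRecData: pieces to be CRC'd in order. */
typedef struct Chunk
{
    const void *data;
    size_t      len;
    struct Chunk *next;
} Chunk;

/* Bitwise CRC-32 (reflected, polynomial 0xEDB88320), accumulated chunk by chunk. */
static uint32_t
crc32_accum(uint32_t crc, const void *data, size_t len)
{
    const unsigned char *p = data;
    size_t      i;
    int         bit;

    for (i = 0; i < len; i++)
    {
        crc ^= p[i];
        for (bit = 0; bit < 8; bit++)
            crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
    }
    return crc;
}

int
main(void)
{
    Chunk       second = {"backup block data", 17, NULL};
    Chunk       first = {"main record data", 16, &second};
    uint32_t    crc = 0xFFFFFFFFu;      /* INIT */
    Chunk      *c;

    for (c = &first; c != NULL; c = c->next)
        crc = crc32_accum(crc, c->data, c->len);    /* COMP, per chunk */
    crc ^= 0xFFFFFFFFu;                 /* FIN */

    printf("crc = %08X\n", (unsigned) crc);
    return 0;
}
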
We also set a flag to allow - * curFileTLI to go backwards (but we can't reset that variable - * right here, since we might not change files at all). + * Since we are going to a random position in WAL, forget any prior + * state about what timeline we were in, and allow it to be any + * timeline in expectedTLIs. We also set a flag to allow curFileTLI + * to go backwards (but we can't reset that variable right here, since + * we might not change files at all). */ lastPageTLI = 0; /* see comment in ValidXLOGHeader */ randAccess = true; /* allow curFileTLI to go backwards too */ @@ -2741,9 +2726,9 @@ ReadRecord(XLogRecPtr *RecPtr, int emode) if (targetRecOff == 0) { /* - * Can only get here in the continuing-from-prev-page case, - * because XRecOffIsValid eliminated the zero-page-offset case - * otherwise. Need to skip over the new page's header. + * Can only get here in the continuing-from-prev-page case, because + * XRecOffIsValid eliminated the zero-page-offset case otherwise. Need + * to skip over the new page's header. */ tmpRecPtr.xrecoff += pageHeaderSize; targetRecOff = pageHeaderSize; @@ -2791,14 +2776,14 @@ got_record:; { ereport(emode, (errmsg("invalid resource manager ID %u at %X/%X", - record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff))); + record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } if (randAccess) { /* - * We can't exactly verify the prev-link, but surely it should be - * less than the record's own address. + * We can't exactly verify the prev-link, but surely it should be less + * than the record's own address. */ if (!XLByteLT(record->xl_prev, *RecPtr)) { @@ -2812,9 +2797,9 @@ got_record:; else { /* - * Record's prev-link should exactly match our previous location. - * This check guards against torn WAL pages where a stale but - * valid-looking WAL record starts on a sector boundary. + * Record's prev-link should exactly match our previous location. This + * check guards against torn WAL pages where a stale but valid-looking + * WAL record starts on a sector boundary. */ if (!XLByteEQ(record->xl_prev, ReadRecPtr)) { @@ -2827,11 +2812,10 @@ got_record:; } /* - * Allocate or enlarge readRecordBuf as needed. To avoid useless - * small increases, round its size to a multiple of BLCKSZ, and make - * sure it's at least 4*BLCKSZ to start with. (That is enough for all - * "normal" records, but very large commit or abort records might need - * more space.) + * Allocate or enlarge readRecordBuf as needed. To avoid useless small + * increases, round its size to a multiple of BLCKSZ, and make sure it's + * at least 4*BLCKSZ to start with. (That is enough for all "normal" + * records, but very large commit or abort records might need more space.) */ total_len = record->xl_tot_len; if (total_len > readRecordBufSize) @@ -2927,7 +2911,7 @@ got_record:; MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len)) { nextRecord = (XLogRecord *) ((char *) contrecord + - MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len)); + MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len)); } EndRecPtr.xlogid = readId; EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff + @@ -2991,8 +2975,8 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode) char sysident_str[32]; /* - * Format sysids separately to keep platform-dependent format - * code out of the translatable message string. + * Format sysids separately to keep platform-dependent format code + * out of the translatable message string. 
*/ snprintf(fhdrident_str, sizeof(fhdrident_str), UINT64_FORMAT, longhdr->xlp_sysid); @@ -3000,15 +2984,15 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode) ControlFile->system_identifier); ereport(emode, (errmsg("WAL file is from different system"), - errdetail("WAL file SYSID is %s, pg_control SYSID is %s", - fhdrident_str, sysident_str))); + errdetail("WAL file SYSID is %s, pg_control SYSID is %s", + fhdrident_str, sysident_str))); return false; } if (longhdr->xlp_seg_size != XLogSegSize) { ereport(emode, (errmsg("WAL file is from different system"), - errdetail("Incorrect XLOG_SEG_SIZE in page header."))); + errdetail("Incorrect XLOG_SEG_SIZE in page header."))); return false; } } @@ -3018,7 +3002,7 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode) { ereport(emode, (errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u", - hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff, + hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff, readId, readSeg, readOff))); return false; } @@ -3040,9 +3024,9 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode) * immediate parent's TLI, we should never see TLI go backwards across * successive pages of a consistent WAL sequence. * - * Of course this check should only be applied when advancing - * sequentially across pages; therefore ReadRecord resets lastPageTLI - * to zero when going to a random page. + * Of course this check should only be applied when advancing sequentially + * across pages; therefore ReadRecord resets lastPageTLI to zero when + * going to a random page. */ if (hdr->xlp_tli < lastPageTLI) { @@ -3123,7 +3107,7 @@ readTimeLineHistory(TimeLineID targetTLI) tli <= (TimeLineID) linitial_int(result)) ereport(FATAL, (errmsg("invalid data in history file: %s", fline), - errhint("Timeline IDs must be in increasing sequence."))); + errhint("Timeline IDs must be in increasing sequence."))); /* Build list with newest item first */ result = lcons_int((int) tli, result); @@ -3137,7 +3121,7 @@ readTimeLineHistory(TimeLineID targetTLI) targetTLI <= (TimeLineID) linitial_int(result)) ereport(FATAL, (errmsg("invalid data in history file \"%s\"", path), - errhint("Timeline IDs must be less than child timeline's ID."))); + errhint("Timeline IDs must be less than child timeline's ID."))); result = lcons_int((int) targetTLI, result); @@ -3196,8 +3180,8 @@ findNewestTimeLine(TimeLineID startTLI) TimeLineID probeTLI; /* - * The algorithm is just to probe for the existence of timeline - * history files. XXX is it useful to allow gaps in the sequence? + * The algorithm is just to probe for the existence of timeline history + * files. XXX is it useful to allow gaps in the sequence? */ newestTLI = startTLI; @@ -3302,14 +3286,13 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, unlink(tmppath); /* - * if write didn't set errno, assume problem is no disk - * space + * if write didn't set errno, assume problem is no disk space */ errno = save_errno ? save_errno : ENOSPC; ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); + errmsg("could not write to file \"%s\": %m", tmppath))); } } close(srcfd); @@ -3454,11 +3437,11 @@ WriteControlFile(void) FIN_CRC32(ControlFile->crc); /* - * We write out BLCKSZ bytes into pg_control, zero-padding the excess - * over sizeof(ControlFileData). This reduces the odds of - * premature-EOF errors when reading pg_control. 
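
Padding pg_control out to a full block, as the comment above explains, just means placing the struct at the front of a zeroed BLCKSZ buffer and writing the whole buffer, so a later reader cannot hit EOF partway through. A minimal sketch with a stand-in struct and file name:

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

#define BLK     8192

/* Stand-in for ControlFileData; the real struct is much larger. */
typedef struct
{
    uint32_t    version;
    uint64_t    system_identifier;
    uint32_t    crc;
} DemoControlData;

int
main(void)
{
    static char buffer[BLK];            /* zero-filled padding */
    DemoControlData cf = {1, 12345, 0};
    int         fd;

    memcpy(buffer, &cf, sizeof(cf));
    fd = open("control.demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
    if (fd < 0)
        return 1;
    /* Write a full block so a short read later is clearly an error. */
    if (write(fd, buffer, BLK) != BLK || fsync(fd) != 0)
    {
        close(fd);
        return 1;
    }
    close(fd);
    return 0;
}
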
We'll still fail - * when we check the contents of the file, but hopefully with a more - * specific error than "couldn't read pg_control". + * We write out BLCKSZ bytes into pg_control, zero-padding the excess over + * sizeof(ControlFileData). This reduces the odds of premature-EOF errors + * when reading pg_control. We'll still fail when we check the contents + * of the file, but hopefully with a more specific error than "couldn't + * read pg_control". */ if (sizeof(ControlFileData) > BLCKSZ) ereport(PANIC, @@ -3524,17 +3507,17 @@ ReadControlFile(void) close(fd); /* - * Check for expected pg_control format version. If this is wrong, - * the CRC check will likely fail because we'll be checking the wrong - * number of bytes. Complaining about wrong version will probably be - * more enlightening than complaining about wrong CRC. + * Check for expected pg_control format version. If this is wrong, the + * CRC check will likely fail because we'll be checking the wrong number + * of bytes. Complaining about wrong version will probably be more + * enlightening than complaining about wrong CRC. */ if (ControlFile->pg_control_version != PG_CONTROL_VERSION) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d," - " but the server was compiled with PG_CONTROL_VERSION %d.", - ControlFile->pg_control_version, PG_CONTROL_VERSION), + " but the server was compiled with PG_CONTROL_VERSION %d.", + ControlFile->pg_control_version, PG_CONTROL_VERSION), errhint("It looks like you need to initdb."))); /* Now check the CRC. */ INIT_CRC32(crc); @@ -3548,31 +3531,30 @@ ReadControlFile(void) (errmsg("incorrect checksum in control file"))); /* - * Do compatibility checking immediately. We do this here for 2 - * reasons: + * Do compatibility checking immediately. We do this here for 2 reasons: * - * (1) if the database isn't compatible with the backend executable, we - * want to abort before we can possibly do any damage; + * (1) if the database isn't compatible with the backend executable, we want + * to abort before we can possibly do any damage; * * (2) this code is executed in the postmaster, so the setlocale() will - * propagate to forked backends, which aren't going to read this file - * for themselves. (These locale settings are considered critical + * propagate to forked backends, which aren't going to read this file for + * themselves. (These locale settings are considered critical * compatibility items because they can affect sort order of indexes.) 
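
The ordering argued for above — complain about a wrong format version before attempting the CRC, since a wrong version implies the CRC would be computed over the wrong byte count — can be shown with a toy control structure. The version number and checksum function here are stand-ins, not the real pg_control layout:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define EXPECTED_VERSION    74          /* hypothetical compiled-in value */

typedef struct
{
    uint32_t    version;                /* format version, checked first */
    uint64_t    system_identifier;
    /* ... many more fields would follow ... */
    uint32_t    crc;                    /* checksum of everything above it */
} DemoControlData;

/* Trivial checksum stand-in; the server uses a real CRC-32 here. */
static uint32_t
demo_checksum(const void *data, size_t len)
{
    const unsigned char *p = data;
    uint32_t    sum = 0;

    while (len-- > 0)
        sum = sum * 31 + *p++;
    return sum;
}

static int
validate(const DemoControlData *cf)
{
    /*
     * Check the version first: if it is wrong, the checksum below would be
     * computed over the wrong number of bytes anyway, and "wrong version"
     * is the more enlightening complaint.
     */
    if (cf->version != EXPECTED_VERSION)
    {
        fprintf(stderr, "unexpected version %u\n", (unsigned) cf->version);
        return 0;
    }
    if (demo_checksum(cf, offsetof(DemoControlData, crc)) != cf->crc)
    {
        fprintf(stderr, "incorrect checksum\n");
        return 0;
    }
    return 1;
}

int
main(void)
{
    DemoControlData cf = {EXPECTED_VERSION, 12345, 0};

    cf.crc = demo_checksum(&cf, offsetof(DemoControlData, crc));
    printf("valid: %d\n", validate(&cf));
    return 0;
}
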
*/ if (ControlFile->catalog_version_no != CATALOG_VERSION_NO) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with CATALOG_VERSION_NO %d," - " but the server was compiled with CATALOG_VERSION_NO %d.", - ControlFile->catalog_version_no, CATALOG_VERSION_NO), + " but the server was compiled with CATALOG_VERSION_NO %d.", + ControlFile->catalog_version_no, CATALOG_VERSION_NO), errhint("It looks like you need to initdb."))); if (ControlFile->maxAlign != MAXIMUM_ALIGNOF) ereport(FATAL, (errmsg("database files are incompatible with server"), - errdetail("The database cluster was initialized with MAXALIGN %d," - " but the server was compiled with MAXALIGN %d.", - ControlFile->maxAlign, MAXIMUM_ALIGNOF), - errhint("It looks like you need to initdb."))); + errdetail("The database cluster was initialized with MAXALIGN %d," + " but the server was compiled with MAXALIGN %d.", + ControlFile->maxAlign, MAXIMUM_ALIGNOF), + errhint("It looks like you need to initdb."))); if (ControlFile->floatFormat != FLOATFORMAT_VALUE) ereport(FATAL, (errmsg("database files are incompatible with server"), @@ -3581,76 +3563,76 @@ ReadControlFile(void) if (ControlFile->blcksz != BLCKSZ) ereport(FATAL, (errmsg("database files are incompatible with server"), - errdetail("The database cluster was initialized with BLCKSZ %d," - " but the server was compiled with BLCKSZ %d.", - ControlFile->blcksz, BLCKSZ), - errhint("It looks like you need to recompile or initdb."))); + errdetail("The database cluster was initialized with BLCKSZ %d," + " but the server was compiled with BLCKSZ %d.", + ControlFile->blcksz, BLCKSZ), + errhint("It looks like you need to recompile or initdb."))); if (ControlFile->relseg_size != RELSEG_SIZE) ereport(FATAL, (errmsg("database files are incompatible with server"), - errdetail("The database cluster was initialized with RELSEG_SIZE %d," - " but the server was compiled with RELSEG_SIZE %d.", - ControlFile->relseg_size, RELSEG_SIZE), - errhint("It looks like you need to recompile or initdb."))); + errdetail("The database cluster was initialized with RELSEG_SIZE %d," + " but the server was compiled with RELSEG_SIZE %d.", + ControlFile->relseg_size, RELSEG_SIZE), + errhint("It looks like you need to recompile or initdb."))); if (ControlFile->xlog_seg_size != XLOG_SEG_SIZE) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with XLOG_SEG_SIZE %d," - " but the server was compiled with XLOG_SEG_SIZE %d.", + " but the server was compiled with XLOG_SEG_SIZE %d.", ControlFile->xlog_seg_size, XLOG_SEG_SIZE), - errhint("It looks like you need to recompile or initdb."))); + errhint("It looks like you need to recompile or initdb."))); if (ControlFile->nameDataLen != NAMEDATALEN) ereport(FATAL, (errmsg("database files are incompatible with server"), - errdetail("The database cluster was initialized with NAMEDATALEN %d," - " but the server was compiled with NAMEDATALEN %d.", - ControlFile->nameDataLen, NAMEDATALEN), - errhint("It looks like you need to recompile or initdb."))); + errdetail("The database cluster was initialized with NAMEDATALEN %d," + " but the server was compiled with NAMEDATALEN %d.", + ControlFile->nameDataLen, NAMEDATALEN), + errhint("It looks like you need to recompile or initdb."))); if (ControlFile->indexMaxKeys != INDEX_MAX_KEYS) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with 
INDEX_MAX_KEYS %d," - " but the server was compiled with INDEX_MAX_KEYS %d.", + " but the server was compiled with INDEX_MAX_KEYS %d.", ControlFile->indexMaxKeys, INDEX_MAX_KEYS), - errhint("It looks like you need to recompile or initdb."))); + errhint("It looks like you need to recompile or initdb."))); #ifdef HAVE_INT64_TIMESTAMP if (ControlFile->enableIntTimes != TRUE) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized without HAVE_INT64_TIMESTAMP" - " but the server was compiled with HAVE_INT64_TIMESTAMP."), - errhint("It looks like you need to recompile or initdb."))); + " but the server was compiled with HAVE_INT64_TIMESTAMP."), + errhint("It looks like you need to recompile or initdb."))); #else if (ControlFile->enableIntTimes != FALSE) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with HAVE_INT64_TIMESTAMP" - " but the server was compiled without HAVE_INT64_TIMESTAMP."), - errhint("It looks like you need to recompile or initdb."))); + " but the server was compiled without HAVE_INT64_TIMESTAMP."), + errhint("It looks like you need to recompile or initdb."))); #endif if (ControlFile->localeBuflen != LOCALE_NAME_BUFLEN) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with LOCALE_NAME_BUFLEN %d," - " but the server was compiled with LOCALE_NAME_BUFLEN %d.", + " but the server was compiled with LOCALE_NAME_BUFLEN %d.", ControlFile->localeBuflen, LOCALE_NAME_BUFLEN), - errhint("It looks like you need to recompile or initdb."))); + errhint("It looks like you need to recompile or initdb."))); if (setlocale(LC_COLLATE, ControlFile->lc_collate) == NULL) ereport(FATAL, - (errmsg("database files are incompatible with operating system"), - errdetail("The database cluster was initialized with LC_COLLATE \"%s\"," - " which is not recognized by setlocale().", - ControlFile->lc_collate), - errhint("It looks like you need to initdb or install locale support."))); + (errmsg("database files are incompatible with operating system"), + errdetail("The database cluster was initialized with LC_COLLATE \"%s\"," + " which is not recognized by setlocale().", + ControlFile->lc_collate), + errhint("It looks like you need to initdb or install locale support."))); if (setlocale(LC_CTYPE, ControlFile->lc_ctype) == NULL) ereport(FATAL, - (errmsg("database files are incompatible with operating system"), - errdetail("The database cluster was initialized with LC_CTYPE \"%s\"," - " which is not recognized by setlocale().", - ControlFile->lc_ctype), - errhint("It looks like you need to initdb or install locale support."))); + (errmsg("database files are incompatible with operating system"), + errdetail("The database cluster was initialized with LC_CTYPE \"%s\"," + " which is not recognized by setlocale().", + ControlFile->lc_ctype), + errhint("It looks like you need to initdb or install locale support."))); /* Make the fixed locale settings visible as GUC variables, too */ SetConfigOption("lc_collate", ControlFile->lc_collate, @@ -3719,9 +3701,9 @@ XLOGShmemSize(void) size = add_size(size, mul_size(BLCKSZ, XLOGbuffers)); /* - * Note: we don't count ControlFileData, it comes out of the "slop - * factor" added by CreateSharedMemoryAndSemaphores. This lets us - * use this routine again below to compute the actual allocation size. 
+ * Note: we don't count ControlFileData, it comes out of the "slop factor" + * added by CreateSharedMemoryAndSemaphores. This lets us use this + * routine again below to compute the actual allocation size. */ return size; @@ -3749,9 +3731,9 @@ XLOGShmemInit(void) memset(XLogCtl, 0, sizeof(XLogCtlData)); /* - * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be - * a multiple of the alignment for same, so no extra alignment padding - * is needed here. + * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a + * multiple of the alignment for same, so no extra alignment padding is + * needed here. */ allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData); XLogCtl->xlblocks = (XLogRecPtr *) allocptr; @@ -3766,18 +3748,19 @@ XLOGShmemInit(void) memset(XLogCtl->pages, 0, (Size) BLCKSZ * XLOGbuffers); /* - * Do basic initialization of XLogCtl shared data. (StartupXLOG will - * fill in additional info.) + * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill + * in additional info.) */ - XLogCtl->XLogCacheByte = (Size) BLCKSZ * XLOGbuffers; + XLogCtl->XLogCacheByte = (Size) BLCKSZ *XLOGbuffers; + XLogCtl->XLogCacheBlck = XLOGbuffers - 1; XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages); SpinLockInit(&XLogCtl->info_lck); /* - * If we are not in bootstrap mode, pg_control should already exist. - * Read and validate it immediately (see comments in ReadControlFile() - * for the reasons why). + * If we are not in bootstrap mode, pg_control should already exist. Read + * and validate it immediately (see comments in ReadControlFile() for the + * reasons why). */ if (!IsBootstrapProcessingMode()) ReadControlFile(); @@ -3801,17 +3784,16 @@ BootStrapXLOG(void) pg_crc32 crc; /* - * Select a hopefully-unique system identifier code for this - * installation. We use the result of gettimeofday(), including the - * fractional seconds field, as being about as unique as we can easily - * get. (Think not to use random(), since it hasn't been seeded and - * there's no portable way to seed it other than the system clock - * value...) The upper half of the uint64 value is just the tv_sec - * part, while the lower half is the XOR of tv_sec and tv_usec. This - * is to ensure that we don't lose uniqueness unnecessarily if - * "uint64" is really only 32 bits wide. A person knowing this - * encoding can determine the initialization time of the installation, - * which could perhaps be useful sometimes. + * Select a hopefully-unique system identifier code for this installation. + * We use the result of gettimeofday(), including the fractional seconds + * field, as being about as unique as we can easily get. (Think not to + * use random(), since it hasn't been seeded and there's no portable way + * to seed it other than the system clock value...) The upper half of the + * uint64 value is just the tv_sec part, while the lower half is the XOR + * of tv_sec and tv_usec. This is to ensure that we don't lose uniqueness + * unnecessarily if "uint64" is really only 32 bits wide. A person + * knowing this encoding can determine the initialization time of the + * installation, which could perhaps be useful sometimes. 
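
The system-identifier recipe in the BootStrapXLOG comment above (tv_sec in the upper half, tv_sec XOR tv_usec in the lower half) is easy to reproduce as a stand-alone program:

#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>

int
main(void)
{
    struct timeval tv;
    uint64_t    sysidentifier;

    gettimeofday(&tv, NULL);
    /* high half: seconds; low half: seconds XOR microseconds */
    sysidentifier = ((uint64_t) tv.tv_sec) << 32;
    sysidentifier |= (uint32_t) (tv.tv_sec ^ tv.tv_usec);

    printf("system identifier: %llu\n", (unsigned long long) sysidentifier);
    return 0;
}
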
*/ gettimeofday(&tv, NULL); sysidentifier = ((uint64) tv.tv_sec) << 32; @@ -3821,7 +3803,7 @@ BootStrapXLOG(void) ThisTimeLineID = 1; /* page buffer must be aligned suitably for O_DIRECT */ - buffer = (char *) palloc(BLCKSZ + ALIGNOF_XLOG_BUFFER); + buffer = (char *) palloc(BLCKSZ + ALIGNOF_XLOG_BUFFER); page = (XLogPageHeader) TYPEALIGN(ALIGNOF_XLOG_BUFFER, buffer); memset(page, 0, BLCKSZ); @@ -3882,18 +3864,18 @@ BootStrapXLOG(void) errno = ENOSPC; ereport(PANIC, (errcode_for_file_access(), - errmsg("could not write bootstrap transaction log file: %m"))); + errmsg("could not write bootstrap transaction log file: %m"))); } if (pg_fsync(openLogFile) != 0) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not fsync bootstrap transaction log file: %m"))); + errmsg("could not fsync bootstrap transaction log file: %m"))); if (close(openLogFile)) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not close bootstrap transaction log file: %m"))); + errmsg("could not close bootstrap transaction log file: %m"))); openLogFile = -1; @@ -4036,8 +4018,8 @@ readRecoveryCommandFile(void) recoveryTargetXid = (TransactionId) strtoul(tok2, NULL, 0); if (errno == EINVAL || errno == ERANGE) ereport(FATAL, - (errmsg("recovery_target_xid is not a valid number: \"%s\"", - tok2))); + (errmsg("recovery_target_xid is not a valid number: \"%s\"", + tok2))); ereport(LOG, (errmsg("recovery_target_xid = %u", recoveryTargetXid))); @@ -4056,17 +4038,17 @@ readRecoveryCommandFile(void) recoveryTargetExact = false; /* - * Convert the time string given by the user to the time_t - * format. We use type abstime's input converter because we - * know abstime has the same representation as time_t. + * Convert the time string given by the user to the time_t format. + * We use type abstime's input converter because we know abstime + * has the same representation as time_t. */ recoveryTargetTime = (time_t) DatumGetAbsoluteTime(DirectFunctionCall1(abstimein, - CStringGetDatum(tok2))); + CStringGetDatum(tok2))); ereport(LOG, (errmsg("recovery_target_time = %s", - DatumGetCString(DirectFunctionCall1(abstimeout, - AbsoluteTimeGetDatum((AbsoluteTime) recoveryTargetTime)))))); + DatumGetCString(DirectFunctionCall1(abstimeout, + AbsoluteTimeGetDatum((AbsoluteTime) recoveryTargetTime)))))); } else if (strcmp(tok1, "recovery_target_inclusive") == 0) { @@ -4095,7 +4077,7 @@ readRecoveryCommandFile(void) ereport(FATAL, (errmsg("syntax error in recovery command file: %s", cmdline), - errhint("Lines should have the format parameter = 'value'."))); + errhint("Lines should have the format parameter = 'value'."))); /* Check that required parameters were supplied */ if (recoveryRestoreCommand == NULL) @@ -4107,10 +4089,10 @@ readRecoveryCommandFile(void) InArchiveRecovery = true; /* - * If user specified recovery_target_timeline, validate it or compute - * the "latest" value. We can't do this until after we've gotten the - * restore command and set InArchiveRecovery, because we need to fetch - * timeline history files from the archive. + * If user specified recovery_target_timeline, validate it or compute the + * "latest" value. We can't do this until after we've gotten the restore + * command and set InArchiveRecovery, because we need to fetch timeline + * history files from the archive. 
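
readRecoveryCommandFile expects lines of the form parameter = 'value' (that is what the syntax-error hint in this hunk says). A simplified, self-contained parser for that shape; comment lines, escaped quotes, and the other niceties of the real parser are not handled:

#include <ctype.h>
#include <stddef.h>
#include <stdio.h>

/* Parse "name = 'value'" into the given buffers; 1 on success, 0 on error. */
static int
parse_recovery_line(const char *line, char *name, size_t namesz,
                    char *value, size_t valuesz)
{
    const char *p = line;
    size_t      n = 0;

    while (isspace((unsigned char) *p))
        p++;
    while ((isalnum((unsigned char) *p) || *p == '_') && n < namesz - 1)
        name[n++] = *p++;
    name[n] = '\0';
    if (n == 0)
        return 0;

    while (isspace((unsigned char) *p))
        p++;
    if (*p++ != '=')
        return 0;
    while (isspace((unsigned char) *p))
        p++;
    if (*p++ != '\'')
        return 0;

    n = 0;
    while (*p && *p != '\'' && n < valuesz - 1)
        value[n++] = *p++;
    value[n] = '\0';
    return *p == '\'';
}

int
main(void)
{
    char        name[64];
    char        value[256];

    if (parse_recovery_line("restore_command = 'cp /mnt/archive/%f %p'",
                            name, sizeof(name), value, sizeof(value)))
        printf("%s -> %s\n", name, value);
    return 0;
}
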
*/ if (rtliGiven) { @@ -4119,8 +4101,8 @@ readRecoveryCommandFile(void) /* Timeline 1 does not have a history file, all else should */ if (rtli != 1 && !existsTimeLineHistory(rtli)) ereport(FATAL, - (errmsg("recovery_target_timeline %u does not exist", - rtli))); + (errmsg("recovery_target_timeline %u does not exist", + rtli))); recoveryTargetTLI = rtli; } else @@ -4146,9 +4128,9 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) InArchiveRecovery = false; /* - * We should have the ending log segment currently open. Verify, and - * then close it (to avoid problems on Windows with trying to rename - * or delete an open file). + * We should have the ending log segment currently open. Verify, and then + * close it (to avoid problems on Windows with trying to rename or delete + * an open file). */ Assert(readFile >= 0); Assert(readId == endLogId); @@ -4158,17 +4140,17 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) readFile = -1; /* - * If the segment was fetched from archival storage, we want to - * replace the existing xlog segment (if any) with the archival - * version. This is because whatever is in XLOGDIR is very possibly - * older than what we have from the archives, since it could have come - * from restoring a PGDATA backup. In any case, the archival version - * certainly is more descriptive of what our current database state - * is, because that is what we replayed from. + * If the segment was fetched from archival storage, we want to replace + * the existing xlog segment (if any) with the archival version. This is + * because whatever is in XLOGDIR is very possibly older than what we have + * from the archives, since it could have come from restoring a PGDATA + * backup. In any case, the archival version certainly is more + * descriptive of what our current database state is, because that is what + * we replayed from. * - * Note that if we are establishing a new timeline, ThisTimeLineID is - * already set to the new value, and so we will create a new file - * instead of overwriting any existing file. + * Note that if we are establishing a new timeline, ThisTimeLineID is already + * set to the new value, and so we will create a new file instead of + * overwriting any existing file. */ snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYXLOG"); XLogFilePath(xlogpath, ThisTimeLineID, endLogId, endLogSeg); @@ -4195,9 +4177,9 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) unlink(recoveryPath); /* ignore any error */ /* - * If we are establishing a new timeline, we have to copy data - * from the last WAL segment of the old timeline to create a - * starting WAL segment for the new timeline. + * If we are establishing a new timeline, we have to copy data from + * the last WAL segment of the old timeline to create a starting WAL + * segment for the new timeline. */ if (endTLI != ThisTimeLineID) XLogFileCopy(endLogId, endLogSeg, @@ -4205,8 +4187,8 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) } /* - * Let's just make real sure there are not .ready or .done flags - * posted for the new segment. + * Let's just make real sure there are not .ready or .done flags posted + * for the new segment. 
*/ XLogFileName(xlogpath, ThisTimeLineID, endLogId, endLogSeg); XLogArchiveCleanup(xlogpath); @@ -4216,8 +4198,8 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) unlink(recoveryPath); /* ignore any error */ /* - * Rename the config file out of the way, so that we don't - * accidentally re-enter archive recovery mode in a subsequent crash. + * Rename the config file out of the way, so that we don't accidentally + * re-enter archive recovery mode in a subsequent crash. */ unlink(RECOVERY_COMMAND_DONE); if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0) @@ -4278,9 +4260,9 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis) * transactionid * * when testing for an xid, we MUST test for equality only, since - * transactions are numbered in the order they start, not the - * order they complete. A higher numbered xid will complete before - * you about 50% of the time... + * transactions are numbered in the order they start, not the order + * they complete. A higher numbered xid will complete before you about + * 50% of the time... */ stopsHere = (record->xl_xid == recoveryTargetXid); if (stopsHere) @@ -4289,9 +4271,9 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis) else { /* - * there can be many transactions that share the same commit time, - * so we stop after the last one, if we are inclusive, or stop at - * the first one if we are exclusive + * there can be many transactions that share the same commit time, so + * we stop after the last one, if we are inclusive, or stop at the + * first one if we are exclusive */ if (recoveryTargetInclusive) stopsHere = (recordXtime > recoveryTargetTime); @@ -4312,22 +4294,22 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis) if (recoveryStopAfter) ereport(LOG, (errmsg("recovery stopping after commit of transaction %u, time %s", - recoveryStopXid, str_time(recoveryStopTime)))); + recoveryStopXid, str_time(recoveryStopTime)))); else ereport(LOG, (errmsg("recovery stopping before commit of transaction %u, time %s", - recoveryStopXid, str_time(recoveryStopTime)))); + recoveryStopXid, str_time(recoveryStopTime)))); } else { if (recoveryStopAfter) ereport(LOG, (errmsg("recovery stopping after abort of transaction %u, time %s", - recoveryStopXid, str_time(recoveryStopTime)))); + recoveryStopXid, str_time(recoveryStopTime)))); else ereport(LOG, (errmsg("recovery stopping before abort of transaction %u, time %s", - recoveryStopXid, str_time(recoveryStopTime)))); + recoveryStopXid, str_time(recoveryStopTime)))); } } @@ -4359,8 +4341,8 @@ StartupXLOG(void) /* * Read control file and check XLOG status looks valid. * - * Note: in most control paths, *ControlFile is already valid and we need - * not do ReadControlFile() here, but might as well do it to be sure. + * Note: in most control paths, *ControlFile is already valid and we need not + * do ReadControlFile() here, but might as well do it to be sure. 
*/ ReadControlFile(); @@ -4381,10 +4363,10 @@ StartupXLOG(void) str_time(ControlFile->time)))); else if (ControlFile->state == DB_IN_RECOVERY) ereport(LOG, - (errmsg("database system was interrupted while in recovery at %s", - str_time(ControlFile->time)), - errhint("This probably means that some data is corrupted and" - " you will have to use the last backup for recovery."))); + (errmsg("database system was interrupted while in recovery at %s", + str_time(ControlFile->time)), + errhint("This probably means that some data is corrupted and" + " you will have to use the last backup for recovery."))); else if (ControlFile->state == DB_IN_PRODUCTION) ereport(LOG, (errmsg("database system was interrupted at %s", @@ -4397,8 +4379,8 @@ StartupXLOG(void) #endif /* - * Initialize on the assumption we want to recover to the same - * timeline that's active according to pg_control. + * Initialize on the assumption we want to recover to the same timeline + * that's active according to pg_control. */ recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID; @@ -4417,7 +4399,7 @@ StartupXLOG(void) * timeline. */ if (!list_member_int(expectedTLIs, - (int) ControlFile->checkPointCopy.ThisTimeLineID)) + (int) ControlFile->checkPointCopy.ThisTimeLineID)) ereport(FATAL, (errmsg("requested timeline %u is not a child of database system timeline %u", recoveryTargetTLI, @@ -4426,30 +4408,29 @@ StartupXLOG(void) if (read_backup_label(&checkPointLoc)) { /* - * When a backup_label file is present, we want to roll forward - * from the checkpoint it identifies, rather than using - * pg_control. + * When a backup_label file is present, we want to roll forward from + * the checkpoint it identifies, rather than using pg_control. */ record = ReadCheckpointRecord(checkPointLoc, 0); if (record != NULL) { ereport(LOG, (errmsg("checkpoint record is at %X/%X", - checkPointLoc.xlogid, checkPointLoc.xrecoff))); + checkPointLoc.xlogid, checkPointLoc.xrecoff))); InRecovery = true; /* force recovery even if SHUTDOWNED */ } else { ereport(PANIC, - (errmsg("could not locate required checkpoint record"), - errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir))); + (errmsg("could not locate required checkpoint record"), + errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir))); } } else { /* - * Get the last valid checkpoint record. If the latest one - * according to pg_control is broken, try the next-to-last one. + * Get the last valid checkpoint record. If the latest one according + * to pg_control is broken, try the next-to-last one. 
*/ checkPointLoc = ControlFile->checkPoint; record = ReadCheckpointRecord(checkPointLoc, 1); @@ -4457,7 +4438,7 @@ StartupXLOG(void) { ereport(LOG, (errmsg("checkpoint record is at %X/%X", - checkPointLoc.xlogid, checkPointLoc.xrecoff))); + checkPointLoc.xlogid, checkPointLoc.xrecoff))); } else { @@ -4466,14 +4447,13 @@ StartupXLOG(void) if (record != NULL) { ereport(LOG, - (errmsg("using previous checkpoint record at %X/%X", - checkPointLoc.xlogid, checkPointLoc.xrecoff))); - InRecovery = true; /* force recovery even if - * SHUTDOWNED */ + (errmsg("using previous checkpoint record at %X/%X", + checkPointLoc.xlogid, checkPointLoc.xrecoff))); + InRecovery = true; /* force recovery even if SHUTDOWNED */ } else ereport(PANIC, - (errmsg("could not locate a valid checkpoint record"))); + (errmsg("could not locate a valid checkpoint record"))); } } @@ -4482,10 +4462,10 @@ StartupXLOG(void) wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN); ereport(LOG, - (errmsg("redo record is at %X/%X; undo record is at %X/%X; shutdown %s", - checkPoint.redo.xlogid, checkPoint.redo.xrecoff, - checkPoint.undo.xlogid, checkPoint.undo.xrecoff, - wasShutdown ? "TRUE" : "FALSE"))); + (errmsg("redo record is at %X/%X; undo record is at %X/%X; shutdown %s", + checkPoint.redo.xlogid, checkPoint.redo.xrecoff, + checkPoint.undo.xlogid, checkPoint.undo.xrecoff, + wasShutdown ? "TRUE" : "FALSE"))); ereport(LOG, (errmsg("next transaction ID: %u; next OID: %u", checkPoint.nextXid, checkPoint.nextOid))); @@ -4502,9 +4482,9 @@ StartupXLOG(void) MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); /* - * We must replay WAL entries using the same TimeLineID they were - * created under, so temporarily adopt the TLI indicated by the - * checkpoint (see also xlog_redo()). + * We must replay WAL entries using the same TimeLineID they were created + * under, so temporarily adopt the TLI indicated by the checkpoint (see + * also xlog_redo()). */ ThisTimeLineID = checkPoint.ThisTimeLineID; @@ -4518,15 +4498,15 @@ StartupXLOG(void) /* * Check whether we need to force recovery from WAL. If it appears to - * have been a clean shutdown and we did not have a recovery.conf - * file, then assume no recovery needed. + * have been a clean shutdown and we did not have a recovery.conf file, + * then assume no recovery needed. */ if (XLByteLT(checkPoint.undo, RecPtr) || XLByteLT(checkPoint.redo, RecPtr)) { if (wasShutdown) ereport(PANIC, - (errmsg("invalid redo/undo record in shutdown checkpoint"))); + (errmsg("invalid redo/undo record in shutdown checkpoint"))); InRecovery = true; } else if (ControlFile->state != DB_SHUTDOWNED) @@ -4563,8 +4543,8 @@ StartupXLOG(void) } /* - * Find the first record that logically follows the checkpoint --- - * it might physically precede it, though. + * Find the first record that logically follows the checkpoint --- it + * might physically precede it, though. 
*/ if (XLByteLT(checkPoint.redo, RecPtr)) { @@ -4603,7 +4583,7 @@ StartupXLOG(void) xlog_outrec(buf, record); strcat(buf, " - "); RmgrTable[record->xl_rmid].rm_desc(buf, - record->xl_info, XLogRecGetData(record)); + record->xl_info, XLogRecGetData(record)); elog(LOG, "%s", buf); } #endif @@ -4621,7 +4601,7 @@ StartupXLOG(void) /* nextXid must be beyond record's xid */ if (TransactionIdFollowsOrEquals(record->xl_xid, - ShmemVariableCache->nextXid)) + ShmemVariableCache->nextXid)) { ShmemVariableCache->nextXid = record->xl_xid; TransactionIdAdvance(ShmemVariableCache->nextXid); @@ -4655,8 +4635,8 @@ StartupXLOG(void) } /* - * Re-fetch the last valid or last applied record, so we can identify - * the exact endpoint of what we consider the valid portion of WAL. + * Re-fetch the last valid or last applied record, so we can identify the + * exact endpoint of what we consider the valid portion of WAL. */ record = ReadRecord(&LastRec, PANIC); EndOfLog = EndRecPtr; @@ -4682,8 +4662,8 @@ StartupXLOG(void) * * If we stopped short of the end of WAL during recovery, then we are * generating a new timeline and must assign it a unique new ID. - * Otherwise, we can just extend the timeline we were in when we ran - * out of WAL. + * Otherwise, we can just extend the timeline we were in when we ran out + * of WAL. */ if (needNewTimeLine) { @@ -4698,10 +4678,10 @@ StartupXLOG(void) XLogCtl->ThisTimeLineID = ThisTimeLineID; /* - * We are now done reading the old WAL. Turn off archive fetching if - * it was active, and make a writable copy of the last WAL segment. - * (Note that we also have a copy of the last block of the old WAL in - * readBuf; we will use that below.) + * We are now done reading the old WAL. Turn off archive fetching if it + * was active, and make a writable copy of the last WAL segment. (Note + * that we also have a copy of the last block of the old WAL in readBuf; + * we will use that below.) */ if (InArchiveRecovery) exitArchiveRecovery(curFileTLI, endLogId, endLogSeg); @@ -4724,9 +4704,9 @@ StartupXLOG(void) ((EndOfLog.xrecoff - 1) / BLCKSZ + 1) * BLCKSZ; /* - * Tricky point here: readBuf contains the *last* block that the - * LastRec record spans, not the one it starts in. The last block is - * indeed the one we want to use. + * Tricky point here: readBuf contains the *last* block that the LastRec + * record spans, not the one it starts in. The last block is indeed the + * one we want to use. */ Assert(readOff == (XLogCtl->xlblocks[0].xrecoff - BLCKSZ) % XLogSegSize); memcpy((char *) Insert->currpage, readBuf, BLCKSZ); @@ -4752,9 +4732,8 @@ StartupXLOG(void) else { /* - * Whenever Write.LogwrtResult points to exactly the end of a - * page, Write.curridx must point to the *next* page (see - * XLogWrite()). + * Whenever Write.LogwrtResult points to exactly the end of a page, + * Write.curridx must point to the *next* page (see XLogWrite()). * * Note: it might seem we should do AdvanceXLInsertBuffer() here, but * this is sufficient. The first actual attempt to insert a log @@ -4785,17 +4764,16 @@ StartupXLOG(void) pgstat_reset_all(); /* - * Perform a new checkpoint to update our recovery activity to - * disk. + * Perform a new checkpoint to update our recovery activity to disk. * - * Note that we write a shutdown checkpoint rather than an on-line - * one. 
This is not particularly critical, but since we may be - * assigning a new TLI, using a shutdown checkpoint allows us to - * have the rule that TLI only changes in shutdown checkpoints, - * which allows some extra error checking in xlog_redo. + * Note that we write a shutdown checkpoint rather than an on-line one. + * This is not particularly critical, but since we may be assigning a + * new TLI, using a shutdown checkpoint allows us to have the rule + * that TLI only changes in shutdown checkpoints, which allows some + * extra error checking in xlog_redo. * - * In case we had to use the secondary checkpoint, make sure that it - * will still be shown as the secondary checkpoint after this + * In case we had to use the secondary checkpoint, make sure that it will + * still be shown as the secondary checkpoint after this * CreateCheckPoint operation; we don't want the broken primary * checkpoint to become prevCheckPoint... */ @@ -4810,8 +4788,8 @@ StartupXLOG(void) XLogCloseRelationCache(); /* - * Now that we've checkpointed the recovery, it's safe to flush - * old backup_label, if present. + * Now that we've checkpointed the recovery, it's safe to flush old + * backup_label, if present. */ remove_backup_label(); } @@ -4878,7 +4856,7 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt) { case 1: ereport(LOG, - (errmsg("invalid primary checkpoint link in control file"))); + (errmsg("invalid primary checkpoint link in control file"))); break; case 2: ereport(LOG, @@ -4886,7 +4864,7 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt) break; default: ereport(LOG, - (errmsg("invalid checkpoint link in backup_label file"))); + (errmsg("invalid checkpoint link in backup_label file"))); break; } return NULL; @@ -4927,7 +4905,7 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt) break; default: ereport(LOG, - (errmsg("invalid resource manager ID in checkpoint record"))); + (errmsg("invalid resource manager ID in checkpoint record"))); break; } return NULL; @@ -4939,11 +4917,11 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt) { case 1: ereport(LOG, - (errmsg("invalid xl_info in primary checkpoint record"))); + (errmsg("invalid xl_info in primary checkpoint record"))); break; case 2: ereport(LOG, - (errmsg("invalid xl_info in secondary checkpoint record"))); + (errmsg("invalid xl_info in secondary checkpoint record"))); break; default: ereport(LOG, @@ -4959,11 +4937,11 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt) { case 1: ereport(LOG, - (errmsg("invalid length of primary checkpoint record"))); + (errmsg("invalid length of primary checkpoint record"))); break; case 2: ereport(LOG, - (errmsg("invalid length of secondary checkpoint record"))); + (errmsg("invalid length of secondary checkpoint record"))); break; default: ereport(LOG, @@ -5084,10 +5062,10 @@ CreateCheckPoint(bool shutdown, bool force) int nsegsrecycled = 0; /* - * Acquire CheckpointLock to ensure only one checkpoint happens at a - * time. (This is just pro forma, since in the present system - * structure there is only one process that is allowed to issue - * checkpoints at any given time.) + * Acquire CheckpointLock to ensure only one checkpoint happens at a time. + * (This is just pro forma, since in the present system structure there is + * only one process that is allowed to issue checkpoints at any given + * time.) 
*/ LWLockAcquire(CheckpointLock, LW_EXCLUSIVE); @@ -5108,10 +5086,10 @@ CreateCheckPoint(bool shutdown, bool force) checkPoint.time = time(NULL); /* - * We must hold CheckpointStartLock while determining the checkpoint - * REDO pointer. This ensures that any concurrent transaction commits - * will be either not yet logged, or logged and recorded in pg_clog. - * See notes in RecordTransactionCommit(). + * We must hold CheckpointStartLock while determining the checkpoint REDO + * pointer. This ensures that any concurrent transaction commits will be + * either not yet logged, or logged and recorded in pg_clog. See notes in + * RecordTransactionCommit(). */ LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE); @@ -5119,20 +5097,19 @@ CreateCheckPoint(bool shutdown, bool force) LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); /* - * If this isn't a shutdown or forced checkpoint, and we have not - * inserted any XLOG records since the start of the last checkpoint, - * skip the checkpoint. The idea here is to avoid inserting duplicate - * checkpoints when the system is idle. That wastes log space, and - * more importantly it exposes us to possible loss of both current and - * previous checkpoint records if the machine crashes just as we're - * writing the update. (Perhaps it'd make even more sense to - * checkpoint only when the previous checkpoint record is in a - * different xlog page?) + * If this isn't a shutdown or forced checkpoint, and we have not inserted + * any XLOG records since the start of the last checkpoint, skip the + * checkpoint. The idea here is to avoid inserting duplicate checkpoints + * when the system is idle. That wastes log space, and more importantly it + * exposes us to possible loss of both current and previous checkpoint + * records if the machine crashes just as we're writing the update. + * (Perhaps it'd make even more sense to checkpoint only when the previous + * checkpoint record is in a different xlog page?) * - * We have to make two tests to determine that nothing has happened since - * the start of the last checkpoint: current insertion point must - * match the end of the last checkpoint record, and its redo pointer - * must point to itself. + * We have to make two tests to determine that nothing has happened since the + * start of the last checkpoint: current insertion point must match the + * end of the last checkpoint record, and its redo pointer must point to + * itself. */ if (!shutdown && !force) { @@ -5158,10 +5135,10 @@ CreateCheckPoint(bool shutdown, bool force) /* * Compute new REDO record ptr = location of next XLOG record. * - * NB: this is NOT necessarily where the checkpoint record itself will - * be, since other backends may insert more XLOG records while we're - * off doing the buffer flush work. Those XLOG records are logically - * after the checkpoint, even though physically before it. Got that? + * NB: this is NOT necessarily where the checkpoint record itself will be, + * since other backends may insert more XLOG records while we're off doing + * the buffer flush work. Those XLOG records are logically after the + * checkpoint, even though physically before it. Got that? */ freespace = INSERT_FREESPACE(Insert); if (freespace < SizeOfXLogRecord) @@ -5173,16 +5150,15 @@ CreateCheckPoint(bool shutdown, bool force) INSERT_RECPTR(checkPoint.redo, Insert, Insert->curridx); /* - * Here we update the shared RedoRecPtr for future XLogInsert calls; - * this must be done while holding the insert lock AND the info_lck. 
+ * Here we update the shared RedoRecPtr for future XLogInsert calls; this + * must be done while holding the insert lock AND the info_lck. * * Note: if we fail to complete the checkpoint, RedoRecPtr will be left - * pointing past where it really needs to point. This is okay; the - * only consequence is that XLogInsert might back up whole buffers - * that it didn't really need to. We can't postpone advancing - * RedoRecPtr because XLogInserts that happen while we are dumping - * buffers must assume that their buffer changes are not included in - * the checkpoint. + * pointing past where it really needs to point. This is okay; the only + * consequence is that XLogInsert might back up whole buffers that it + * didn't really need to. We can't postpone advancing RedoRecPtr because + * XLogInserts that happen while we are dumping buffers must assume that + * their buffer changes are not included in the checkpoint. */ { /* use volatile pointer to prevent code rearrangement */ @@ -5219,15 +5195,15 @@ CreateCheckPoint(bool shutdown, bool force) &checkPoint.nextMultiOffset); /* - * Having constructed the checkpoint record, ensure all shmem disk - * buffers and commit-log buffers are flushed to disk. + * Having constructed the checkpoint record, ensure all shmem disk buffers + * and commit-log buffers are flushed to disk. * - * This I/O could fail for various reasons. If so, we will fail to - * complete the checkpoint, but there is no reason to force a system - * panic. Accordingly, exit critical section while doing it. (If - * we are doing a shutdown checkpoint, we probably *should* panic --- - * but that will happen anyway because we'll still be inside the - * critical section established by ShutdownXLOG.) + * This I/O could fail for various reasons. If so, we will fail to complete + * the checkpoint, but there is no reason to force a system panic. + * Accordingly, exit critical section while doing it. (If we are doing a + * shutdown checkpoint, we probably *should* panic --- but that will + * happen anyway because we'll still be inside the critical section + * established by ShutdownXLOG.) */ END_CRIT_SECTION(); @@ -5260,8 +5236,8 @@ CreateCheckPoint(bool shutdown, bool force) XLogFlush(recptr); /* - * We now have ProcLastRecPtr = start of actual checkpoint record, - * recptr = end of actual checkpoint record. + * We now have ProcLastRecPtr = start of actual checkpoint record, recptr + * = end of actual checkpoint record. */ if (shutdown && !XLByteEQ(checkPoint.redo, ProcLastRecPtr)) ereport(PANIC, @@ -5287,8 +5263,8 @@ CreateCheckPoint(bool shutdown, bool force) LWLockRelease(ControlFileLock); /* - * We are now done with critical updates; no need for system panic if - * we have trouble while fooling with offline log segments. + * We are now done with critical updates; no need for system panic if we + * have trouble while fooling with offline log segments. */ END_CRIT_SECTION(); @@ -5304,19 +5280,18 @@ CreateCheckPoint(bool shutdown, bool force) } /* - * Make more log segments if needed. (Do this after deleting offline - * log segments, to avoid having peak disk space usage higher than - * necessary.) + * Make more log segments if needed. (Do this after deleting offline log + * segments, to avoid having peak disk space usage higher than necessary.) */ if (!shutdown) nsegsadded = PreallocXlogFiles(recptr); /* - * Truncate pg_subtrans if possible. We can throw away all data - * before the oldest XMIN of any running transaction. 
No future - * transaction will attempt to reference any pg_subtrans entry older - * than that (see Asserts in subtrans.c). During recovery, though, we - * mustn't do this because StartupSUBTRANS hasn't been called yet. + * Truncate pg_subtrans if possible. We can throw away all data before + * the oldest XMIN of any running transaction. No future transaction will + * attempt to reference any pg_subtrans entry older than that (see Asserts + * in subtrans.c). During recovery, though, we mustn't do this because + * StartupSUBTRANS hasn't been called yet. */ if (!InRecovery) TruncateSUBTRANS(GetOldestXmin(true)); @@ -5342,13 +5317,14 @@ XLogPutNextOid(Oid nextOid) rdata.buffer = InvalidBuffer; rdata.next = NULL; (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata); + /* * We need not flush the NEXTOID record immediately, because any of the - * just-allocated OIDs could only reach disk as part of a tuple insert - * or update that would have its own XLOG record that must follow the - * NEXTOID record. Therefore, the standard buffer LSN interlock applied - * to those records will ensure no such OID reaches disk before the - * NEXTOID record does. + * just-allocated OIDs could only reach disk as part of a tuple insert or + * update that would have its own XLOG record that must follow the NEXTOID + * record. Therefore, the standard buffer LSN interlock applied to those + * records will ensure no such OID reaches disk before the NEXTOID record + * does. */ } @@ -5384,8 +5360,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) checkPoint.nextMultiOffset); /* - * TLI may change in a shutdown checkpoint, but it shouldn't - * decrease + * TLI may change in a shutdown checkpoint, but it shouldn't decrease */ if (checkPoint.ThisTimeLineID != ThisTimeLineID) { @@ -5394,7 +5369,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) (int) checkPoint.ThisTimeLineID)) ereport(PANIC, (errmsg("unexpected timeline ID %u (after %u) in checkpoint record", - checkPoint.ThisTimeLineID, ThisTimeLineID))); + checkPoint.ThisTimeLineID, ThisTimeLineID))); /* Following WAL records should be run with new TLI */ ThisTimeLineID = checkPoint.ThisTimeLineID; } @@ -5441,7 +5416,7 @@ xlog_desc(char *buf, uint8 xl_info, char *rec) checkpoint->nextOid, checkpoint->nextMulti, checkpoint->nextMultiOffset, - (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); + (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); } else if (info == XLOG_NEXTOID) { @@ -5535,23 +5510,23 @@ assign_xlog_sync_method(const char *method, bool doit, GucSource source) /* * To ensure that no blocks escape unsynced, force an fsync on the * currently open log segment (if any). Also, if the open flag is - * changing, close the log file so it will be reopened (with new - * flag bit) at next use. + * changing, close the log file so it will be reopened (with new flag + * bit) at next use. 
*/ if (openLogFile >= 0) { if (pg_fsync(openLogFile) != 0) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not fsync log file %u, segment %u: %m", - openLogId, openLogSeg))); + errmsg("could not fsync log file %u, segment %u: %m", + openLogId, openLogSeg))); if (open_sync_bit != new_sync_bit) { if (close(openLogFile)) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not close log file %u, segment %u: %m", - openLogId, openLogSeg))); + errmsg("could not close log file %u, segment %u: %m", + openLogId, openLogSeg))); openLogFile = -1; } } @@ -5575,16 +5550,16 @@ issue_xlog_fsync(void) if (pg_fsync_no_writethrough(openLogFile) != 0) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not fsync log file %u, segment %u: %m", - openLogId, openLogSeg))); + errmsg("could not fsync log file %u, segment %u: %m", + openLogId, openLogSeg))); break; #ifdef HAVE_FSYNC_WRITETHROUGH case SYNC_METHOD_FSYNC_WRITETHROUGH: if (pg_fsync_writethrough(openLogFile) != 0) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not fsync write-through log file %u, segment %u: %m", - openLogId, openLogSeg))); + errmsg("could not fsync write-through log file %u, segment %u: %m", + openLogId, openLogSeg))); break; #endif #ifdef HAVE_FDATASYNC @@ -5592,8 +5567,8 @@ issue_xlog_fsync(void) if (pg_fdatasync(openLogFile) != 0) ereport(PANIC, (errcode_for_file_access(), - errmsg("could not fdatasync log file %u, segment %u: %m", - openLogId, openLogSeg))); + errmsg("could not fdatasync log file %u, segment %u: %m", + openLogId, openLogSeg))); break; #endif case SYNC_METHOD_OPEN: @@ -5640,25 +5615,25 @@ pg_start_backup(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errmsg("WAL archiving is not active"), - (errhint("archive_command must be defined before " - "online backups can be made safely."))))); + (errhint("archive_command must be defined before " + "online backups can be made safely."))))); backupidstr = DatumGetCString(DirectFunctionCall1(textout, - PointerGetDatum(backupid))); + PointerGetDatum(backupid))); /* - * Force a CHECKPOINT. This is not strictly necessary, but it seems - * like a good idea to minimize the amount of past WAL needed to use - * the backup. Also, this guarantees that two successive backup runs - * will have different checkpoint positions and hence different - * history file names, even if nothing happened in between. + * Force a CHECKPOINT. This is not strictly necessary, but it seems like + * a good idea to minimize the amount of past WAL needed to use the + * backup. Also, this guarantees that two successive backup runs will + * have different checkpoint positions and hence different history file + * names, even if nothing happened in between. */ RequestCheckpoint(true, false); /* - * Now we need to fetch the checkpoint record location, and also its - * REDO pointer. The oldest point in WAL that would be needed to - * restore starting from the checkpoint is precisely the REDO pointer. + * Now we need to fetch the checkpoint record location, and also its REDO + * pointer. The oldest point in WAL that would be needed to restore + * starting from the checkpoint is precisely the REDO pointer. 
*/ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); checkpointloc = ControlFile->checkPoint; @@ -5669,10 +5644,10 @@ pg_start_backup(PG_FUNCTION_ARGS) XLogFileName(xlogfilename, ThisTimeLineID, _logId, _logSeg); /* - * We deliberately use strftime/localtime not the src/timezone - * functions, so that backup labels will consistently be recorded in - * the same timezone regardless of TimeZone setting. This matches - * elog.c's practice. + * We deliberately use strftime/localtime not the src/timezone functions, + * so that backup labels will consistently be recorded in the same + * timezone regardless of TimeZone setting. This matches elog.c's + * practice. */ stamp_time = time(NULL); strftime(strfbuf, sizeof(strfbuf), @@ -5680,8 +5655,7 @@ pg_start_backup(PG_FUNCTION_ARGS) localtime(&stamp_time)); /* - * Check for existing backup label --- implies a backup is already - * running + * Check for existing backup label --- implies a backup is already running */ if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0) { @@ -5725,7 +5699,7 @@ pg_start_backup(PG_FUNCTION_ARGS) snprintf(xlogfilename, sizeof(xlogfilename), "%X/%X", startpoint.xlogid, startpoint.xrecoff); result = DatumGetTextP(DirectFunctionCall1(textin, - CStringGetDatum(xlogfilename))); + CStringGetDatum(xlogfilename))); PG_RETURN_TEXT_P(result); } @@ -5762,8 +5736,8 @@ pg_stop_backup(PG_FUNCTION_ARGS) (errmsg("must be superuser to run a backup")))); /* - * Get the current end-of-WAL position; it will be unsafe to use this - * dump to restore to a point in advance of this time. + * Get the current end-of-WAL position; it will be unsafe to use this dump + * to restore to a point in advance of this time. */ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); INSERT_RECPTR(stoppoint, Insert, Insert->curridx); @@ -5773,10 +5747,10 @@ pg_stop_backup(PG_FUNCTION_ARGS) XLogFileName(stopxlogfilename, ThisTimeLineID, _logId, _logSeg); /* - * We deliberately use strftime/localtime not the src/timezone - * functions, so that backup labels will consistently be recorded in - * the same timezone regardless of TimeZone setting. This matches - * elog.c's practice. + * We deliberately use strftime/localtime not the src/timezone functions, + * so that backup labels will consistently be recorded in the same + * timezone regardless of TimeZone setting. This matches elog.c's + * practice. */ stamp_time = time(NULL); strftime(strfbuf, sizeof(strfbuf), @@ -5800,9 +5774,8 @@ pg_stop_backup(PG_FUNCTION_ARGS) } /* - * Read and parse the START WAL LOCATION line (this code is pretty - * crude, but we are not expecting any variability in the file - * format). + * Read and parse the START WAL LOCATION line (this code is pretty crude, + * but we are not expecting any variability in the file format). */ if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %24s)%c", &startpoint.xlogid, &startpoint.xrecoff, startxlogfilename, @@ -5869,7 +5842,7 @@ pg_stop_backup(PG_FUNCTION_ARGS) snprintf(stopxlogfilename, sizeof(stopxlogfilename), "%X/%X", stoppoint.xlogid, stoppoint.xrecoff); result = DatumGetTextP(DirectFunctionCall1(textin, - CStringGetDatum(stopxlogfilename))); + CStringGetDatum(stopxlogfilename))); PG_RETURN_TEXT_P(result); } @@ -5921,9 +5894,9 @@ read_backup_label(XLogRecPtr *checkPointLoc) } /* - * Read and parse the START WAL LOCATION and CHECKPOINT lines (this - * code is pretty crude, but we are not expecting any variability in - * the file format). 
+ * Read and parse the START WAL LOCATION and CHECKPOINT lines (this code + * is pretty crude, but we are not expecting any variability in the file + * format). */ if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c", &startpoint.xlogid, &startpoint.xrecoff, &tli, @@ -5963,17 +5936,17 @@ read_backup_label(XLogRecPtr *checkPointLoc) * Parse history file to identify stop point. */ if (fscanf(fp, "START WAL LOCATION: %X/%X (file %24s)%c", - &startpoint.xlogid, &startpoint.xrecoff, startxlogfilename, + &startpoint.xlogid, &startpoint.xrecoff, startxlogfilename, &ch) != 4 || ch != '\n') ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("invalid data in file \"%s\"", histfilename))); + errmsg("invalid data in file \"%s\"", histfilename))); if (fscanf(fp, "STOP WAL LOCATION: %X/%X (file %24s)%c", - &stoppoint.xlogid, &stoppoint.xrecoff, stopxlogfilename, + &stoppoint.xlogid, &stoppoint.xrecoff, stopxlogfilename, &ch) != 4 || ch != '\n') ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("invalid data in file \"%s\"", histfilename))); + errmsg("invalid data in file \"%s\"", histfilename))); recoveryMinXlogOffset = stoppoint; if (ferror(fp) || FreeFile(fp)) ereport(FATAL, diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 55caf84a04..485aa52474 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.38 2005/06/06 17:01:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.39 2005/10/15 02:49:11 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -121,7 +121,7 @@ _xl_remove_hash_entry(XLogRelDesc *rdesc) rdesc->moreRecently->lessRecently = rdesc->lessRecently; hentry = (XLogRelCacheEntry *) hash_search(_xlrelcache, - (void *) &(rdesc->reldata.rd_node), HASH_REMOVE, NULL); + (void *) &(rdesc->reldata.rd_node), HASH_REMOVE, NULL); if (hentry == NULL) elog(PANIC, "_xl_remove_hash_entry: file was not found in cache"); @@ -211,11 +211,11 @@ XLogOpenRelation(RelFileNode rnode) res->reldata.rd_node = rnode; /* - * We set up the lockRelId in case anything tries to lock the - * dummy relation. Note that this is fairly bogus since relNode - * may be different from the relation's OID. It shouldn't really - * matter though, since we are presumably running by ourselves and - * can't have any lock conflicts ... + * We set up the lockRelId in case anything tries to lock the dummy + * relation. Note that this is fairly bogus since relNode may be + * different from the relation's OID. It shouldn't really matter + * though, since we are presumably running by ourselves and can't have + * any lock conflicts ... */ res->reldata.rd_lockInfo.lockRelId.dbId = rnode.dbNode; res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode; @@ -233,13 +233,13 @@ XLogOpenRelation(RelFileNode rnode) RelationOpenSmgr(&(res->reldata)); /* - * Create the target file if it doesn't already exist. This lets - * us cope if the replay sequence contains writes to a relation - * that is later deleted. (The original coding of this routine - * would instead return NULL, causing the writes to be suppressed. 
- * But that seems like it risks losing valuable data if the - * filesystem loses an inode during a crash. Better to write the - * data until we are actually told to delete the file.) + * Create the target file if it doesn't already exist. This lets us + * cope if the replay sequence contains writes to a relation that is + * later deleted. (The original coding of this routine would instead + * return NULL, causing the writes to be suppressed. But that seems + * like it risks losing valuable data if the filesystem loses an inode + * during a crash. Better to write the data until we are actually + * told to delete the file.) */ smgrcreate(res->reldata.rd_smgr, res->reldata.rd_istemp, true); } |
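The hunks above are pure comment reflow, but several of the reflowed comments compress a lot of recovery and checkpoint reasoning; the standalone C sketches that follow illustrate them. None of this is PostgreSQL source: every struct, helper and value not visible in the hunks is invented for illustration. First, the recoveryStopsHere() rules, which explain why an xid target must be matched by equality (xids are issued in transaction-start order, not commit order) and how an inclusive versus exclusive time target behaves:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

typedef uint32_t TransactionId;

/* Hypothetical, simplified stand-in for the recovery-target settings. */
typedef struct RecoveryTarget
{
    bool          byXid;        /* stop on an xid target rather than a time target */
    TransactionId targetXid;
    time_t        targetTime;
    bool          inclusive;    /* stop after (true) or before (false) the target */
} RecoveryTarget;

/*
 * Should replay stop at a commit/abort record carrying (recordXid, recordTime)?
 *
 * - An xid target is tested with equality only: xids are handed out in
 *   transaction-start order, not commit order, so "first xid >= target"
 *   would fire too early roughly half the time.
 * - Many commits can share one timestamp, so an inclusive time target keeps
 *   replaying until the time moves past it, while an exclusive one stops at
 *   the first record at or beyond it.
 */
static bool
stops_here(const RecoveryTarget *tgt, TransactionId recordXid, time_t recordTime)
{
    if (tgt->byXid)
        return recordXid == tgt->targetXid;
    if (tgt->inclusive)
        return recordTime > tgt->targetTime;
    return recordTime >= tgt->targetTime;
}

int
main(void)
{
    RecoveryTarget tgt = {true, 1234, 0, true};

    printf("stop at xid 1233? %d\n", stops_here(&tgt, 1233, 0));    /* 0 */
    printf("stop at xid 1234? %d\n", stops_here(&tgt, 1234, 0));    /* 1 */
    return 0;
}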
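StartupXLOG()'s checkpoint selection, as these hunks describe it, tries the latest checkpoint recorded in pg_control and falls back to the previous one if the record cannot be read, forcing recovery in that case. A minimal sketch of that fallback, with read_checkpoint() standing in for ReadCheckpointRecord() and its validity checks:

#include <stdbool.h>
#include <stdio.h>

typedef struct Checkpoint
{
    const char *which;
    bool        readable;
} Checkpoint;

static bool
read_checkpoint(const Checkpoint *c)
{
    return c->readable;     /* the real code validates rmid, xl_info and length */
}

int
main(void)
{
    Checkpoint  primary = {"primary", false};   /* pretend the latest one is broken */
    Checkpoint  secondary = {"secondary", true};
    bool        force_recovery = false;
    const Checkpoint *chosen;

    if (read_checkpoint(&primary))
        chosen = &primary;
    else if (read_checkpoint(&secondary))
    {
        chosen = &secondary;
        force_recovery = true;          /* "force recovery even if SHUTDOWNED" */
    }
    else
    {
        fprintf(stderr, "could not locate a valid checkpoint record\n");
        return 1;
    }
    printf("using %s checkpoint, force recovery = %d\n",
           chosen->which, force_recovery);
    return 0;
}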
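The redo loop keeps ShmemVariableCache->nextXid ahead of every xid it replays, using TransactionIdFollowsOrEquals() and TransactionIdAdvance(). A simplified sketch of that bookkeeping, using the usual circular 32-bit comparison and ignoring the special-xid handling the real macros add:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t TransactionId;

/* Circular comparison for ordinary xids, as in TransactionIdFollowsOrEquals(). */
static bool
xid_follows_or_equals(TransactionId a, TransactionId b)
{
    return (int32_t) (a - b) >= 0;
}

/* nextXid must end up beyond any xid seen in a replayed record. */
static void
advance_next_xid(TransactionId *nextXid, TransactionId recordXid)
{
    if (xid_follows_or_equals(recordXid, *nextXid))
        *nextXid = recordXid + 1;       /* the real code uses TransactionIdAdvance() */
}

int
main(void)
{
    TransactionId nextXid = 1000;

    advance_next_xid(&nextXid, 998);    /* older xid: no change */
    advance_next_xid(&nextXid, 1005);   /* bump past the record's xid */
    printf("nextXid = %u\n", (unsigned) nextXid);   /* 1006 */
    return 0;
}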
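After replay, StartupXLOG() rounds the end-of-log offset up to a block boundary with ((EndOfLog.xrecoff - 1) / BLCKSZ + 1) * BLCKSZ, so the first in-memory page boundary lands at the end of the page containing the last record. The arithmetic in isolation, with BLCKSZ hard-coded to the common 8192 purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define BLCKSZ 8192             /* illustrative; the real value is a build-time constant */

/*
 * Round a byte offset up to the next BLCKSZ multiple; an offset already on
 * a boundary stays where it is, hence the "- 1".
 */
static uint32_t
round_up_to_block(uint32_t xrecoff)
{
    return ((xrecoff - 1) / BLCKSZ + 1) * BLCKSZ;
}

int
main(void)
{
    printf("%u -> %u\n", 8000u, (unsigned) round_up_to_block(8000));    /* 8192 */
    printf("%u -> %u\n", 8192u, (unsigned) round_up_to_block(8192));    /* 8192 */
    printf("%u -> %u\n", 8193u, (unsigned) round_up_to_block(8193));    /* 16384 */
    return 0;
}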
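CreateCheckPoint() skips a non-forced, non-shutdown checkpoint when nothing has happened since the last one, and the reflowed comment spells out the two tests: the current insert position must equal the end of the previous checkpoint record, and that checkpoint's redo pointer must point at the checkpoint itself. A simplified predicate over an invented RecPtr pair type:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for XLogRecPtr's (xlogid, xrecoff) pair. */
typedef struct RecPtr
{
    uint32_t xlogid;
    uint32_t xrecoff;
} RecPtr;

static bool
rec_eq(RecPtr a, RecPtr b)
{
    return a.xlogid == b.xlogid && a.xrecoff == b.xrecoff;
}

/*
 * Skip the checkpoint only if (1) nothing was inserted since the previous
 * checkpoint record ended, and (2) that checkpoint's redo pointer points at
 * the checkpoint record itself, i.e. it covered no concurrent activity.
 */
static bool
checkpoint_is_redundant(RecPtr curInsert, RecPtr lastCkptStart,
                        RecPtr lastCkptEnd, RecPtr lastCkptRedo)
{
    return rec_eq(curInsert, lastCkptEnd) && rec_eq(lastCkptRedo, lastCkptStart);
}

int
main(void)
{
    RecPtr ckptStart = {0, 0x2000};
    RecPtr ckptEnd = {0, 0x2058};
    RecPtr redo = ckptStart;            /* redo points at the checkpoint itself */
    RecPtr insert = ckptEnd;            /* nothing inserted since */

    printf("skip checkpoint? %s\n",
           checkpoint_is_redundant(insert, ckptStart, ckptEnd, redo) ? "yes" : "no");
    return 0;
}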
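assign_xlog_sync_method() and issue_xlog_fsync() flush the open WAL segment according to wal_sync_method. A stripped-down analogue, assuming a POSIX platform that provides fdatasync(); the real code additionally supports fsync_writethrough where available and reports failure with ereport(PANIC, ...) rather than exiting:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Simplified stand-ins for the wal_sync_method settings these hunks touch. */
typedef enum
{
    SYNC_FSYNC,
    SYNC_FDATASYNC,
    SYNC_OPEN
} SyncMethod;

static void
flush_wal_segment(int fd, SyncMethod method)
{
    switch (method)
    {
        case SYNC_FSYNC:
            if (fsync(fd) != 0)
            {
                perror("fsync");
                exit(1);
            }
            break;
        case SYNC_FDATASYNC:
            if (fdatasync(fd) != 0)
            {
                perror("fdatasync");
                exit(1);
            }
            break;
        case SYNC_OPEN:
            /* nothing to do: the segment was opened with a synchronous flag */
            break;
    }
}

int
main(void)
{
    int fd = open("walseg.tmp", O_RDWR | O_CREAT, 0600);

    if (fd < 0)
    {
        perror("open");
        return 1;
    }
    flush_wal_segment(fd, SYNC_FSYNC);
    close(fd);
    return 0;
}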
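pg_start_backup() and pg_stop_backup() turn WAL positions into segment file names with XLogFileName(). Judging by the %08X%16s and %24s scan formats in these hunks, the name is 24 hex digits: timeline, log id and segment number, each zero-padded to eight digits. An illustrative reimplementation of that formatting, not the actual macro:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t TimeLineID;

static void
wal_file_name(char *buf, size_t len, TimeLineID tli, uint32_t log, uint32_t seg)
{
    snprintf(buf, len, "%08X%08X%08X",
             (unsigned) tli, (unsigned) log, (unsigned) seg);
}

int
main(void)
{
    char name[64];

    wal_file_name(name, sizeof(name), 1, 0, 3);
    printf("%s\n", name);               /* 000000010000000000000003 */
    return 0;
}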
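The backup functions deliberately timestamp the label with C-library strftime()/localtime() rather than the backend timezone code, so labels are always recorded in the server's C-library timezone regardless of the TimeZone setting. A sketch of that choice; the format string and the "START TIME:" label here are assumptions, since neither is visible in the context shown:

#include <stdio.h>
#include <time.h>

int
main(void)
{
    char    strfbuf[128];
    time_t  stamp_time = time(NULL);

    /* Plain C strftime/localtime, as the pg_start_backup() comment prescribes. */
    strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z",
             localtime(&stamp_time));
    printf("START TIME: %s\n", strfbuf);
    return 0;
}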
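pg_stop_backup() and the history-file reader parse the label's first line with a deliberately crude fixed scanf pattern, as the comments admit, since the file format has no variability. The same pattern applied with sscanf() to a made-up sample line:

#include <stdio.h>

int
main(void)
{
    const char  *line = "START WAL LOCATION: 0/A000020 (file 00000001000000000000000A)\n";
    unsigned int xlogid;
    unsigned int xrecoff;
    char         fname[25];     /* 24-character segment name plus terminator */
    char         ch;

    if (sscanf(line, "START WAL LOCATION: %X/%X (file %24s)%c",
               &xlogid, &xrecoff, fname, &ch) != 4 || ch != '\n')
    {
        fprintf(stderr, "invalid data in backup label line\n");
        return 1;
    }
    printf("start = %X/%X, segment file = %s\n", xlogid, xrecoff, fname);
    return 0;
}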
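Finally, the xlogutils.c hunk keeps the comment explaining why XLogOpenRelation() calls smgrcreate(): a replayed write may target a relation file that a later WAL record drops, and it is safer to create the file and apply the write than to suppress it and risk losing data. The same open-or-create idea in plain POSIX terms, with an invented file name:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * During replay, treat a missing target file like an existing one: create it
 * on demand, apply the write, and let a later replayed DROP remove it again.
 */
static int
open_relation_for_replay(const char *path)
{
    return open(path, O_RDWR | O_CREAT, 0600);
}

int
main(void)
{
    int fd = open_relation_for_replay("replay_16384.tmp");

    if (fd < 0)
    {
        perror("open");
        return 1;
    }
    close(fd);
    return 0;
}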