src/backend/utils/cache/inval.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237

/*-------------------------------------------------------------------------
 *
 * inval.c
 *	  POSTGRES cache invalidation dispatcher code.
 *
 *	This is subtle stuff, so pay attention:
 *
 *	When a tuple is updated or deleted, our standard time qualification rules
 *	consider that it is *still valid* so long as we are in the same command,
 *	ie, until the next CommandCounterIncrement() or transaction commit.
 *	(See utils/time/tqual.c, and note that system catalogs are generally
 *	scanned under SnapshotNow rules by the system, or plain user snapshots
 *	for user queries.)	At the command boundary, the old tuple stops
 *	being valid and the new version, if any, becomes valid.  Therefore,
 *	we cannot simply flush a tuple from the system caches during heap_update()
 *	or heap_delete().  The tuple is still good at that point; what's more,
 *	even if we did flush it, it might be reloaded into the caches by a later
 *	request in the same command.  So the correct behavior is to keep a list
 *	of outdated (updated/deleted) tuples and then do the required cache
 *	flushes at the next command boundary.  We must also keep track of
 *	inserted tuples so that we can flush "negative" cache entries that match
 *	the new tuples; again, that mustn't happen until end of command.
 *
 *	Once we have finished the command, we still need to remember inserted
 *	tuples (including new versions of updated tuples), so that we can flush
 *	them from the caches if we abort the transaction.  Similarly, we'd better
 *	be able to flush "negative" cache entries that may have been loaded in
 *	place of deleted tuples, so we still need the deleted ones too.
 *
 *	If we successfully complete the transaction, we have to broadcast all
 *	these invalidation events to other backends (via the SI message queue)
 *	so that they can flush obsolete entries from their caches.  Note we have
 *	to record the transaction commit before sending SI messages, otherwise
 *	the other backends won't see our updated tuples as good.
 *
 *	When a subtransaction aborts, we can process and discard any events
 *	it has queued.  When a subtransaction commits, we just add its events
 *	to the pending lists of the parent transaction.
 *
 *	In short, we need to remember until xact end every insert or delete
 *	of a tuple that might be in the system caches.  Updates are treated as
 *	two events, delete + insert, for simplicity.  (There are cases where
 *	it'd be possible to record just one event, but we don't currently try.)
 *
 *	We do not need to register EVERY tuple operation in this way, just those
 *	on tuples in relations that have associated catcaches.  We do, however,
 *	have to register every operation on every tuple that *could* be in a
 *	catcache, whether or not it currently is in our cache.  Also, if the
 *	tuple is in a relation that has multiple catcaches, we need to register
 *	an invalidation message for each such catcache.  catcache.c's
 *	PrepareToInvalidateCacheTuple() routine provides the knowledge of which
 *	catcaches may need invalidation for a given tuple.
 *
 *	Also, whenever we see an operation on a pg_class or pg_attribute tuple,
 *	we register a relcache flush operation for the relation described by that
 *	tuple.  pg_class updates trigger an smgr flush operation as well.
 *
 *	We keep the relcache and smgr flush requests in lists separate from the
 *	catcache tuple flush requests.  This allows us to issue all the pending
 *	catcache flushes before we issue relcache flushes, which saves us from
 *	loading a catcache tuple during relcache load only to flush it again
 *	right away.  Also, we avoid queuing multiple relcache flush requests for
 *	the same relation, since a relcache flush is relatively expensive to do.
 *	(XXX is it worth testing likewise for duplicate catcache flush entries?
 *	Probably not.)
 *
 *	If a relcache flush is issued for a system relation that we preload
 *	from the relcache init file, we must also delete the init file so that
 *	it will be rebuilt during the next backend restart.  The actual work of
 *	manipulating the init file is in relcache.c, but we keep track of the
 *	need for it here.
 *
 *	The request lists proper are kept in CurTransactionContext of their
 *	creating (sub)transaction, since they can be forgotten on abort of that
 *	transaction but must be kept till top-level commit otherwise.  For
 *	simplicity we keep the controlling list-of-lists in TopTransactionContext.
 *
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.89 2009/06/11 14:49:05 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/twophase_rmgr.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "storage/sinval.h"
#include "storage/smgr.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"


/*
 * To minimize palloc traffic, we keep pending requests in successively-
 * larger chunks (a slightly more sophisticated version of an expansible
 * array).  All request types can be stored as SharedInvalidationMessage
 * records.  The ordering of requests within a list is never significant.
 *
 * Each chunk is allocated with room for maxitems messages: msgs[] is
 * declared with a single element, and AddInvalidationMessage() adds space
 * for the remaining (maxitems - 1) entries when it sizes the allocation.
 * AddInvalidationMessage() pushes new chunks onto the front of the list and
 * only ever stores into the chunk at the head.
 */
typedef struct InvalidationChunk
{
	struct InvalidationChunk *next;		/* list link */
	int			nitems;			/* # items currently stored in chunk */
	int			maxitems;		/* size of allocated array in this chunk */
	SharedInvalidationMessage msgs[1];	/* VARIABLE LENGTH ARRAY */
} InvalidationChunk;			/* VARIABLE LENGTH STRUCTURE */

/*
 * One logical invalidation list, physically split into two chunk lists so
 * that all catcache messages can be processed before any relcache/smgr
 * messages (see ProcessInvalidationMessages).  A NULL pointer means the
 * corresponding list is empty.
 */
typedef struct InvalidationListHeader
{
	InvalidationChunk *cclist;	/* list of chunks holding catcache msgs */
	InvalidationChunk *rclist;	/* list of chunks holding relcache/smgr msgs */
} InvalidationListHeader;

/*----------------
 * Invalidation info is divided into two lists:
 *	1) events so far in current command, not yet reflected to caches.
 *	2) events in previous commands of current transaction; these have
 *	   been reflected to local caches, and must be either broadcast to
 *	   other backends or rolled back from local cache when we commit
 *	   or abort the transaction.
 * Actually, we need two such lists for each level of nested transaction,
 * so that we can discard events from an aborted subtransaction.  When
 * a subtransaction commits, we append its lists to the parent's lists.
 *
 * The relcache-file-invalidated flag can just be a simple boolean,
 * since we only act on it at transaction commit; we don't care which
 * command of the transaction set it.
 *
 * Entries form a stack linked through "parent": AtStart_Inval() creates the
 * bottom entry and AtSubStart_Inval() pushes one entry per subtransaction.
 *----------------
 */

typedef struct TransInvalidationInfo
{
	/* Back link to parent transaction's info (NULL for the top level) */
	struct TransInvalidationInfo *parent;

	/* Subtransaction nesting depth, from GetCurrentTransactionNestLevel() */
	int			my_level;

	/* head of current-command event list */
	InvalidationListHeader CurrentCmdInvalidMsgs;

	/* head of previous-commands event list */
	InvalidationListHeader PriorCmdInvalidMsgs;

	/* init file must be invalidated? (set by RegisterRelcacheInvalidation) */
	bool		RelcacheInitFileInval;
} TransInvalidationInfo;

/*
 * Top of the stack of per-(sub)transaction invalidation state.
 * AtStart_Inval() expects this to be NULL and installs the top-level entry;
 * AtSubStart_Inval() pushes a new entry whose parent is the previous value.
 */
static TransInvalidationInfo *transInvalInfo = NULL;

/*
 * Dynamically-registered callback functions.  Current implementation
 * assumes there won't be very many of these at once; could improve if needed.
 *
 * Both tables are fixed-size arrays; the *_count variables give the number
 * of slots currently in use.  Entries are invoked in index order by
 * LocalExecuteInvalidationMessage() and InvalidateSystemCaches().
 */

#define MAX_SYSCACHE_CALLBACKS 20
#define MAX_RELCACHE_CALLBACKS 5

/* Callbacks fired for catcache invalidations, filtered by cache id */
static struct SYSCACHECALLBACK
{
	int16		id;				/* cache number */
	SyscacheCallbackFunction function;	/* function to call */
	Datum		arg;			/* passed as the callback's first argument */
}	syscache_callback_list[MAX_SYSCACHE_CALLBACKS];

static int	syscache_callback_count = 0;

/* Callbacks fired for every relcache invalidation */
static struct RELCACHECALLBACK
{
	RelcacheCallbackFunction function;	/* function to call */
	Datum		arg;			/* passed as the callback's first argument */
}	relcache_callback_list[MAX_RELCACHE_CALLBACKS];

static int	relcache_callback_count = 0;

/*
 * info values for 2PC callback
 *
 * AtPrepare_Inval() writes records tagged with these codes and
 * inval_twophase_postcommit() dispatches on them.  The FILE_BEFORE and
 * FILE_AFTER records bracket the message records and trigger
 * RelationCacheInitFilePreInvalidate() and
 * RelationCacheInitFilePostInvalidate() respectively.
 */
#define TWOPHASE_INFO_MSG			0	/* SharedInvalidationMessage */
#define TWOPHASE_INFO_FILE_BEFORE	1	/* relcache file inval */
#define TWOPHASE_INFO_FILE_AFTER	2	/* relcache file inval */

/* Forward declaration: used by AtPrepare_Inval() before its definition */
static void PersistInvalidationMessage(SharedInvalidationMessage *msg);


/* ----------------------------------------------------------------
 *				Invalidation list support functions
 *
 * These routines and macros encapsulate processing of the "chunked"
 * representation of what is logically just a list of messages.
 * ----------------------------------------------------------------
 */

/*
 * AddInvalidationMessage
 *		Store a copy of *msg in the chunk list headed by *listHdr.
 *
 * The message always goes into the chunk at the head of the list.  If the
 * list is empty, or the head chunk has no free slot, a new chunk is
 * allocated in CurTransactionContext and pushed onto the front of the list:
 * the first chunk holds FIRSTCHUNKSIZE messages, and each later one is
 * twice the size of the chunk it is pushed in front of.
 *
 * No attempt is made to preserve the order in which messages were added.
 */
static void
AddInvalidationMessage(InvalidationChunk **listHdr,
					   SharedInvalidationMessage *msg)
{
#define FIRSTCHUNKSIZE 32
	InvalidationChunk *target = *listHdr;

	if (target == NULL || target->nitems >= target->maxitems)
	{
		InvalidationChunk *fresh;
		int			capacity;

		/* Initial size for an empty list, else double the full head chunk */
		if (target == NULL)
			capacity = FIRSTCHUNKSIZE;
		else
			capacity = 2 * target->maxitems;

		/* The struct itself already has room for one message */
		fresh = (InvalidationChunk *)
			MemoryContextAlloc(CurTransactionContext,
							   sizeof(InvalidationChunk) +
							   (capacity - 1) * sizeof(SharedInvalidationMessage));
		fresh->next = target;
		fresh->maxitems = capacity;
		fresh->nitems = 0;

		/* The new chunk becomes the head and receives the message */
		*listHdr = fresh;
		target = fresh;
	}

	/* Copy the message into the next free slot of the head chunk */
	target->msgs[target->nitems++] = *msg;
}

/*
 * AppendInvalidationMessageList
 *		Move every chunk of the source list onto the destination list.
 *
 * The source chunks end up in front of the chunks already in *destHdr, and
 * *srcHdr is reset to NULL.  Nothing happens if the source list is empty.
 */
static void
AppendInvalidationMessageList(InvalidationChunk **destHdr,
							  InvalidationChunk **srcHdr)
{
	InvalidationChunk *tail;

	if (*srcHdr == NULL)
		return;					/* empty source: nothing to move */

	/* Walk to the last chunk of the source list */
	for (tail = *srcHdr; tail->next != NULL; tail = tail->next)
		;

	/* Splice: source chunks first, then the old destination chunks */
	tail->next = *destHdr;
	*destHdr = *srcHdr;
	*srcHdr = NULL;
}

/*
 * Process a list of invalidation messages.
 *
 * This is a macro that executes the given code fragment for each message in
 * a message chunk list.  The fragment should refer to the message as *msg.
 *
 * The fragment is expanded inside two nested for-loops (over chunks, then
 * over the items of each chunk).  A "return" in the fragment therefore
 * leaves the calling function (the duplicate checks in
 * AddRelcacheInvalidationMessage and AddSmgrInvalidationMessage rely on
 * this), whereas "break" or "continue" would act only on the inner per-item
 * loop.  The macro declares "msg" itself, which hides any variable of that
 * name in the caller while the fragment runs.
 */
#define ProcessMessageList(listHdr, codeFragment) \
	do { \
		InvalidationChunk *_chunk; \
		for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \
		{ \
			int		_cindex; \
			for (_cindex = 0; _cindex < _chunk->nitems; _cindex++) \
			{ \
				SharedInvalidationMessage *msg = &_chunk->msgs[_cindex]; \
				codeFragment; \
			} \
		} \
	} while (0)

/*
 * Process a list of invalidation messages group-wise.
 *
 * As above, but the code fragment can handle an array of messages.
 * The fragment should refer to the messages as msgs[], with n entries.
 *
 * The fragment is executed once per chunk: msgs points at the chunk's
 * message array and n is the chunk's nitems.  Both names are declared by
 * the macro inside the per-chunk loop body.
 */
#define ProcessMessageListMulti(listHdr, codeFragment) \
	do { \
		InvalidationChunk *_chunk; \
		for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \
		{ \
			SharedInvalidationMessage *msgs = _chunk->msgs; \
			int		n = _chunk->nitems; \
			codeFragment; \
		} \
	} while (0)


/* ----------------------------------------------------------------
 *				Invalidation set support functions
 *
 * These routines understand about the division of a logical invalidation
 * list into separate physical lists for catcache and relcache/smgr entries.
 * ----------------------------------------------------------------
 */

/*
 * AddCatcacheInvalidationMessage
 *		Build a catcache invalidation message from the given cache id, hash
 *		value, tuple pointer and database OID, and queue it on the catcache
 *		chunk list of *hdr.
 *
 * The cache id is narrowed to int16 to fit the message format.
 */
static void
AddCatcacheInvalidationMessage(InvalidationListHeader *hdr,
							   int id, uint32 hashValue,
							   ItemPointer tuplePtr, Oid dbId)
{
	SharedInvalidationMessage ccmsg;

	ccmsg.cc.id = (int16) id;
	ccmsg.cc.dbId = dbId;
	ccmsg.cc.hashValue = hashValue;
	ccmsg.cc.tuplePtr = *tuplePtr;

	AddInvalidationMessage(&hdr->cclist, &ccmsg);
}

/*
 * AddRelcacheInvalidationMessage
 *		Queue a relcache invalidation message for relId on the
 *		relcache/smgr chunk list of *hdr, unless one is already there.
 */
static void
AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
							   Oid dbId, Oid relId)
{
	SharedInvalidationMessage newmsg;

	/*
	 * Look for an existing relcache entry for this relation; if found there
	 * is nothing to add.  Only relId is compared: dbId is assumed never to
	 * change for a given relation, so it need not be checked.  The "return"
	 * inside the fragment exits this function.
	 */
	ProcessMessageList(hdr->rclist,
					   if (msg->rc.id == SHAREDINVALRELCACHE_ID &&
						   msg->rc.relId == relId)
					   return);

	/* Not queued yet, so build the message and add it */
	newmsg.rc.relId = relId;
	newmsg.rc.dbId = dbId;
	newmsg.rc.id = SHAREDINVALRELCACHE_ID;
	AddInvalidationMessage(&hdr->rclist, &newmsg);
}

/*
 * AddSmgrInvalidationMessage
 *		Queue an smgr invalidation message for rnode on the relcache/smgr
 *		chunk list of *hdr, unless an identical one is already there.
 */
static void
AddSmgrInvalidationMessage(InvalidationListHeader *hdr,
						   RelFileNode rnode)
{
	SharedInvalidationMessage newmsg;

	/*
	 * Skip the insertion when an smgr entry for the same RelFileNode is
	 * already queued.  The "return" inside the fragment exits this function.
	 */
	ProcessMessageList(hdr->rclist,
					   if (msg->sm.id == SHAREDINVALSMGR_ID &&
						   RelFileNodeEquals(msg->sm.rnode, rnode))
					   return);

	/* Not queued yet, so build the message and add it */
	newmsg.sm.rnode = rnode;
	newmsg.sm.id = SHAREDINVALSMGR_ID;
	AddInvalidationMessage(&hdr->rclist, &newmsg);
}

/*
 * AppendInvalidationMessages
 *		Transfer all messages of *src to *dest, leaving *src empty.
 *
 * The catcache and relcache/smgr chunk lists are independent of each other
 * and are moved separately.
 */
static void
AppendInvalidationMessages(InvalidationListHeader *dest,
						   InvalidationListHeader *src)
{
	AppendInvalidationMessageList(&dest->rclist, &src->rclist);
	AppendInvalidationMessageList(&dest->cclist, &src->cclist);
}

/*
 * ProcessInvalidationMessages
 *		Call func once for each message in the invalidation list *hdr.
 *		The list itself is left unchanged.
 *
 * All catcache messages are handed to func before any relcache/smgr
 * message; the file header comment explains why that order is wanted.
 */
static void
ProcessInvalidationMessages(InvalidationListHeader *hdr,
							void (*func) (SharedInvalidationMessage *msg))
{
	/* catcache entries first ... */
	ProcessMessageList(hdr->cclist, (*func) (msg));

	/* ... then relcache and smgr entries */
	ProcessMessageList(hdr->rclist, (*func) (msg));
}

/*
 * ProcessInvalidationMessagesMulti
 *		Like ProcessInvalidationMessages, but func receives a whole array of
 *		messages per call (one call per chunk) instead of a single message.
 *
 * Chunks of the catcache list are passed before chunks of the
 * relcache/smgr list.
 */
static void
ProcessInvalidationMessagesMulti(InvalidationListHeader *hdr,
				 void (*func) (const SharedInvalidationMessage *msgs, int n))
{
	/* catcache chunks first ... */
	ProcessMessageListMulti(hdr->cclist, (*func) (msgs, n));

	/* ... then relcache and smgr chunks */
	ProcessMessageListMulti(hdr->rclist, (*func) (msgs, n));
}

/* ----------------------------------------------------------------
 *					  private support functions
 * ----------------------------------------------------------------
 */

/*
 * RegisterCatcacheInvalidation
 *
 * Queue a catcache tuple invalidation on the current-command list of the
 * innermost (sub)transaction.  PrepareForTupleInvalidation passes this
 * function to PrepareToInvalidateCacheTuple as its callback.
 */
static void
RegisterCatcacheInvalidation(int cacheId,
							 uint32 hashValue,
							 ItemPointer tuplePtr,
							 Oid dbId)
{
	InvalidationListHeader *pending = &transInvalInfo->CurrentCmdInvalidMsgs;

	AddCatcacheInvalidationMessage(pending,
								   cacheId, hashValue, tuplePtr, dbId);
}

/*
 * RegisterRelcacheInvalidation
 *
 * Queue a relcache invalidation for (dbId, relId) on the current-command
 * list of the innermost (sub)transaction, and remember whether the relcache
 * init file will have to be removed at commit.
 */
static void
RegisterRelcacheInvalidation(Oid dbId, Oid relId)
{
	InvalidationListHeader *pending = &transInvalInfo->CurrentCmdInvalidMsgs;

	AddRelcacheInvalidationMessage(pending, dbId, relId);

	/*
	 * Relcache invalidations usually accompany a system catalog update, but
	 * not always.  As a quick hack, mark the current command id as used so
	 * that the next CommandCounterIncrement() will believe it has to run
	 * CommandEndInvalidationMessages().  The returned id is not needed.
	 */
	(void) GetCurrentCommandId(true);

	/*
	 * Relations stored in the relcache init file require that file to be
	 * zapped at commit; note that requirement here.
	 */
	if (RelationIdIsInInitFile(relId))
		transInvalInfo->RelcacheInitFileInval = true;
}

/*
 * RegisterSmgrInvalidation
 *
 * Queue an smgr invalidation for rnode on the current-command list of the
 * innermost (sub)transaction.
 */
static void
RegisterSmgrInvalidation(RelFileNode rnode)
{
	InvalidationListHeader *pending = &transInvalInfo->CurrentCmdInvalidMsgs;

	AddSmgrInvalidationMessage(pending, rnode);

	/*
	 * There may be no catalog change associated with this request, so mark
	 * the command id as used, exactly as RegisterRelcacheInvalidation does.
	 */
	(void) GetCurrentCommandId(true);
}

/*
 * LocalExecuteInvalidationMessage
 *
 * Apply one invalidation message, of whatever type, to this backend's own
 * caches.  Nothing is sent to other backends.
 *
 * The message type is determined by msg->id: non-negative values are
 * catcache ids, SHAREDINVALRELCACHE_ID and SHAREDINVALSMGR_ID mark relcache
 * and smgr messages, and anything else is reported with elog(FATAL).
 */
static void
LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
{
	int			idx;

	if (msg->id >= 0)
	{
		/* Catcache message: ignore it unless it is for our DB or dbId 0 */
		if (msg->cc.dbId != MyDatabaseId && msg->cc.dbId != 0)
			return;

		CatalogCacheIdInvalidate(msg->cc.id,
								 msg->cc.hashValue,
								 &msg->cc.tuplePtr);

		/* Notify the callbacks registered for this particular cache */
		for (idx = 0; idx < syscache_callback_count; idx++)
		{
			struct SYSCACHECALLBACK *cb = &syscache_callback_list[idx];

			if (cb->id != msg->cc.id)
				continue;
			(*cb->function) (cb->arg, msg->cc.id, &msg->cc.tuplePtr);
		}
		return;
	}

	if (msg->id == SHAREDINVALRELCACHE_ID)
	{
		/* Relcache message: same database filter as above */
		if (msg->rc.dbId != MyDatabaseId && msg->rc.dbId != InvalidOid)
			return;

		RelationCacheInvalidateEntry(msg->rc.relId);

		/* Every relcache callback is notified, whatever the relation */
		for (idx = 0; idx < relcache_callback_count; idx++)
		{
			struct RELCACHECALLBACK *cb = &relcache_callback_list[idx];

			(*cb->function) (cb->arg, msg->rc.relId);
		}
		return;
	}

	if (msg->id == SHAREDINVALSMGR_ID)
	{
		/*
		 * No database filter here: we could be holding smgr entries for
		 * relations that belong to other databases.
		 */
		smgrclosenode(msg->sm.rnode);
		return;
	}

	elog(FATAL, "unrecognized SI message id: %d", msg->id);
}

/*
 *		InvalidateSystemCaches
 *
 *		Discard every tuple in the system catalog caches together with all
 *		cached relation descriptors and smgr cache entries.  Relation
 *		descriptors with positive refcounts are rebuilt afterwards.
 *
 *		This is used when the shared-inval queue signals an overflow: some
 *		messages have been lost, so we no longer know which entries are
 *		stale.  Every registered syscache callback is then called with a
 *		NULL tuple pointer and every relcache callback with InvalidOid.
 */
static void
InvalidateSystemCaches(void)
{
	int			n;

	ResetCatalogCaches();
	RelationCacheInvalidate();	/* takes care of the smgr cache as well */

	for (n = 0; n < syscache_callback_count; n++)
	{
		struct SYSCACHECALLBACK *cb = &syscache_callback_list[n];

		(*cb->function) (cb->arg, cb->id, NULL);
	}

	for (n = 0; n < relcache_callback_count; n++)
	{
		struct RELCACHECALLBACK *cb = &relcache_callback_list[n];

		(*cb->function) (cb->arg, InvalidOid);
	}
}

/*
 * PrepareForTupleInvalidation
 *		Work out which catalog-cache and relation-cache entries are affected
 *		by a change to the given tuple, and register the matching
 *		invalidation events.
 *
 * Catcache events are registered for any non-TOAST system relation.  A
 * relcache event is registered in addition when the tuple belongs to
 * pg_class, pg_attribute or pg_index; pg_class tuples also produce an smgr
 * event.
 */
static void
PrepareForTupleInvalidation(Relation relation, HeapTuple tuple)
{
	Oid			catalogId;
	Oid			targetDb;
	Oid			targetRel;

	/*
	 * Nothing to do during bootstrap.  Otherwise only system relations
	 * matter: user-relation tuples are never in catcaches and cannot affect
	 * the relcache.  TOAST tables count as system relations, so they pass
	 * the second test and need a separate check to be ignored as well.
	 */
	if (IsBootstrapProcessingMode() ||
		!IsSystemRelation(relation) ||
		IsToastRelation(relation))
		return;

	/*
	 * The catcache gets first go at the tuple.
	 */
	PrepareToInvalidateCacheTuple(relation, tuple,
								  RegisterCatcacheInvalidation);

	/*
	 * Next, check whether the tuple is one of the primary definers of a
	 * relcache entry.
	 */
	catalogId = RelationGetRelid(relation);

	switch (catalogId)
	{
		case RelationRelationId:
			{
				Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
				RelFileNode fileNode;

				targetRel = HeapTupleGetOid(tuple);
				targetDb = classForm->relisshared ? InvalidOid : MyDatabaseId;

				/*
				 * Besides the relcache inval, an smgr inval must go out,
				 * because another backend may hold an smgr cache entry for
				 * the relation without holding a relcache entry.
				 *
				 * Note: when a pg_class row update assigns a new relfilenode
				 * or reltablespace, this function runs for both the old and
				 * the new tuple, so messages carrying both the old and new
				 * RelFileNode are broadcast.  That makes other backends
				 * close their smgr references to the old file.
				 *
				 * XXX possible future cleanup: it might be better to trigger
				 * smgr flushes explicitly, rather than indirectly from
				 * pg_class updates.
				 */
				fileNode.spcNode = classForm->reltablespace ?
					classForm->reltablespace : MyDatabaseTableSpace;
				fileNode.dbNode = targetDb;
				fileNode.relNode = classForm->relfilenode;
				RegisterSmgrInvalidation(fileNode);
			}
			break;

		case AttributeRelationId:
			{
				Form_pg_attribute attForm = (Form_pg_attribute) GETSTRUCT(tuple);

				targetRel = attForm->attrelid;

				/*
				 * KLUGE ALERT: the relcache event always carries
				 * MyDatabaseId, even for a shared rel (which we can't easily
				 * detect here).  Consequently only backends connected to
				 * this database react to the flush request.  That is
				 * actually appropriate, because only they could see our
				 * pg_attribute change anyway; it just looks a bit ugly.  (In
				 * practice, shared relations can't have schema changes after
				 * bootstrap, so we should never get here for a shared rel.)
				 */
				targetDb = MyDatabaseId;
			}
			break;

		case IndexRelationId:
			{
				Form_pg_index indexForm = (Form_pg_index) GETSTRUCT(tuple);

				/*
				 * An updated pg_index row calls for a relcache inval on the
				 * index relation.  As with pg_attribute, the shared status
				 * of the index is unknown here, but in practice that is
				 * harmless since indexes of shared catalogs can't have such
				 * updates.
				 */
				targetRel = indexForm->indexrelid;
				targetDb = MyDatabaseId;
			}
			break;

		default:
			/* Not a relcache-defining catalog; catcache events suffice */
			return;
	}

	/*
	 * The tuple does define a relcache entry, so register the event.
	 */
	RegisterRelcacheInvalidation(targetDb, targetRel);
}


/* ----------------------------------------------------------------
 *					  public functions
 * ----------------------------------------------------------------
 */

/*
 * AcceptInvalidationMessages
 *		Fetch pending messages from the shared invalidation message queue
 *		and apply them to the local caches.
 *
 * Each message is handled by LocalExecuteInvalidationMessage;
 * InvalidateSystemCaches is supplied as the handler for the case where
 * everything has to be thrown away.
 *
 * Note:
 *		This should be called as the first step in processing a transaction.
 */
void
AcceptInvalidationMessages(void)
{
	ReceiveSharedInvalidMessages(LocalExecuteInvalidationMessage,
								 InvalidateSystemCaches);

	/*
	 * Debugging aid: force a cache flush at every point where one could
	 * happen.
	 *
	 * CLOBBER_CACHE_ALWAYS combined with CLOBBER_FREED_MEMORY gives a fairly
	 * thorough check that the system has no cache-flush hazards, at the
	 * price of being unbelievably slow --- the regression tests take about
	 * 100 times longer than normal.  The static flag keeps the forced flush
	 * from re-entering itself.
	 *
	 * CLOBBER_CACHE_RECURSIVELY omits that guard, for gluttons for
	 * punishment.  It slows things by at least a factor of 10000, so running
	 * the entire regression tests that way is not advisable.  A few simple
	 * tests are useful to verify that cache reload isn't subject to internal
	 * cache-flush hazards, but after a few thousand recursive reloads there
	 * is unlikely to be more to learn.
	 */
#if defined(CLOBBER_CACHE_ALWAYS)
	{
		static bool clobber_in_progress = false;

		if (!clobber_in_progress)
		{
			clobber_in_progress = true;
			InvalidateSystemCaches();
			clobber_in_progress = false;
		}
	}
#elif defined(CLOBBER_CACHE_RECURSIVELY)
	InvalidateSystemCaches();
#endif
}

/*
 * AtStart_Inval
 *		Initialize inval lists at start of a main transaction.
 */
void
AtStart_Inval(void)
{
	Assert(transInvalInfo == NULL);
	transInvalInfo = (TransInvalidationInfo *)
		MemoryContextAllocZero(TopTransactionContext,
							   sizeof(TransInvalidationInfo));
	transInvalInfo->my_level = GetCurrentTransactionNestLevel();
}

/*
 * AtPrepare_Inval
 *		Save the inval lists state at 2PC transaction prepare.
 *
 * In this phase we just generate 2PC records for all the pending invalidation
 * work.
 */
void
AtPrepare_Inval(void)
{
	/* Must be at top of stack */
	Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);

	/*
	 * Relcache init file invalidation requires processing both before and
	 * after we send the SI messages.
	 */
	if (transInvalInfo->RelcacheInitFileInval)
		RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_BEFORE,
							   NULL, 0);

	AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
							   &transInvalInfo->CurrentCmdInvalidMsgs);

	ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
								PersistInvalidationMessage);

	if (transInvalInfo->RelcacheInitFileInval)
		RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_AFTER,
							   NULL, 0);
}

/*
 * PostPrepare_Inval
 *		Clean up after successful PREPARE.
 *
 * Here, we want to act as though the transaction aborted, so that we will
 * undo any syscache changes it made, thereby bringing us into sync with the
 * outside world, which doesn't believe the transaction committed yet.
 *
 * If the prepared transaction is later aborted, there is nothing more to
 * do; if it commits, we will receive the consequent inval messages just
 * like everyone else.
 */
void
PostPrepare_Inval(void)
{
	/* isCommit = false: take the abort path of end-of-transaction cleanup */
	AtEOXact_Inval(false);
}

/*
 * AtSubStart_Inval
 *		Set up empty invalidation lists when a subtransaction starts.
 *
 * A zero-filled state block is pushed on top of the existing stack, with
 * its parent link pointing at the enclosing (sub)transaction's state.
 */
void
AtSubStart_Inval(void)
{
	TransInvalidationInfo *child;

	/* A subtransaction always has an enclosing transaction's state */
	Assert(transInvalInfo != NULL);

	child = (TransInvalidationInfo *)
		MemoryContextAllocZero(TopTransactionContext,
							   sizeof(TransInvalidationInfo));
	child->my_level = GetCurrentTransactionNestLevel();
	child->parent = transInvalInfo;

	transInvalInfo = child;
}

/*
 * PersistInvalidationMessage
 *		Register one invalidation message as a 2PC record of type
 *		TWOPHASE_INFO_MSG; the record payload is the message itself.
 */
static void
PersistInvalidationMessage(SharedInvalidationMessage *msg)
{
	RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_MSG,
						   msg, sizeof(*msg));
}

/*
 * inval_twophase_postcommit
 *		Act on one invalidation record read back from the 2PC state file.
 *
 * xid:		transaction the record belongs to (not used here)
 * info:	record kind; one of TWOPHASE_INFO_MSG, TWOPHASE_INFO_FILE_BEFORE
 *			or TWOPHASE_INFO_FILE_AFTER
 * recdata:	record payload; a SharedInvalidationMessage for TWOPHASE_INFO_MSG
 * len:		payload length in bytes
 *
 * Any other record kind trips an assertion and is otherwise ignored.
 */
void
inval_twophase_postcommit(TransactionId xid, uint16 info,
						  void *recdata, uint32 len)
{
	if (info == TWOPHASE_INFO_MSG)
	{
		/* A single saved SI message: forward it to the shared queue */
		SharedInvalidationMessage *msg;

		msg = (SharedInvalidationMessage *) recdata;
		Assert(len == sizeof(SharedInvalidationMessage));
		SendSharedInvalidMessages(msg, 1);
	}
	else if (info == TWOPHASE_INFO_FILE_BEFORE)
	{
		/* Marker recorded ahead of the messages */
		RelationCacheInitFilePreInvalidate();
	}
	else if (info == TWOPHASE_INFO_FILE_AFTER)
	{
		/* Marker recorded behind the messages */
		RelationCacheInitFilePostInvalidate();
	}
	else
	{
		/* Unknown record kind */
		Assert(false);
	}
}


/*
 * AtEOXact_Inval
 *		Deal with the queued invalidation messages when the top-level
 *		transaction ends.
 *
 * Commit (isCommit true): the messages in PriorCmdInvalidMsgs are sent to
 * the shared invalidation message queue.  They will be read by other
 * backends and also by this backend at its next transaction start (via
 * AcceptInvalidationMessages), so whatever is still sitting in
 * CurrentCmdInvalidMsgs needs no immediate local processing; it is simply
 * appended and sent along with the rest.
 *
 * Abort (isCommit false): the messages in PriorCmdInvalidMsgs are processed
 * locally.  Nothing is sent to other backends, since they'll not have seen
 * our changed tuples anyway.  CurrentCmdInvalidMsgs is just forgotten,
 * because those changes haven't touched the caches yet.
 *
 * Either way the state pointer is reset afterwards.  No memory is released
 * explicitly, since TopTransactionContext is about to be emptied anyway.
 *
 * Note:
 *		This should be called as the last step in processing a transaction.
 */
void
AtEOXact_Inval(bool isCommit)
{
	if (!isCommit)
	{
		/* Abort: tolerate there being no state at all */
		if (transInvalInfo != NULL)
		{
			/* Must be at top of stack */
			Assert(transInvalInfo->parent == NULL);

			ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
										LocalExecuteInvalidationMessage);
		}
	}
	else
	{
		TransInvalidationInfo *topInfo;

		/* Must be at top of stack */
		Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);

		topInfo = transInvalInfo;

		/*
		 * Relcache init file invalidation needs a step both before and
		 * after the SI messages are sent.  It is only done on this (commit)
		 * path.
		 */
		if (topInfo->RelcacheInitFileInval)
			RelationCacheInitFilePreInvalidate();

		/* Merge the unprocessed current-command messages, then send all */
		AppendInvalidationMessages(&topInfo->PriorCmdInvalidMsgs,
								   &topInfo->CurrentCmdInvalidMsgs);
		ProcessInvalidationMessagesMulti(&topInfo->PriorCmdInvalidMsgs,
										 SendSharedInvalidMessages);

		if (topInfo->RelcacheInitFileInval)
			RelationCacheInitFilePostInvalidate();
	}

	/* Need not free anything explicitly */
	transInvalInfo = NULL;
}

/*
 * AtEOSubXact_Inval
 *		Deal with the queued invalidation messages when a subtransaction
 *		ends.
 *
 * Commit (isCommit true): process CurrentCmdInvalidMsgs if there is anything
 * in it (there probably isn't), then hand this level's PriorCmdInvalidMsgs,
 * which by then includes the former current-command messages, up to the
 * parent's PriorCmdInvalidMsgs list.  A pending relcache init file
 * invalidation is passed up to the parent as well.
 *
 * Abort (isCommit false): process the messages in PriorCmdInvalidMsgs
 * locally.  Nothing is sent to other backends, and CurrentCmdInvalidMsgs is
 * forgotten since those changes haven't touched the caches yet.
 *
 * In both cases the state stack is popped and the popped entry itself is
 * pfree'd; the message lists are not freed explicitly.  An abort may be
 * requested twice for the same nesting level, so on abort nothing is done
 * unless the top entry actually belongs to the current level.
 */
void
AtEOSubXact_Inval(bool isCommit)
{
	int			my_level = GetCurrentTransactionNestLevel();
	TransInvalidationInfo *myInfo = transInvalInfo;
	TransInvalidationInfo *parentInfo;

	if (isCommit)
	{
		/* Must be at non-top of stack */
		Assert(myInfo != NULL && myInfo->parent != NULL);
		Assert(myInfo->my_level == my_level);

		parentInfo = myInfo->parent;

		/* If CurrentCmdInvalidMsgs still has anything, fix it */
		CommandEndInvalidationMessages();

		/* Pass up my inval messages to parent */
		AppendInvalidationMessages(&parentInfo->PriorCmdInvalidMsgs,
								   &myInfo->PriorCmdInvalidMsgs);

		/* Pending relcache inval becomes parent's problem too */
		if (myInfo->RelcacheInitFileInval)
			parentInfo->RelcacheInitFileInval = true;
	}
	else
	{
		/* No entry for this level (e.g. second abort of it): do nothing */
		if (myInfo == NULL || myInfo->my_level != my_level)
			return;

		/* Must be at non-top of stack */
		Assert(myInfo->parent != NULL);

		parentInfo = myInfo->parent;

		ProcessInvalidationMessages(&myInfo->PriorCmdInvalidMsgs,
									LocalExecuteInvalidationMessage);
	}

	/* Pop the transaction state stack */
	transInvalInfo = parentInfo;

	/* Need not free anything else explicitly */
	pfree(myInfo);
}

/*
 * CommandEndInvalidationMessages
 *		Process queued-up invalidation messages at end of one command
 *		in a transaction.
 *
 * Here, we send no messages to the shared queue, since we don't know yet if
 * we will commit.  We do need to locally process the CurrentCmdInvalidMsgs
 * list, so as to flush our caches of any entries we have outdated in the
 * current command.  We then move the current-cmd list over to become part
 * of the prior-cmds list.
 *
 * Note:
 *		This should be called during CommandCounterIncrement(),
 *		after we have advanced the command ID.
 */
void
CommandEndInvalidationMessages(void)
{
	/*
	 * You might think this shouldn't be called outside any transaction, but
	 * bootstrap does it, and also ABORT issued when not in a transaction. So
	 * just quietly return if no state to work on.
	 */
	if (transInvalInfo == NULL)
		return;

	ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
								LocalExecuteInvalidationMessage);
	AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
							   &transInvalInfo->CurrentCmdInvalidMsgs);
}


/*
 * BeginNonTransactionalInvalidation
 *		Prepare for invalidation messages for nontransactional updates.
 *
 * A nontransactional invalidation is one that must be sent whether or not
 * the current transaction eventually commits.  We arrange for all invals
 * queued between this call and EndNonTransactionalInvalidation() to be sent
 * immediately when the latter is called.
 *
 * Currently, this is only used by heap_page_prune(), and only when it is
 * invoked during VACUUM FULL's first pass over a table.  We expect therefore
 * that we are not inside a subtransaction and there are no already-pending
 * invalidations.  This could be relaxed by setting up a new nesting level of
 * invalidation data, but for now there's no need.  Note that heap_page_prune
 * knows that this function does not change any state, and therefore there's
 * no need to worry about cleaning up if there's an elog(ERROR) before
 * reaching EndNonTransactionalInvalidation (the invals will just be thrown
 * away if that happens).
 *
 * The body consists solely of assertions that check the preconditions
 * described above.
 */
void
BeginNonTransactionalInvalidation(void)
{
	/* Must be at top of stack */
	Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);

	/*
	 * Must not have any previously-queued activity: both chunk lists of
	 * both message lists must be empty, and no relcache init file
	 * invalidation may be pending.
	 */
	Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
	Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);
	Assert(transInvalInfo->CurrentCmdInvalidMsgs.cclist == NULL);
	Assert(transInvalInfo->CurrentCmdInvalidMsgs.rclist == NULL);
	Assert(transInvalInfo->RelcacheInitFileInval == false);
}

/*
 * EndNonTransactionalInvalidation
 *		Flush the invalidation messages queued for nontransactional updates.
 *
 * Messages are expected only in CurrentCmdInvalidMsgs; anything in
 * PriorCmdInvalidMsgs would mean a CommandCounterIncrement happened inside
 * the "nontransactional" update.  The messages are processed locally and
 * also sent out to the shared invalidation message queue.
 *
 * Afterwards the lists are reset to empty and their chunks are freed
 * explicitly, because end-of-transaction cleanup can't be relied on for
 * that here.
 */
void
EndNonTransactionalInvalidation(void)
{
	InvalidationChunk *victim;
	InvalidationChunk *following;

	/* Must be at top of stack */
	Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);

	/* Must not have any prior-command messages */
	Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
	Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);

	/*
	 * At present, this function is only used for CTID-changing updates.  The
	 * relcache init file doesn't store any tuple CTIDs, so it needs no
	 * invalidation here.  Should that ever stop being true, code similar to
	 * AtEOXact_Inval would be needed.
	 */

	/* Apply the invals locally, then send them to the shared queue */
	ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
								LocalExecuteInvalidationMessage);
	ProcessInvalidationMessagesMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
									 SendSharedInvalidMessages);

	/* Release every chunk on the cclist chain */
	victim = transInvalInfo->CurrentCmdInvalidMsgs.cclist;
	while (victim != NULL)
	{
		/* grab the link before the chunk goes away */
		following = victim->next;
		pfree(victim);
		victim = following;
	}

	/* Likewise for the rclist chain */
	victim = transInvalInfo->CurrentCmdInvalidMsgs.rclist;
	while (victim != NULL)
	{
		following = victim->next;
		pfree(victim);
		victim = following;
	}

	/* Leave the current-command state empty again */
	transInvalInfo->CurrentCmdInvalidMsgs.cclist = NULL;
	transInvalInfo->CurrentCmdInvalidMsgs.rclist = NULL;
	transInvalInfo->RelcacheInitFileInval = false;
}


/*
 * CacheInvalidateHeapTuple
 *		Register the given tuple for invalidation at end of command
 *		(ie, current command is creating or outdating this tuple).
 *
 * relation: the relation the tuple belongs to
 * tuple: the tuple being created or outdated
 *
 * All the work is delegated to PrepareForTupleInvalidation().
 */
void
CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
{
	PrepareForTupleInvalidation(relation, tuple);
}

/*
 * CacheInvalidateRelcache
 *		Queue an invalidation of the given relation's relcache entry, to
 *		take effect at end of command.
 *
 * Intended for callers that must force a relcache rebuild without changing
 * any of the tuples that PrepareForTupleInvalidation recognizes as
 * contributors to the relcache entry.  (An example is dropping an index.)
 * In particular, relfilenode/reltablespace are assumed not to be changing,
 * so the rd_node value is still good.
 *
 * XXX most callers of this probably don't need to force an smgr flush.
 */
void
CacheInvalidateRelcache(Relation relation)
{
	Oid			relationId = RelationGetRelid(relation);

	/* Shared relations are registered under InvalidOid, not our database */
	Oid			databaseId = relation->rd_rel->relisshared ?
		InvalidOid : MyDatabaseId;

	RegisterRelcacheInvalidation(databaseId, relationId);
	RegisterSmgrInvalidation(relation->rd_node);
}

/*
 * CacheInvalidateRelcacheByTuple
 *		Same as CacheInvalidateRelcache, except that the relation is
 *		identified by its pg_class tuple.
 *
 * The RelFileNode handed to the smgr invalidation is assembled from the
 * tuple's fields rather than taken from an open relation.
 */
void
CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
{
	Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTuple);
	Oid			relationId = HeapTupleGetOid(classTuple);
	Oid			databaseId;
	RelFileNode rnode;

	/* Shared relations are registered under InvalidOid, not our database */
	databaseId = classForm->relisshared ? InvalidOid : MyDatabaseId;

	/* A zero reltablespace stands for the database's own tablespace */
	rnode.spcNode = classForm->reltablespace ?
		classForm->reltablespace : MyDatabaseTableSpace;
	rnode.dbNode = databaseId;
	rnode.relNode = classForm->relfilenode;

	RegisterRelcacheInvalidation(databaseId, relationId);
	RegisterSmgrInvalidation(rnode);
}

/*
 * CacheInvalidateRelcacheByRelid
 *		Same as CacheInvalidateRelcache, except that the relation is
 *		identified by its OID.
 *
 * This is the least efficient of the three variants, since the pg_class
 * tuple first has to be fetched through the RELOID syscache; prefer one of
 * the other two when a Relation or pg_class tuple is already at hand.
 *
 * Raises ERROR if no pg_class tuple is found for relid.
 */
void
CacheInvalidateRelcacheByRelid(Oid relid)
{
	HeapTuple	classTuple = SearchSysCache(RELOID,
											ObjectIdGetDatum(relid),
											0, 0, 0);

	if (!HeapTupleIsValid(classTuple))
	{
		elog(ERROR, "cache lookup failed for relation %u", relid);
	}

	CacheInvalidateRelcacheByTuple(classTuple);

	/* Done with the syscache reference */
	ReleaseSysCache(classTuple);
}

/*
 * CacheRegisterSyscacheCallback
 *		Add a function to be called for all future invalidation events in
 *		the given cache.  The function will be passed the cache ID and the
 *		TID of the tuple being invalidated.
 *
 * cacheid: ID of the cache to watch
 * func: the callback
 * arg: caller-supplied Datum stored alongside the callback
 *
 * NOTE: NULL will be passed for the TID if a cache reset request is received.
 * In this case the called routines should flush all cached state.
 *
 * The callback table has a fixed size of MAX_SYSCACHE_CALLBACKS entries;
 * trying to register more than that is a FATAL error.
 */
void
CacheRegisterSyscacheCallback(int cacheid,
							  SyscacheCallbackFunction func,
							  Datum arg)
{
	int			slot = syscache_callback_count;

	if (slot >= MAX_SYSCACHE_CALLBACKS)
	{
		elog(FATAL, "out of syscache_callback_list slots");
	}

	/* Fill in the next free entry ... */
	syscache_callback_list[slot].id = cacheid;
	syscache_callback_list[slot].function = func;
	syscache_callback_list[slot].arg = arg;

	/* ... and only then count it as registered */
	syscache_callback_count = slot + 1;
}

/*
 * CacheRegisterRelcacheCallback
 *		Add a function to be called for all future relcache invalidation
 *		events.  The function will be passed the OID of the relation being
 *		invalidated.
 *
 * func: the callback
 * arg: caller-supplied Datum stored alongside the callback
 *
 * NOTE: InvalidOid will be passed if a cache reset request is received.
 * In this case the called routines should flush all cached state.
 *
 * The callback table has a fixed size of MAX_RELCACHE_CALLBACKS entries;
 * trying to register more than that is a FATAL error.
 */
void
CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
							  Datum arg)
{
	int			slot = relcache_callback_count;

	if (slot >= MAX_RELCACHE_CALLBACKS)
	{
		elog(FATAL, "out of relcache_callback_list slots");
	}

	/* Fill in the next free entry ... */
	relcache_callback_list[slot].function = func;
	relcache_callback_list[slot].arg = arg;

	/* ... and only then count it as registered */
	relcache_callback_count = slot + 1;
}