summaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/buf_init.c
blob: 601d4fc6e7299a0246a7c419fc19fbf49c276090 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
/*-------------------------------------------------------------------------
 *
 * buf_init.c
 *	  buffer manager initialization routines
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.57 2003/11/13 14:57:15 wieck Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <sys/file.h>
#include <math.h>
#include <signal.h>

#include "catalog/catalog.h"
#include "executor/execdebug.h"
#include "miscadmin.h"
#include "storage/buf.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "storage/lwlock.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"


/*
 *	if BMTRACE is defined, we trace the last 200 buffer allocations and
 *	deallocations in a circular buffer in shared memory.
 */
#ifdef	BMTRACE
bmtrace    *TraceBuf;
long	   *CurTraceBuf;

#define BMT_LIMIT		200
#endif   /* BMTRACE */
int			ShowPinTrace = 0;

int			Data_Descriptors;

BufferDesc *BufferDescriptors;
Block	   *BufferBlockPointers;

long	   *PrivateRefCount;	/* also used in freelist.c */
bits8	   *BufferLocks;		/* flag bits showing locks I have set */


/*
 * Data Structures:
 *		buffers live in a freelist and a lookup data structure.
 *
 *
 * Buffer Lookup:
 *		Two important notes.  First, the buffer has to be
 *		available for lookup BEFORE an IO begins.  Otherwise
 *		a second process trying to read the buffer will
 *		allocate its own copy and the buffer pool will
 *		become inconsistent.
 *
 * Buffer Replacement:
 *		see freelist.c.  A buffer cannot be replaced while in
 *		use either by data manager or during IO.
 *
 * WriteBufferBack:
 *		currently, a buffer is only written back at the time
 *		it is selected for replacement.  It should
 *		be done sooner if possible to reduce latency of
 *		BufferAlloc().	Maybe there should be a daemon process.
 *
 * Synchronization/Locking:
 *
 * BufMgrLock lock -- must be acquired before manipulating the
 *		buffer queues (lookup/freelist).  Must be released
 *		before exit and before doing any IO.
 *
 * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
 *		It must be set when an IO is initiated and cleared at
 *		the end of	the IO.  It is there to make sure that one
 *		process doesn't start to use a buffer while another is
 *		faulting it in.  see IOWait/IOSignal.
 *
 * refcount --	A buffer is pinned during IO and immediately
 *		after a BufferAlloc().	A buffer is always either pinned
 *		or on the freelist but never both.	The buffer must be
 *		released, written, or flushed before the end of
 *		transaction.
 *
 * PrivateRefCount -- Each buffer also has a private refcount the keeps
 *		track of the number of times the buffer is pinned in the current
 *		processes.	This is used for two purposes, first, if we pin a
 *		a buffer more than once, we only need to change the shared refcount
 *		once, thus only lock the buffer pool once, second, when a transaction
 *		aborts, it should only unpin the buffers exactly the number of times it
 *		has pinned them, so that it will not blow away buffers of another
 *		backend.
 *
 */

long int	ReadBufferCount;
long int	ReadLocalBufferCount;
long int	BufferHitCount;
long int	LocalBufferHitCount;
long int	BufferFlushCount;
long int	LocalBufferFlushCount;


/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
	char	   *BufferBlocks;
	bool		foundBufs,
				foundDescs;
	int			i;

	Data_Descriptors = NBuffers;

	/*
	 * It's probably not really necessary to grab the lock --- if there's
	 * anyone else attached to the shmem at this point, we've got
	 * problems.
	 */
	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);

#ifdef BMTRACE
	CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
							(BMT_LIMIT * sizeof(bmtrace)) + sizeof(long),
										   &foundDescs);
	if (!foundDescs)
		MemSet(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long));

	TraceBuf = (bmtrace *) & (CurTraceBuf[1]);
#endif

	BufferDescriptors = (BufferDesc *)
		ShmemInitStruct("Buffer Descriptors",
					  Data_Descriptors * sizeof(BufferDesc), &foundDescs);

	BufferBlocks = (char *)
		ShmemInitStruct("Buffer Blocks",
						NBuffers * BLCKSZ, &foundBufs);

	if (foundDescs || foundBufs)
	{
		/* both should be present or neither */
		Assert(foundDescs && foundBufs);
	}
	else
	{
		BufferDesc *buf;
		char	   *block;

		buf = BufferDescriptors;
		block = BufferBlocks;

		/*
		 * link the buffers into a single linked list. This will become the
		 * LiFo list of unused buffers returned by StragegyGetBuffer().
		 */
		for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
		{
			Assert(ShmemIsValid((unsigned long) block));

			buf->bufNext = i + 1;

			CLEAR_BUFFERTAG(&(buf->tag));
			buf->buf_id = i;

			buf->data = MAKE_OFFSET(block);
			buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
			buf->refcount = 0;
			buf->io_in_progress_lock = LWLockAssign();
			buf->cntx_lock = LWLockAssign();
			buf->cntxDirty = false;
			buf->wait_backend_id = 0;
		}

		/* Correct last entry */
		BufferDescriptors[Data_Descriptors - 1].bufNext = -1;
	}

	/* Init other shared buffer-management stuff */
	StrategyInitialize(!foundDescs);

	LWLockRelease(BufMgrLock);
}

/*
 * Initialize access to shared buffer pool
 *
 * This is called during backend startup (whether standalone or under the
 * postmaster).  It sets up for this backend's access to the already-existing
 * buffer pool.
 *
 * NB: this is called before InitProcess(), so we do not have a PGPROC and
 * cannot do LWLockAcquire; hence we can't actually access the bufmgr's
 * shared memory yet.  We are only initializing local data here.
 */
void
InitBufferPoolAccess(void)
{
	int			i;

	/*
	 * Allocate and zero local arrays of per-buffer info.
	 */
	BufferBlockPointers = (Block *) calloc(NBuffers, sizeof(Block));
	PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
	BufferLocks = (bits8 *) calloc(NBuffers, sizeof(bits8));

	/*
	 * Convert shmem offsets into addresses as seen by this process. This
	 * is just to speed up the BufferGetBlock() macro.
	 */
	for (i = 0; i < NBuffers; i++)
		BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data);
}

/* -----------------------------------------------------
 * BufferShmemSize
 *
 * compute the size of shared memory for the buffer pool including
 * data pages, buffer descriptors, hash tables, etc.
 * ----------------------------------------------------
 */
int
BufferShmemSize(void)
{
	int			size = 0;

	/* size of shmem index hash table */
	size += hash_estimate_size(SHMEM_INDEX_SIZE, sizeof(ShmemIndexEnt));

	/* size of buffer descriptors */
	size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc));

	/* size of data pages */
	size += NBuffers * MAXALIGN(BLCKSZ);

	/* size of buffer hash table */
	size += hash_estimate_size(NBuffers, sizeof(BufferLookupEnt));

#ifdef BMTRACE
	size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long);
#endif

	return size;
}