summaryrefslogtreecommitdiff
path: root/include/jemalloc/internal/ph.h
blob: 8ceadb90b1759ceb0ec66e5c38e10281235dd0ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
#ifndef JEMALLOC_INTERNAL_PH_H
#define JEMALLOC_INTERNAL_PH_H

/*
 * A Pairing Heap implementation.
 *
 * "The Pairing Heap: A New Form of Self-Adjusting Heap"
 * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
 *
 * With an auxiliary twopass list, described in a follow-on paper.
 *
 * "Pairing Heaps: Experiments and Analysis"
 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
 *
 *******************************************************************************
 *
 * We include a non-obvious optimization:
 * - First, we introduce a new pop-and-link operation; pop the two most
 *   recently-inserted items off the aux-list, link them, and push the resulting
 *   heap.
 * - We maintain a count of the number of insertions since the last time we
 *   merged the aux-list (i.e. via first() or remove_first()).  After N inserts,
 *   we do ffs(N) pop-and-link operations.
 *
 * One way to think of this is that we're progressively building up a tree in
 * the aux-list, rather than a linked-list (think of the series of merges that
 * will be performed as the aux-count grows).
 *
 * There are a couple of reasons we benefit from this:
 * - Ordinarily, after N insertions, the aux-list is of size N.  With our
 *   strategy, it's of size O(log(N)).  So we decrease the worst-case time of
 *   first() calls, and reduce the average cost of remove_first calls.  Since
 *   these almost always occur while holding a lock, we practically reduce the
 *   frequency of unusually long hold times.
 * - This moves the bulk of the work of merging the aux-list onto the threads
 *   that are inserting into the heap.  In some common scenarios, insertions
 *   happen in bulk, from a single thread (think tcache flushing; we potentially
 *   move many slabs from slabs_full to slabs_nonfull).  All the nodes in this
 *   case are in the inserting thread's cache, and linking them is very cheap
 *   (cache misses dominate linking cost).  Without this optimization, linking
 *   happens on the next call to remove_first.  Since that remove_first call
 *   likely happens on a different thread (or at least, after the cache has
 *   gotten cold if done on the same thread), deferring linking trades cheap
 *   link operations now for expensive ones later.
 *
 * The ffs trick keeps amortized insert cost at constant time.  Similar
 * strategies based on periodically sorting the list after a batch of operations
 * perform worse than this in practice, even with various fancy tricks; they
 * all took amortized complexity of an insert from O(1) to O(log(n)).
 */

/*
 * Node comparator: returns negative if the first argument sorts earlier
 * (higher priority), non-negative otherwise.
 */
typedef int (*ph_cmp_t)(void *, void *);

/*
 * Node structure.  The linkage is embedded in client structs at a
 * caller-supplied byte offset; nodes are passed around as void * and all
 * field access goes through the offset-based accessors below.
 */
typedef struct phn_link_s phn_link_t;
struct phn_link_s {
	void *prev;	/* Previous sibling, or parent if leftmost child. */
	void *next;	/* Next sibling; off the root, chains the aux list. */
	void *lchild;	/* Leftmost child, or NULL if childless. */
};

typedef struct ph_s ph_t;
struct ph_s {
	/* Heap root; its next link heads the not-yet-merged aux list. */
	void *root;
	/*
	 * Inserts done since the last aux-list merge.  This is not necessarily
	 * the size of the aux-list, since it's possible that removals have
	 * happened since, and we don't track whether or not those removals are
	 * from the aux list.
	 */
	size_t auxcount;
};

/* Locate the link fields embedded at byte `offset` within node `phn`. */
JEMALLOC_ALWAYS_INLINE phn_link_t *
phn_link_get(void *phn, size_t offset) {
	uintptr_t addr = (uintptr_t)phn + offset;
	return (phn_link_t *)addr;
}

JEMALLOC_ALWAYS_INLINE void
phn_link_init(void *phn, size_t offset) {
	phn_link_get(phn, offset)->prev = NULL;
	phn_link_get(phn, offset)->next = NULL;
	phn_link_get(phn, offset)->lchild = NULL;
}

/* Internal utility helpers. */
JEMALLOC_ALWAYS_INLINE void *
phn_lchild_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->lchild;
}

JEMALLOC_ALWAYS_INLINE void
phn_lchild_set(void *phn, void *lchild, size_t offset) {
	phn_link_get(phn, offset)->lchild = lchild;
}

JEMALLOC_ALWAYS_INLINE void *
phn_next_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->next;
}

JEMALLOC_ALWAYS_INLINE void
phn_next_set(void *phn, void *next, size_t offset) {
	phn_link_get(phn, offset)->next = next;
}

JEMALLOC_ALWAYS_INLINE void *
phn_prev_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->prev;
}

JEMALLOC_ALWAYS_INLINE void
phn_prev_set(void *phn, void *prev, size_t offset) {
	phn_link_get(phn, offset)->prev = prev;
}

/*
 * Link two non-empty heaps whose relative order is already known: phn0
 * sorts no later than phn1 (asserted), so phn1 becomes the new leftmost
 * child of phn0.
 */
JEMALLOC_ALWAYS_INLINE void
phn_merge_ordered(void *phn0, void *phn1, size_t offset,
    ph_cmp_t cmp) {
	assert(phn0 != NULL);
	assert(phn1 != NULL);
	assert(cmp(phn0, phn1) <= 0);

	void *first_child = phn_lchild_get(phn0, offset);
	/* phn1's prev points at its new parent, phn0. */
	phn_prev_set(phn1, phn0, offset);
	/* Push phn1 onto the front of phn0's child list. */
	phn_next_set(phn1, first_child, offset);
	if (first_child != NULL) {
		phn_prev_set(first_child, phn1, offset);
	}
	phn_lchild_set(phn0, phn1, offset);
}

/*
 * Merge two (possibly NULL) heaps and return the resulting root: the
 * smaller root (per cmp) wins; ties go to phn1.
 */
JEMALLOC_ALWAYS_INLINE void *
phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) {
	if (phn0 == NULL) {
		return phn1;
	}
	if (phn1 == NULL) {
		return phn0;
	}
	if (cmp(phn0, phn1) < 0) {
		phn_merge_ordered(phn0, phn1, offset, cmp);
		return phn0;
	}
	phn_merge_ordered(phn1, phn0, offset, cmp);
	return phn1;
}

/*
 * Merge a list of sibling heaps (linked via their next pointers) into a
 * single heap, returning the new root.  `phn` is the first sibling and must
 * be non-NULL.
 */
JEMALLOC_ALWAYS_INLINE void *
phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) {
	void *head = NULL;
	void *tail = NULL;
	void *phn0 = phn;
	void *phn1 = phn_next_get(phn0, offset);

	/*
	 * Multipass merge, wherein the first two elements of a FIFO
	 * are repeatedly merged, and each result is appended to the
	 * singly linked FIFO, until the FIFO contains only a single
	 * element.  We start with a sibling list but no reference to
	 * its tail, so we do a single pass over the sibling list to
	 * populate the FIFO.
	 */
	if (phn1 != NULL) {
		void *phnrest = phn_next_get(phn1, offset);
		if (phnrest != NULL) {
			phn_prev_set(phnrest, NULL, offset);
		}
		/* Fully detach phn0 and phn1 before linking them. */
		phn_prev_set(phn0, NULL, offset);
		phn_next_set(phn0, NULL, offset);
		phn_prev_set(phn1, NULL, offset);
		phn_next_set(phn1, NULL, offset);
		phn0 = phn_merge(phn0, phn1, offset, cmp);
		head = tail = phn0;
		phn0 = phnrest;
		/*
		 * First pass: pairwise-merge the remaining siblings, appending
		 * each merged pair (or the odd element out) to the FIFO.
		 */
		while (phn0 != NULL) {
			phn1 = phn_next_get(phn0, offset);
			if (phn1 != NULL) {
				phnrest = phn_next_get(phn1, offset);
				if (phnrest != NULL) {
					phn_prev_set(phnrest, NULL, offset);
				}
				phn_prev_set(phn0, NULL, offset);
				phn_next_set(phn0, NULL, offset);
				phn_prev_set(phn1, NULL, offset);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = phnrest;
			} else {
				/* Odd element out: append it unmerged. */
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = NULL;
			}
		}
		phn0 = head;
		phn1 = phn_next_get(phn0, offset);
		if (phn1 != NULL) {
			/*
			 * Subsequent passes: keep merging the two FIFO head
			 * elements and appending the result until a single
			 * heap remains (in phn0).
			 */
			while (true) {
				head = phn_next_get(phn1, offset);
				assert(phn_prev_get(phn0, offset) == NULL);
				phn_next_set(phn0, NULL, offset);
				assert(phn_prev_get(phn1, offset) == NULL);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				if (head == NULL) {
					break;
				}
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = head;
				phn1 = phn_next_get(phn0, offset);
			}
		}
	}
	return phn0;
}

/*
 * Merge the aux list (chained off ph->root's next pointer) into the root
 * heap, leaving a single heap at ph->root.  Requires a non-empty heap.
 */
JEMALLOC_ALWAYS_INLINE void
ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	ph->auxcount = 0;
	void *aux = phn_next_get(ph->root, offset);
	if (aux == NULL) {
		return;
	}
	/* Detach the aux list from the root before merging. */
	phn_prev_set(ph->root, NULL, offset);
	phn_next_set(ph->root, NULL, offset);
	phn_prev_set(aux, NULL, offset);
	aux = phn_merge_siblings(aux, offset, cmp);
	assert(phn_next_get(aux, offset) == NULL);
	ph->root = phn_merge(ph->root, aux, offset, cmp);
}

/*
 * Merge `phn`'s children into a single heap and return its root, or NULL
 * if phn has no children.
 */
JEMALLOC_ALWAYS_INLINE void *
ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) {
	void *lchild = phn_lchild_get(phn, offset);
	if (lchild == NULL) {
		return NULL;
	}
	return phn_merge_siblings(lchild, offset, cmp);
}

/* Initialize `ph` as an empty heap. */
JEMALLOC_ALWAYS_INLINE void
ph_new(ph_t *ph) {
	ph->auxcount = 0;
	ph->root = NULL;
}

/* Return true iff the heap contains no nodes. */
JEMALLOC_ALWAYS_INLINE bool
ph_empty(ph_t *ph) {
	return ph->root == NULL;
}

/*
 * Return the minimum element (per cmp) without removing it, or NULL if the
 * heap is empty.  Merges the aux list as a side effect.
 */
JEMALLOC_ALWAYS_INLINE void *
ph_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	void *min = NULL;
	if (ph->root != NULL) {
		/* Collapse the aux list so the root is the overall minimum. */
		ph_merge_aux(ph, offset, cmp);
		min = ph->root;
	}
	return min;
}

/*
 * Return some element of the heap (not necessarily the minimum) without
 * merging the aux list, or NULL if the heap is empty.  Prefers an aux-list
 * entry over the root.
 */
JEMALLOC_ALWAYS_INLINE void *
ph_any(ph_t *ph, size_t offset) {
	void *root = ph->root;
	if (root == NULL) {
		return NULL;
	}
	void *aux = phn_next_get(root, offset);
	return (aux != NULL) ? aux : root;
}

/*
 * Pop the two most recent aux-list entries, link them, and push the merged
 * heap back onto the front of the aux list (the pop-and-link operation from
 * the top-of-file comment).  Returns true if we should stop trying to merge
 * (fewer than two entries remain).
 */
JEMALLOC_ALWAYS_INLINE bool
ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	assert(ph->root != NULL);
	/* The aux list hangs off the root's next pointer. */
	void *phn0 = phn_next_get(ph->root, offset);
	if (phn0 == NULL) {
		return true;
	}
	void *phn1 = phn_next_get(phn0, offset);
	if (phn1 == NULL) {
		return true;
	}
	void *next_phn1 = phn_next_get(phn1, offset);
	/* Fully detach both entries before linking them. */
	phn_next_set(phn0, NULL, offset);
	phn_prev_set(phn0, NULL, offset);
	phn_next_set(phn1, NULL, offset);
	phn_prev_set(phn1, NULL, offset);
	phn0 = phn_merge(phn0, phn1, offset, cmp);
	/* Splice the merged heap in at the front of the aux list. */
	phn_next_set(phn0, next_phn1, offset);
	if (next_phn1 != NULL) {
		phn_prev_set(next_phn1, phn0, offset);
	}
	phn_next_set(ph->root, phn0, offset);
	phn_prev_set(phn0, ph->root, offset);
	/* Stop if the merged heap is now the only aux-list entry. */
	return next_phn1 == NULL;
}

/*
 * Insert `phn` into the heap.  Most linking work is deferred: the node is
 * usually just pushed onto the aux list, with a bounded number of
 * pop-and-link operations to keep the aux list short.
 */
JEMALLOC_ALWAYS_INLINE void
ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	phn_link_init(phn, offset);

	/*
	 * Treat the root as an aux list during insertion, and lazily merge
	 * during a_prefix##remove_first().  For elements that are inserted,
	 * then removed via a_prefix##remove() before the aux list is ever
	 * processed, this makes insert/remove constant-time, whereas eager
	 * merging would make insert O(log n).
	 */
	if (ph->root == NULL) {
		ph->root = phn;
		return;
	}

	/*
	 * As a special case, check to see if we can replace the root.
	 * This is practically common in some important cases, and lets
	 * us defer some insertions (hopefully, until the point where
	 * some of the items in the aux list have been removed, saving
	 * us from linking them at all).
	 */
	if (cmp(phn, ph->root) < 0) {
		phn_lchild_set(phn, ph->root, offset);
		phn_prev_set(ph->root, phn, offset);
		ph->root = phn;
		ph->auxcount = 0;
		return;
	}

	/* Push phn onto the front of the aux list (rooted at root->next). */
	phn_next_set(phn, phn_next_get(ph->root, offset), offset);
	if (phn_next_get(ph->root, offset) != NULL) {
		phn_prev_set(phn_next_get(ph->root, offset), phn,
		    offset);
	}
	phn_prev_set(phn, ph->root, offset);
	phn_next_set(ph->root, phn, offset);

	/*
	 * Do up to ffs(auxcount) pop-and-link operations; see the top-of-file
	 * comment for why this keeps the aux list short while preserving
	 * amortized constant-time insert.
	 */
	ph->auxcount++;
	unsigned nmerges = ffs_zu(ph->auxcount);
	bool done = false;
	for (unsigned i = 0; i < nmerges && !done; i++) {
		done = ph_try_aux_merge_pair(ph, offset, cmp);
	}
}

/*
 * Remove and return the minimum element (per cmp), or NULL if the heap is
 * empty.  Merges the aux list as a side effect.
 */
JEMALLOC_ALWAYS_INLINE void *
ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	if (ph->root == NULL) {
		return NULL;
	}
	/* Collapse the aux list so the root is the overall minimum. */
	ph_merge_aux(ph, offset, cmp);
	void *min = ph->root;
	/* The merged children of the old root become the new root. */
	ph->root = ph_merge_children(min, offset, cmp);
	return min;
}

/* Remove arbitrary node `phn`, which must currently be in the heap. */
JEMALLOC_ALWAYS_INLINE void
ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	void *replace;
	void *parent;

	if (ph->root == phn) {
		/*
		 * We can delete from aux list without merging it, but we need
		 * to merge if we are dealing with the root node and it has
		 * children.
		 */
		if (phn_lchild_get(phn, offset) == NULL) {
			/* Childless root: promote the aux-list head. */
			ph->root = phn_next_get(phn, offset);
			if (ph->root != NULL) {
				phn_prev_set(ph->root, NULL, offset);
			}
			return;
		}
		ph_merge_aux(ph, offset, cmp);
		if (ph->root == phn) {
			ph->root = ph_merge_children(ph->root, offset, cmp);
			return;
		}
		/* The aux merge displaced phn from the root; fall through. */
	}

	/* Get parent (if phn is leftmost child) before mutating. */
	if ((parent = phn_prev_get(phn, offset)) != NULL) {
		if (phn_lchild_get(parent, offset) != phn) {
			/* phn's prev is a sibling, not its parent. */
			parent = NULL;
		}
	}
	/* Find a possible replacement node, and link to parent. */
	replace = ph_merge_children(phn, offset, cmp);
	/* Set next/prev for sibling linked list. */
	if (replace != NULL) {
		/* Splice replace into phn's old position. */
		if (parent != NULL) {
			phn_prev_set(replace, parent, offset);
			phn_lchild_set(parent, replace, offset);
		} else {
			phn_prev_set(replace, phn_prev_get(phn, offset),
			    offset);
			if (phn_prev_get(phn, offset) != NULL) {
				phn_next_set(phn_prev_get(phn, offset), replace,
				    offset);
			}
		}
		phn_next_set(replace, phn_next_get(phn, offset), offset);
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(phn_next_get(phn, offset), replace,
			    offset);
		}
	} else {
		/* No replacement: just unlink phn from its sibling list. */
		if (parent != NULL) {
			void *next = phn_next_get(phn, offset);
			phn_lchild_set(parent, next, offset);
			if (next != NULL) {
				phn_prev_set(next, parent, offset);
			}
		} else {
			assert(phn_prev_get(phn, offset) != NULL);
			phn_next_set(
			    phn_prev_get(phn, offset),
			    phn_next_get(phn, offset), offset);
		}
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(
			    phn_next_get(phn, offset),
			    phn_prev_get(phn, offset), offset);
		}
	}
}

/*
 * Generate the prefixed wrapper types: a_prefix##_link_t is embedded in
 * client structs as the heap linkage field, and a_prefix##_t is the heap
 * itself.
 */
#define ph_structs(a_prefix, a_type)					\
typedef struct {							\
	phn_link_t link;						\
} a_prefix##_link_t;							\
									\
typedef struct {							\
	ph_t ph;							\
} a_prefix##_t;

/*
 * The ph_proto() macro generates function prototypes that correspond to the
 * functions generated by an equivalently parameterized call to ph_gen().
 * a_attr supplies the storage class / attributes for each declaration.
 */
#define ph_proto(a_attr, a_prefix, a_type)				\
									\
a_attr void a_prefix##_new(a_prefix##_t *ph);				\
a_attr bool a_prefix##_empty(a_prefix##_t *ph);				\
a_attr a_type *a_prefix##_first(a_prefix##_t *ph);			\
a_attr a_type *a_prefix##_any(a_prefix##_t *ph);			\
a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph);		\
a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph);

/*
 * The ph_gen() macro generates a type-specific pairing heap implementation
 * matching the ph_proto() interface.  a_field names the a_prefix##_link_t
 * member within a_type, and a_cmp is a comparison function over a_type
 * pointers (negative result means the first argument sorts earlier).
 */
#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp)		\
JEMALLOC_ALWAYS_INLINE int						\
a_prefix##_ph_cmp(void *a, void *b) {					\
	return a_cmp((a_type *)a, (a_type *)b);				\
}									\
									\
a_attr void								\
a_prefix##_new(a_prefix##_t *ph) {					\
	ph_new(&ph->ph);						\
}									\
									\
a_attr bool								\
a_prefix##_empty(a_prefix##_t *ph) {					\
	return ph_empty(&ph->ph);					\
}									\
									\
a_attr a_type *								\
a_prefix##_first(a_prefix##_t *ph) {					\
	return ph_first(&ph->ph, offsetof(a_type, a_field),		\
	    &a_prefix##_ph_cmp);					\
}									\
									\
a_attr a_type *								\
a_prefix##_any(a_prefix##_t *ph) {					\
	return ph_any(&ph->ph, offsetof(a_type, a_field));		\
}									\
									\
a_attr void								\
a_prefix##_insert(a_prefix##_t *ph, a_type *phn) {			\
	ph_insert(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_first(a_prefix##_t *ph) {				\
	return ph_remove_first(&ph->ph, offsetof(a_type, a_field),	\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr void								\
a_prefix##_remove(a_prefix##_t *ph, a_type *phn) {			\
	ph_remove(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_any(a_prefix##_t *ph) {				\
	a_type *ret = a_prefix##_any(ph);				\
	if (ret != NULL) {						\
		a_prefix##_remove(ph, ret);				\
	}								\
	return ret;							\
}

#endif /* JEMALLOC_INTERNAL_PH_H */