/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
* arch/x86/mm/mm-locks.h
*
* Spinlocks used by the code in arch/x86/mm.
*
* Copyright (c) 2011 Citrix Systems, inc.
* Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
* Copyright (c) 2006-2007 XenSource Inc.
* Copyright (c) 2006 Michael A Fetterman
*/
#ifndef _MM_LOCKS_H
#define _MM_LOCKS_H
/* Per-CPU variable for enforcing the lock ordering */
DECLARE_PER_CPU(int, mm_lock_level);
DECLARE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock);
static inline void mm_lock_init(mm_lock_t *l)
{
spin_lock_init(&l->lock);
l->locker = -1;
l->locker_function = "nobody";
l->unlock_level = 0;
}
static inline bool mm_locked_by_me(const mm_lock_t *l)
{
return (l->lock.recurse_cpu == current->processor);
}
static inline int _get_lock_level(void)
{
return this_cpu(mm_lock_level);
}
#define MM_LOCK_ORDER_MAX 64
/*
* Return the lock level taking the domain bias into account. If the domain is
* privileged a bias of MM_LOCK_ORDER_MAX is applied to the lock level, so that
* mm locks that belong to a control domain can be acquired after having
* acquired mm locks of an unprivileged domain.
*
* This is required in order to use some hypercalls from a paging domain that
* take locks of a subject domain and then attempt to copy data to/from the
* caller domain.
*/
static inline int _lock_level(const struct domain *d, int l)
{
ASSERT(l <= MM_LOCK_ORDER_MAX);
return l + (d && is_control_domain(d) ? MM_LOCK_ORDER_MAX : 0);
}
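/*
 * Worked example of the bias, using values defined elsewhere in this file:
 * MM_LOCK_ORDER_p2m is 16 and MM_LOCK_ORDER_MAX is 64, so a subject
 * (unprivileged) domain's p2m lock is taken at level 16 while a control
 * domain's p2m lock is taken at level 16 + 64 = 80.  A hypercall that
 * already holds the subject's p2m lock (current level 16) may therefore
 * still acquire the caller (control) domain's p2m lock, since 16 > 80 is
 * false and _check_lock_level() below does not trigger.
 */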
/*
* If you see this crash, the numbers printed are order levels defined
* in this file.
*/
static inline void _check_lock_level(const struct domain *d, int l)
{
int lvl = _lock_level(d, l);
if ( unlikely(_get_lock_level() > lvl) )
{
printk("mm locking order violation: %i > %i\n", _get_lock_level(), lvl);
BUG();
}
}
static inline void _set_lock_level(int l)
{
this_cpu(mm_lock_level) = l;
}
static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
const char *func, int level, int rec)
{
if ( !((mm_locked_by_me(l)) && rec) )
_check_lock_level(d, level);
spin_lock_recursive(&l->lock);
if ( l->lock.recurse_cnt == 1 )
{
l->locker_function = func;
l->unlock_level = _get_lock_level();
}
else if ( (unlikely(!rec)) )
panic("mm lock already held by %s\n", l->locker_function);
_set_lock_level(_lock_level(d, level));
}
static inline void _mm_enforce_order_lock_pre(const struct domain *d, int level)
{
_check_lock_level(d, level);
}
static inline void _mm_enforce_order_lock_post(const struct domain *d, int level,
int *unlock_level,
unsigned short *recurse_count)
{
if ( recurse_count )
{
if ( (*recurse_count)++ == 0 )
{
*unlock_level = _get_lock_level();
}
    }
    else
    {
        *unlock_level = _get_lock_level();
    }
_set_lock_level(_lock_level(d, level));
}
static inline void mm_rwlock_init(mm_rwlock_t *l)
{
percpu_rwlock_resource_init(&l->lock, p2m_percpu_rwlock);
l->locker = -1;
l->locker_function = "nobody";
l->unlock_level = 0;
}
static inline int mm_write_locked_by_me(mm_rwlock_t *l)
{
return (l->locker == get_processor_id());
}
static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
const char *func, int level)
{
if ( !mm_write_locked_by_me(l) )
{
_check_lock_level(d, level);
percpu_write_lock(p2m_percpu_rwlock, &l->lock);
l->locker = get_processor_id();
l->locker_function = func;
l->unlock_level = _get_lock_level();
_set_lock_level(_lock_level(d, level));
}
l->recurse_count++;
}
static inline void mm_write_unlock(mm_rwlock_t *l)
{
if ( --(l->recurse_count) != 0 )
return;
l->locker = -1;
l->locker_function = "nobody";
_set_lock_level(l->unlock_level);
percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
}
static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
int level)
{
_check_lock_level(d, level);
percpu_read_lock(p2m_percpu_rwlock, &l->lock);
/* There's nowhere to store the per-CPU unlock level so we can't
* set the lock level. */
}
static inline void mm_read_unlock(mm_rwlock_t *l)
{
percpu_read_unlock(p2m_percpu_rwlock, &l->lock);
}
/*
 * This wrapper uses the per-lock MM_LOCK_ORDER_* value to express the
 * locking order below.
 */
#define declare_mm_lock(name) \
static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \
const char *func, int rec) \
{ _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
#define declare_mm_rwlock(name) \
static inline void mm_write_lock_##name(const struct domain *d, \
mm_rwlock_t *l, const char *func) \
{ _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \
static inline void mm_read_lock_##name(const struct domain *d, \
mm_rwlock_t *l) \
{ _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
/* These capture the name of the calling function */
#define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
#define mm_lock_recursive(name, d, l) mm_lock_##name(d, l, __func__, 1)
#define mm_write_lock(name, d, l) mm_write_lock_##name(d, l, __func__)
#define mm_read_lock(name, d, l) mm_read_lock_##name(d, l)
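/*
 * Illustrative expansion, using a hypothetical lock name "example" (the
 * real names are declared further down): given a definition of
 * MM_LOCK_ORDER_example, declare_mm_lock(example) produces
 *
 *   static inline void mm_lock_example(const struct domain *d, mm_lock_t *l,
 *                                      const char *func, int rec)
 *   { _mm_lock(d, l, func, MM_LOCK_ORDER_example, rec); }
 *
 * and a caller then writes mm_lock(example, d, &some_lock), which expands
 * to mm_lock_example(d, &some_lock, __func__, 0) so that the caller's
 * function name is recorded for the diagnostics in _mm_lock().
 */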
/* This wrapper is intended for "external" locks which do not use
* the mm_lock_t types. Such locks inside the mm code are also subject
* to ordering constraints. */
#define declare_mm_order_constraint(name) \
static inline void mm_enforce_order_lock_pre_##name(const struct domain *d) \
{ _mm_enforce_order_lock_pre(d, MM_LOCK_ORDER_##name); } \
static inline void mm_enforce_order_lock_post_##name(const struct domain *d,\
int *unlock_level, unsigned short *recurse_count) \
{ _mm_enforce_order_lock_post(d, MM_LOCK_ORDER_##name, unlock_level, \
recurse_count); }
static inline void mm_unlock(mm_lock_t *l)
{
if ( l->lock.recurse_cnt == 1 )
{
l->locker_function = "nobody";
_set_lock_level(l->unlock_level);
}
spin_unlock_recursive(&l->lock);
}
static inline void mm_enforce_order_unlock(int unlock_level,
unsigned short *recurse_count)
{
if ( recurse_count )
{
BUG_ON(*recurse_count == 0);
if ( (*recurse_count)-- == 1 )
{
_set_lock_level(unlock_level);
}
    }
    else
    {
        _set_lock_level(unlock_level);
    }
}
/************************************************************************
* *
* To avoid deadlocks, these locks _MUST_ be taken in the order listed *
* below. The locking functions will enforce this. *
* *
************************************************************************/
#ifdef CONFIG_HVM
/* Nested P2M lock (per-domain)
*
* A per-domain lock that protects the mapping from nested-CR3 to
* nested-p2m. In particular it covers:
* - the array of nested-p2m tables, and all LRU activity therein; and
 * - setting the "cr3" field of any p2m table to a non-P2M_BASE_EADDR value
 *   (i.e. assigning a p2m table to be the shadow of that cr3). */
#define MM_LOCK_ORDER_nestedp2m 8
declare_mm_lock(nestedp2m)
#define nestedp2m_lock(d) mm_lock(nestedp2m, d, &(d)->arch.nested_p2m_lock)
#define nestedp2m_unlock(d) mm_unlock(&(d)->arch.nested_p2m_lock)
/* P2M lock (per-non-alt-p2m-table)
*
* This protects all queries and updates to the p2m table.
* Queries may be made under the read lock but all modifications
* need the main (write) lock.
*
* The write lock is recursive as it is common for a code path to look
* up a gfn and later mutate it.
*
* Note that this lock shares its implementation with the altp2m
* lock (not the altp2m list lock), so the implementation
* is found there.
*
* Changes made to the host p2m when in altp2m mode are propagated to the
* altp2ms synchronously in ept_set_entry(). At that point, we will hold
* the host p2m lock; propagating this change involves grabbing the
* altp2m_list lock, and the locks of the individual alternate p2ms. In
* order to allow us to maintain locking order discipline, we split the p2m
* lock into p2m (for host p2ms) and altp2m (for alternate p2ms), putting
* the altp2mlist lock in the middle.
*/
#define MM_LOCK_ORDER_p2m 16
declare_mm_rwlock(p2m);
/* Sharing per page lock
*
* This is an external lock, not represented by an mm_lock_t. The memory
* sharing lock uses it to protect addition and removal of (gfn,domain)
* tuples to a shared page. We enforce order here against the p2m lock,
* which is taken after the page_lock to change the gfn's p2m entry.
*
* The lock is recursive because during share we lock two pages. */
#define MM_LOCK_ORDER_per_page_sharing 24
declare_mm_order_constraint(per_page_sharing)
#define page_sharing_mm_pre_lock() \
mm_enforce_order_lock_pre_per_page_sharing(NULL)
#define page_sharing_mm_post_lock(l, r) \
mm_enforce_order_lock_post_per_page_sharing(NULL, (l), (r))
#define page_sharing_mm_unlock(l, r) mm_enforce_order_unlock((l), (r))
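/*
 * Sketch of the intended calling pattern; the variables and the way the
 * per-page lock itself is taken are placeholders, only the three macros
 * above are real.  The recurse count allows both pages involved in a
 * sharing operation to be locked in turn by the same CPU.
 *
 *   int unlock_level;
 *   unsigned short recurse_count;
 *
 *   page_sharing_mm_pre_lock();                  (ordering check only)
 *   ... take the external per-page lock ...
 *   page_sharing_mm_post_lock(&unlock_level, &recurse_count);
 *   ...
 *   page_sharing_mm_unlock(unlock_level, &recurse_count);
 *   ... drop the external per-page lock ...
 */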
/* Alternate P2M list lock (per-domain)
*
* A per-domain lock that protects the list of alternate p2m's.
* Any operation that walks the list needs to acquire this lock.
* Additionally, before destroying an alternate p2m all VCPU's
* in the target domain must be paused.
*/
#define MM_LOCK_ORDER_altp2mlist 32
declare_mm_lock(altp2mlist)
#define altp2m_list_lock(d) mm_lock(altp2mlist, d, \
&(d)->arch.altp2m_list_lock)
#define altp2m_list_unlock(d) mm_unlock(&(d)->arch.altp2m_list_lock)
/* P2M lock (per-altp2m-table)
*
* This protects all queries and updates to the p2m table.
* Queries may be made under the read lock but all modifications
* need the main (write) lock.
*
* The write lock is recursive as it is common for a code path to look
* up a gfn and later mutate it.
*/
#define MM_LOCK_ORDER_altp2m 40
declare_mm_rwlock(altp2m);
static inline void p2m_lock(struct p2m_domain *p)
{
if ( p2m_is_altp2m(p) )
mm_write_lock(altp2m, p->domain, &p->lock);
else
mm_write_lock(p2m, p->domain, &p->lock);
p->defer_flush++;
}
static inline void p2m_unlock(struct p2m_domain *p)
{
if ( --p->defer_flush == 0 )
p2m_unlock_and_tlb_flush(p);
else
mm_write_unlock(&p->lock);
}
#define gfn_lock(p,g,o) p2m_lock(p)
#define gfn_unlock(p,g,o) p2m_unlock(p)
#define p2m_read_lock(p) mm_read_lock(p2m, (p)->domain, &(p)->lock)
#define p2m_read_unlock(p) mm_read_unlock(&(p)->lock)
#define p2m_locked_by_me(p) mm_write_locked_by_me(&(p)->lock)
#define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
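/*
 * Usage sketch (the work done under the locks is a placeholder): writers
 * take the recursive write lock around a lookup and the subsequent update,
 * while read-only queries can use the read lock.
 *
 *   p2m_lock(p2m);
 *   ... look up and modify p2m entries, possibly via helpers that take
 *       p2m_lock() again recursively ...
 *   p2m_unlock(p2m);     (performs any deferred TLB flush once
 *                         defer_flush reaches zero)
 *
 *   p2m_read_lock(p2m);
 *   ... read-only queries ...
 *   p2m_read_unlock(p2m);
 */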
/* PoD lock (per-p2m-table)
*
* Protects private PoD data structs: entry and cache
* counts, page lists, sweep parameters. */
#define MM_LOCK_ORDER_pod 48
declare_mm_lock(pod)
#define pod_lock(p) mm_lock(pod, (p)->domain, &(p)->pod.lock)
#define pod_unlock(p) mm_unlock(&(p)->pod.lock)
#define pod_locked_by_me(p) mm_locked_by_me(&(p)->pod.lock)
#endif /* CONFIG_HVM */
/* Page alloc lock (per-domain)
*
* This is an external lock, not represented by an mm_lock_t. However,
 * PoD code uses it in conjunction with the p2m lock, and expects
 * the ordering which we enforce here.
* The lock is not recursive. */
#define MM_LOCK_ORDER_page_alloc 56
declare_mm_order_constraint(page_alloc)
#define page_alloc_mm_pre_lock(d) mm_enforce_order_lock_pre_page_alloc(d)
#define page_alloc_mm_post_lock(d, l) \
mm_enforce_order_lock_post_page_alloc(d, &(l), NULL)
#define page_alloc_mm_unlock(l) mm_enforce_order_unlock((l), NULL)
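/*
 * Sketch of the expected pairing with the domain's page_alloc_lock; the
 * variable holding the saved level is a placeholder:
 *
 *   int unlock_level;
 *
 *   page_alloc_mm_pre_lock(d);                   (ordering check only)
 *   spin_lock(&d->page_alloc_lock);
 *   page_alloc_mm_post_lock(d, unlock_level);    (record and raise level)
 *   ...
 *   page_alloc_mm_unlock(unlock_level);          (restore the saved level)
 *   spin_unlock(&d->page_alloc_lock);
 */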
/* Paging lock (per-domain)
*
* For shadow pagetables, this lock protects
* - all changes to shadow page table pages
* - the shadow hash table
* - the shadow page allocator
* - all changes to guest page table pages
* - all changes to the page_info->tlbflush_timestamp
* - the page_info->count fields on shadow pages
*
* For HAP, it protects the NPT/EPT tables and mode changes.
*
* It also protects the log-dirty bitmap from concurrent accesses (and
* teardowns, etc). */
#define MM_LOCK_ORDER_paging 64
declare_mm_lock(paging)
#define paging_lock(d) mm_lock(paging, d, &(d)->arch.paging.lock)
#define paging_lock_recursive(d) \
mm_lock_recursive(paging, d, &(d)->arch.paging.lock)
#define paging_unlock(d) mm_unlock(&(d)->arch.paging.lock)
#define paging_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.lock)
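/*
 * Usage sketch (the update itself is a placeholder):
 *
 *   paging_lock(d);
 *   ... update shadow/HAP structures or the log-dirty bitmap ...
 *   paging_unlock(d);
 *
 * Paths that can be re-entered on the same CPU while the lock is already
 * held use paging_lock_recursive(d) instead, and helpers that rely on the
 * caller holding the lock can ASSERT(paging_locked_by_me(d)).
 */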
#endif /* _MM_LOCKS_H */