/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * nested_ept.c: Handling virtualized EPT for guests in the nested case.
 *
 * Copyright (c) 2012, Intel Corporation
 *  Xiantao Zhang <xiantao.zhang@intel.com>
 */
#include <xen/vm_event.h>
#include <xen/event.h>
#include <public/vm_event.h>
#include <asm/domain.h>
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mem_sharing.h>
#include <asm/hap.h>
#include <asm/hvm/nestedhvm.h>
#include "private.h"
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vvmx.h>

/* Bits that must be zero (reserved) in EPT entries at all levels. */
#define EPT_MUST_RSV_BITS (((1ull << PADDR_BITS) - 1) & \
                           ~((1ull << paddr_bits) - 1))
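
/*
 * Illustrative example (not in the source): with PADDR_BITS == 52 and a
 * CPU reporting paddr_bits == 46, this evaluates to bits 46..51, i.e. the
 * address bits the architecture defines but the CPU does not implement,
 * which therefore must be zero in every EPT entry.
 */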

#define NEPT_CAP_BITS                                                   \
    (VMX_EPT_INVEPT_ALL_CONTEXT | VMX_EPT_INVEPT_SINGLE_CONTEXT |       \
     VMX_EPT_INVEPT_INSTRUCTION | VMX_EPT_SUPERPAGE_1GB |               \
     VMX_EPT_SUPERPAGE_2MB | VMX_EPT_MEMORY_TYPE_WB |                   \
     VMX_EPT_MEMORY_TYPE_UC | VMX_EPT_WALK_LENGTH_4_SUPPORTED |         \
     VMX_EPT_EXEC_ONLY_SUPPORTED)

#define NVPID_CAP_BITS                                                  \
    (VMX_VPID_INVVPID_INSTRUCTION | VMX_VPID_INVVPID_INDIVIDUAL_ADDR |  \
     VMX_VPID_INVVPID_SINGLE_CONTEXT | VMX_VPID_INVVPID_ALL_CONTEXT |   \
     VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL)

#define NEPT_1G_ENTRY_FLAG (1 << 11)
#define NEPT_2M_ENTRY_FLAG (1 << 10)
#define NEPT_4K_ENTRY_FLAG (1 << 9)
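
/*
 * These flags occupy bits 9..11 of an EPT entry, which this walk treats
 * as software-available.  nept_walk_tables() sets exactly one of them in
 * the synthesized gw->lxe[0] entry to record the page size the walk
 * ended on.
 */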

static bool_t nept_rsv_bits_check(ept_entry_t e, uint32_t level)
{
    uint64_t rsv_bits = EPT_MUST_RSV_BITS;

    switch ( level )
    {
    case 1:
        break;
    case 2 ... 3:
        if ( e.sp )
            rsv_bits |= ((1ull << (9 * (level - 1))) - 1) << PAGE_SHIFT;
        else
            rsv_bits |= EPTE_EMT_MASK | EPTE_IGMT_MASK;
        break;
    case 4:
        rsv_bits |= EPTE_EMT_MASK | EPTE_IGMT_MASK | EPTE_SUPER_PAGE_MASK;
        break;
    default:
        gdprintk(XENLOG_ERR, "Unsupported EPT paging level: %d\n", level);
        BUG();
        break;
    }

    return !!(e.epte & rsv_bits);
}
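
/*
 * Illustrative example (not in the source): for a 2M superpage entry
 * (level 2 with e.sp set), the low 9 bits of the frame field must be
 * zero, so rsv_bits additionally gains entry bits 12..20 on top of
 * EPT_MUST_RSV_BITS.
 */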

/* EMT checking: only leaf entries (superpages or level 1) have an EMT. */
static bool_t nept_emt_bits_check(ept_entry_t e, uint32_t level)
{
    if ( e.sp || level == 1 )
    {
        if ( e.emt == X86_MT_RSVD_2 || e.emt == X86_MT_RSVD_3 ||
             e.emt == X86_MT_UCM )
            return 1;
    }

    return 0;
}
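
/*
 * Every right requested in rwx_acc must be granted by rwx_bits.  E.g.
 * (illustrative) a write to a read-only mapping has rwx_acc == 2 and
 * rwx_bits == 1, so 2 & ~1 is non-zero and the check fails.
 */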
static bool_t nept_permission_check(uint32_t rwx_acc, uint32_t rwx_bits)
{
    return !(EPTE_RWX_MASK & rwx_acc & ~rwx_bits);
}

/* NEPT non-present check: an entry with no rwx bits set is not present. */
static bool_t nept_non_present_check(ept_entry_t e)
{
    if ( e.epte & EPTE_RWX_MASK )
        return 0;

    return 1;
}
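
/*
 * Build the EPT/VPID capability bits advertised to the L1 guest: start
 * from the static NEPT/NVPID masks and drop exec-only support when the
 * host CPU does not offer it.
 */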
uint64_t nept_get_ept_vpid_cap(void)
{
    uint64_t caps = 0;

    if ( cpu_has_vmx_ept )
        caps |= NEPT_CAP_BITS;
    if ( !cpu_has_vmx_ept_exec_only_supported )
        caps &= ~VMX_EPT_EXEC_ONLY_SUPPORTED;
    if ( cpu_has_vmx_vpid )
        caps |= NVPID_CAP_BITS;

    return caps;
}

static bool_t nept_rwx_bits_check(ept_entry_t e)
{
    /* Write-only or write/execute-only entries are invalid. */
    uint8_t rwx_bits = e.epte & EPTE_RWX_MASK;

    if ( rwx_bits == ept_access_w || rwx_bits == ept_access_wx )
        return 1;

    if ( rwx_bits == ept_access_x &&
         !(nept_get_ept_vpid_cap() & VMX_EPT_EXEC_ONLY_SUPPORTED) )
        return 1;

    return 0;
}

/* NEPT misconfiguration check */
static bool_t nept_misconfiguration_check(ept_entry_t e, uint32_t level)
{
    return nept_rsv_bits_check(e, level) ||
           nept_emt_bits_check(e, level) ||
           nept_rwx_bits_check(e);
}

static int ept_lvl_table_offset(unsigned long gpa, int lvl)
{
    return (gpa >> (EPT_L4_PAGETABLE_SHIFT - (4 - lvl) * 9)) &
           (EPT_PAGETABLE_ENTRIES - 1);
}
static uint32_t
nept_walk_tables(struct vcpu *v, unsigned long l2ga, ept_walk_t *gw)
{
    int lvl;
    uint32_t rc = 0, ret = 0, gflags;
    struct domain *d = v->domain;
    struct p2m_domain *p2m = d->arch.p2m;
    gfn_t base_gfn = _gfn(nhvm_vcpu_p2m_base(v) >> PAGE_SHIFT);
    mfn_t lxmfn;
    ept_entry_t *lxp = NULL;

    memset(gw, 0, sizeof(*gw));

    for ( lvl = 4; lvl > 0; lvl-- )
    {
        lxp = map_domain_gfn(p2m, base_gfn, &lxmfn, P2M_ALLOC, &rc);
        if ( !lxp )
            goto map_err;
        gw->lxe[lvl] = lxp[ept_lvl_table_offset(l2ga, lvl)];
        unmap_domain_page(lxp);
        put_page(mfn_to_page(lxmfn));

        if ( nept_non_present_check(gw->lxe[lvl]) )
            goto non_present;

        if ( nept_misconfiguration_check(gw->lxe[lvl], lvl) )
            goto misconfig_err;

        if ( (lvl == 2 || lvl == 3) && gw->lxe[lvl].sp )
        {
            /*
             * Generate a fake l1 table entry so callers don't all have
             * to understand superpages.
             */
            unsigned long gfn_lvl_mask = (1ull << ((lvl - 1) * 9)) - 1;
            gfn_t start = _gfn(gw->lxe[lvl].mfn);

            /* Increment the gfn by the right number of 4k pages. */
            start = _gfn((gfn_x(start) & ~gfn_lvl_mask) +
                         ((l2ga >> PAGE_SHIFT) & gfn_lvl_mask));
            gflags = (gw->lxe[lvl].epte & EPTE_FLAG_MASK) |
                     (lvl == 3 ? NEPT_1G_ENTRY_FLAG : NEPT_2M_ENTRY_FLAG);
            gw->lxe[0].epte = (gfn_x(start) << PAGE_SHIFT) | gflags;
            goto done;
        }

        if ( lvl > 1 )
            base_gfn = _gfn(gw->lxe[lvl].mfn);
    }

    /* We only reach here if the walk did not end on a superpage entry. */
    gflags = (gw->lxe[1].epte & EPTE_FLAG_MASK) | NEPT_4K_ENTRY_FLAG;
    gw->lxe[0].epte = (gw->lxe[1].epte & PAGE_MASK) | gflags;

 done:
    ret = EPT_TRANSLATE_SUCCEED;
    goto out;

 map_err:
    if ( rc == PFEC_page_paged )
    {
        ret = EPT_TRANSLATE_RETRY;
        goto out;
    }
    /* fall through to misconfig error */
 misconfig_err:
    ret = EPT_TRANSLATE_MISCONFIG;
    goto out;

 non_present:
    ret = EPT_TRANSLATE_VIOLATION;
    /* fall through. */
 out:
    return ret;
}
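
/*
 * Illustrative note (not in the source): the NEPT_*_ENTRY_FLAG left in
 * gw.lxe[0] by the walk determines both the page order reported below
 * (0 for 4K, 9 for 2M, 18 for 1G) and how many levels' rwx bits get
 * ANDed together, since a level-1 entry only exists in the 4K case.
 */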

/*
 * Translate an L2 guest address to an L1 gpa via the L1 EPT paging
 * structures.
 */
int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
                        unsigned int *page_order, uint32_t rwx_acc,
                        unsigned long *l1gfn, uint8_t *p2m_acc,
                        uint64_t *exit_qual, uint32_t *exit_reason)
{
    uint32_t rc, rwx_bits = 0;
    ept_walk_t gw;

    rwx_acc &= EPTE_RWX_MASK;
    *l1gfn = gfn_x(INVALID_GFN);

    rc = nept_walk_tables(v, l2ga, &gw);
    switch ( rc )
    {
    case EPT_TRANSLATE_SUCCEED:
        if ( likely(gw.lxe[0].epte & NEPT_2M_ENTRY_FLAG) )
        {
            rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & gw.lxe[2].epte &
                       EPTE_RWX_MASK;
            *page_order = 9;
        }
        else if ( gw.lxe[0].epte & NEPT_4K_ENTRY_FLAG )
        {
            rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & gw.lxe[2].epte &
                       gw.lxe[1].epte & EPTE_RWX_MASK;
            *page_order = 0;
        }
        else if ( gw.lxe[0].epte & NEPT_1G_ENTRY_FLAG )
        {
            rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & EPTE_RWX_MASK;
            *page_order = 18;
        }
        else
        {
            gdprintk(XENLOG_ERR, "Incorrect l1 entry!\n");
            BUG();
        }

        if ( nept_permission_check(rwx_acc, rwx_bits) )
        {
            *l1gfn = gw.lxe[0].mfn;
            *p2m_acc = (uint8_t)rwx_bits;
            break;
        }
        rc = EPT_TRANSLATE_VIOLATION;
        /* Fall through to EPT violation if the permission check fails. */
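    /*
     * Illustrative note (not in the source): in the exit qualification,
     * bits 0..2 encode the attempted access (r/w/x) and bits 3..5 the
     * rights the walk granted, hence rwx_bits << 3 below.
     */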
    case EPT_TRANSLATE_VIOLATION:
        *exit_qual = (*exit_qual & 0xffffffc0) | (rwx_bits << 3) | rwx_acc;
        *exit_reason = EXIT_REASON_EPT_VIOLATION;
        break;

    case EPT_TRANSLATE_MISCONFIG:
        rc = EPT_TRANSLATE_MISCONFIG;
        *exit_qual = 0;
        *exit_reason = EXIT_REASON_EPT_MISCONFIG;
        break;

    case EPT_TRANSLATE_RETRY:
        break;

    default:
        gdprintk(XENLOG_ERR, "Unsupported EPT translation type %d\n", rc);
        BUG();
        break;
    }

    return rc;
}