/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
* arch/x86/msr.c
*
* Policy objects for Model-Specific Registers.
*
* Copyright (c) 2017 Citrix Systems Ltd.
*/
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/nospec.h>
#include <xen/sched.h>
#include <asm/amd.h>
#include <asm/cpu-policy.h>
#include <asm/debugreg.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/hvm/viridian.h>
#include <asm/msr.h>
#include <asm/pv/domain.h>
#include <asm/setup.h>
#include <asm/xstate.h>
#include <public/hvm/params.h>
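
/* Cache of the value last written to MSR_TSC_AUX on each physical CPU. */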
DEFINE_PER_CPU(uint32_t, tsc_aux);
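
/* Allocate and attach the MSR state block for a newly created vCPU. */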
int init_vcpu_msr_policy(struct vcpu *v)
{
struct vcpu_msrs *msrs = xzalloc(struct vcpu_msrs);
if ( !msrs )
return -ENOMEM;
v->arch.msrs = msrs;
return 0;
}
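
/*
 * Emulate a guest RDMSR. Returns X86EMUL_OKAY with *val filled in,
 * X86EMUL_EXCEPTION to have the caller inject #GP, or X86EMUL_UNHANDLEABLE
 * for MSRs still handled by legacy code paths.
 */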
int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
{
const struct vcpu *curr = current;
const struct domain *d = v->domain;
const struct cpu_policy *cp = d->arch.cpu_policy;
const struct vcpu_msrs *msrs = v->arch.msrs;
int ret = X86EMUL_OKAY;
switch ( msr )
{
/* Write-only */
case MSR_AMD_PATCHLOADER:
case MSR_IA32_UCODE_WRITE:
case MSR_PRED_CMD:
case MSR_FLUSH_CMD:
/* Not offered to guests. */
case MSR_TEST_CTRL:
case MSR_CORE_CAPABILITIES:
case MSR_TSX_FORCE_ABORT:
case MSR_TSX_CTRL:
case MSR_MCU_OPT_CTRL:
case MSR_RTIT_OUTPUT_BASE ... MSR_RTIT_ADDR_B(7):
case MSR_U_CET:
case MSR_S_CET:
case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
case MSR_AMD64_LWP_CFG:
case MSR_AMD64_LWP_CBADDR:
case MSR_PPIN_CTL:
case MSR_PPIN:
case MSR_AMD_PPIN_CTL:
case MSR_AMD_PPIN:
goto gp_fault;
case MSR_IA32_FEATURE_CONTROL:
/*
* Architecturally, availability of this MSR is enumerated by the
         * visibility of any sub-feature. However, Win10 in at least some
* configurations performs a read before setting up a #GP handler.
*
* The MSR has existed on all Intel parts since before the 64bit days,
* and is implemented by other vendors.
*/
if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR |
X86_VENDOR_SHANGHAI)) )
goto gp_fault;
*val = IA32_FEATURE_CONTROL_LOCK;
if ( vmce_has_lmce(v) )
*val |= IA32_FEATURE_CONTROL_LMCE_ON;
if ( cp->basic.vmx )
*val |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
break;
case MSR_IA32_PLATFORM_ID:
if ( !(cp->x86_vendor & X86_VENDOR_INTEL) ||
!(boot_cpu_data.x86_vendor & X86_VENDOR_INTEL) )
goto gp_fault;
rdmsrl(MSR_IA32_PLATFORM_ID, *val);
break;
case MSR_AMD_PATCHLEVEL:
BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
/*
* AMD and Intel use the same MSR for the current microcode version.
*
* There is no need to jump through the SDM-provided hoops for Intel.
* A guest might itself perform the "write 0, CPUID, read" sequence,
         * but servicing the CPUID for the guest typically won't result in
* actually executing a CPUID instruction.
*
* As a guest can't influence the value of this MSR, the value will be
* from Xen's last microcode load, which can be forwarded straight to
* the guest.
*/
if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
!(boot_cpu_data.x86_vendor &
(X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
rdmsr_safe(MSR_AMD_PATCHLEVEL, *val) )
goto gp_fault;
break;
case MSR_SPEC_CTRL:
if ( !cp->feat.ibrsb && !cp->extd.ibrs )
goto gp_fault;
goto get_reg;
case MSR_INTEL_PLATFORM_INFO:
*val = cp->platform_info.raw;
break;
case MSR_ARCH_CAPABILITIES:
if ( !cp->feat.arch_caps )
goto gp_fault;
*val = cp->arch_caps.raw;
break;
case MSR_INTEL_MISC_FEATURES_ENABLES:
*val = msrs->misc_features_enables.raw;
break;
case MSR_P5_MC_ADDR:
case MSR_P5_MC_TYPE:
case MSR_IA32_MCG_CAP ... MSR_IA32_MCG_CTL: /* 0x179 -> 0x17b */
case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
case MSR_IA32_MCx_CTL(0) ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
case MSR_IA32_MCG_EXT_CTL: /* 0x4d0 */
if ( vmce_rdmsr(msr, val) < 0 )
goto gp_fault;
break;
/*
* These MSRs are not enumerated in CPUID. They have been around
     * since the Pentium 4, and are implemented by other vendors.
*
* Some versions of Windows try reading these before setting up a #GP
* handler, and Linux has several unguarded reads as well. Provide
* RAZ semantics, in general, but permit a cpufreq controller dom0 to
* have full access.
*/
case MSR_IA32_PERF_STATUS:
case MSR_IA32_PERF_CTL:
if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
goto gp_fault;
*val = 0;
if ( likely(!is_cpufreq_controller(d)) || rdmsr_safe(msr, *val) == 0 )
break;
goto gp_fault;
case MSR_IA32_THERM_STATUS:
if ( cp->x86_vendor != X86_VENDOR_INTEL )
goto gp_fault;
*val = 0;
break;
case MSR_PKRS:
if ( !cp->feat.pks )
goto gp_fault;
goto get_reg;
case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
if ( !is_hvm_domain(d) || v != curr )
goto gp_fault;
ret = guest_rdmsr_x2apic(v, msr, val);
break;
case MSR_IA32_BNDCFGS:
if ( !cp->feat.mpx ) /* Implies Intel HVM only */
goto gp_fault;
goto get_reg;
case MSR_IA32_XSS:
if ( !cp->xstate.xsaves )
goto gp_fault;
*val = msrs->xss.raw;
break;
case 0x40000000 ... 0x400001ff:
if ( is_viridian_domain(d) )
{
ret = guest_rdmsr_viridian(v, msr, val);
break;
}
/* Fallthrough. */
case 0x40000200 ... 0x400002ff:
ret = guest_rdmsr_xen(v, msr, val);
break;
case MSR_TSC_AUX:
if ( !cp->extd.rdtscp && !cp->feat.rdpid )
goto gp_fault;
*val = msrs->tsc_aux;
break;
case MSR_K8_SYSCFG:
case MSR_K8_TOP_MEM1:
case MSR_K8_TOP_MEM2:
case MSR_K8_IORR_BASE0:
case MSR_K8_IORR_MASK0:
case MSR_K8_IORR_BASE1:
case MSR_K8_IORR_MASK1:
case MSR_K8_TSEG_BASE:
case MSR_K8_TSEG_MASK:
if ( !(cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
goto gp_fault;
if ( !is_hardware_domain(d) )
return X86EMUL_UNHANDLEABLE;
if ( rdmsr_safe(msr, *val) )
goto gp_fault;
if ( msr == MSR_K8_SYSCFG )
*val &= (SYSCFG_TOM2_FORCE_WB | SYSCFG_MTRR_TOM2_EN |
SYSCFG_MTRR_VAR_DRAM_EN | SYSCFG_MTRR_FIX_DRAM_EN);
break;
case MSR_K8_HWCR:
if ( !(cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
goto gp_fault;
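        /*
         * Fam10h and later advertise TSC_FREQ_SEL (TSC counts at the P0
         * frequency); no other HWCR bits are exposed to guests.
         */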
*val = get_cpu_family(cp->basic.raw_fms, NULL, NULL) >= 0x10
? K8_HWCR_TSC_FREQ_SEL : 0;
break;
case MSR_VIRT_SPEC_CTRL:
if ( !cp->extd.virt_ssbd )
goto gp_fault;
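        /*
         * With SPEC_CTRL.SSBD available in hardware, the guest's SSBD
         * setting lives in its SPEC_CTRL shadow; otherwise it is tracked
         * separately in virt_spec_ctrl.
         */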
if ( cpu_has_amd_ssbd )
*val = msrs->spec_ctrl.raw & SPEC_CTRL_SSBD;
else
*val = msrs->virt_spec_ctrl.raw;
break;
case MSR_AMD64_DE_CFG:
if ( !(cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
goto gp_fault;
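        /* Report LFENCE as being dispatch serialising. */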
*val = AMD64_DE_CFG_LFENCE_SERIALISE;
break;
case MSR_AMD64_DR0_ADDRESS_MASK:
case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
if ( !cp->extd.dbext )
goto gp_fault;
/*
* In HVM context when we've allowed the guest direct access to debug
* registers, the value in msrs->dr_mask[] may be stale. Re-read it
* out of hardware.
*/
#ifdef CONFIG_HVM
if ( v == curr && is_hvm_domain(d) && v->arch.hvm.flag_dr_dirty )
rdmsrl(msr, *val);
else
#endif
*val = msrs->dr_mask[
array_index_nospec((msr == MSR_AMD64_DR0_ADDRESS_MASK)
? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
ARRAY_SIZE(msrs->dr_mask))];
break;
/*
* TODO: Implement when we have better topology representation.
case MSR_INTEL_CORE_THREAD_COUNT:
*/
default:
return X86EMUL_UNHANDLEABLE;
}
/*
* Interim safety check that functions we dispatch to don't alias "Not yet
* handled by the new MSR infrastructure".
*/
ASSERT(ret != X86EMUL_UNHANDLEABLE);
return ret;
get_reg: /* Delegate register access to per-vm-type logic. */
if ( is_pv_domain(d) )
*val = pv_get_reg(v, msr);
else
*val = hvm_get_reg(v, msr);
return X86EMUL_OKAY;
gp_fault:
return X86EMUL_EXCEPTION;
}

/*
* Caller to confirm that MSR_SPEC_CTRL is available. Intel and AMD have
 * separate CPUID features for this functionality, but only one set will be
* active.
*/
uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp)
{
bool ssbd = cp->feat.ssbd || cp->extd.amd_ssbd;
bool psfd = cp->feat.intel_psfd || cp->extd.psfd;
/*
* Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
* when STIBP isn't enumerated in hardware.
*/
return (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
(ssbd ? SPEC_CTRL_SSBD : 0) |
(psfd ? SPEC_CTRL_PSFD : 0) |
0);
}
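
/*
 * Emulate a guest WRMSR. Returns X86EMUL_OKAY, X86EMUL_EXCEPTION to have
 * the caller inject #GP, or X86EMUL_UNHANDLEABLE for MSRs still handled by
 * legacy code paths.
 */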
int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
{
const struct vcpu *curr = current;
struct domain *d = v->domain;
const struct cpu_policy *cp = d->arch.cpu_policy;
struct vcpu_msrs *msrs = v->arch.msrs;
int ret = X86EMUL_OKAY;
switch ( msr )
{
uint64_t rsvd;
/* Read-only */
case MSR_IA32_PLATFORM_ID:
case MSR_CORE_CAPABILITIES:
case MSR_INTEL_CORE_THREAD_COUNT:
case MSR_INTEL_PLATFORM_INFO:
case MSR_ARCH_CAPABILITIES:
/* Not offered to guests. */
case MSR_TEST_CTRL:
case MSR_TSX_FORCE_ABORT:
case MSR_TSX_CTRL:
case MSR_MCU_OPT_CTRL:
case MSR_RTIT_OUTPUT_BASE ... MSR_RTIT_ADDR_B(7):
case MSR_U_CET:
case MSR_S_CET:
case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
case MSR_AMD64_LWP_CFG:
case MSR_AMD64_LWP_CBADDR:
case MSR_PPIN_CTL:
case MSR_PPIN:
case MSR_AMD_PPIN_CTL:
case MSR_AMD_PPIN:
goto gp_fault;
case MSR_AMD_PATCHLEVEL:
BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
/*
* AMD and Intel use the same MSR for the current microcode version.
*
         * Both document it as read-only. However, Intel also document that,
         * for backwards compatibility, the OS should write 0 to it before
* trying to access the current microcode version.
*/
if ( cp->x86_vendor != X86_VENDOR_INTEL || val != 0 )
goto gp_fault;
break;
case MSR_AMD_PATCHLOADER:
/*
* See note on MSR_IA32_UCODE_WRITE below, which may or may not apply
* to AMD CPUs as well (at least the architectural/CPUID part does).
*/
if ( is_pv_domain(d) ||
cp->x86_vendor != X86_VENDOR_AMD )
goto gp_fault;
break;
case MSR_IA32_UCODE_WRITE:
/*
         * Some versions of Windows, at least on certain hardware, try to load
* microcode before setting up an IDT. Therefore we must not inject #GP
* for such attempts. Also the MSR is architectural and not qualified
* by any CPUID bit.
*/
if ( is_pv_domain(d) ||
cp->x86_vendor != X86_VENDOR_INTEL )
goto gp_fault;
break;
case MSR_SPEC_CTRL:
if ( (!cp->feat.ibrsb && !cp->extd.ibrs) ||
(val & ~msr_spec_ctrl_valid_bits(cp)) )
goto gp_fault;
goto set_reg;
case MSR_PRED_CMD:
if ( !cp->feat.ibrsb && !cp->extd.ibpb )
goto gp_fault; /* MSR available? */
if ( val & ~PRED_CMD_IBPB )
goto gp_fault; /* Rsvd bit set? */
if ( v == curr )
wrmsrl(MSR_PRED_CMD, val);
break;
case MSR_FLUSH_CMD:
if ( !cp->feat.l1d_flush )
goto gp_fault; /* MSR available? */
if ( val & ~FLUSH_CMD_L1D )
goto gp_fault; /* Rsvd bit set? */
if ( v == curr )
wrmsrl(MSR_FLUSH_CMD, val);
break;
case MSR_INTEL_MISC_FEATURES_ENABLES:
{
bool old_cpuid_faulting = msrs->misc_features_enables.cpuid_faulting;
rsvd = ~0ull;
if ( cp->platform_info.cpuid_faulting )
rsvd &= ~MSR_MISC_FEATURES_CPUID_FAULTING;
if ( val & rsvd )
goto gp_fault;
msrs->misc_features_enables.raw = val;
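        /* Reflect CPUID faulting changes in hardware for the running vCPU. */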
if ( v == curr && is_hvm_domain(d) && cpu_has_cpuid_faulting &&
(old_cpuid_faulting ^ msrs->misc_features_enables.cpuid_faulting) )
ctxt_switch_levelling(v);
break;
}
case MSR_IA32_MCG_CAP ... MSR_IA32_MCG_CTL: /* 0x179 -> 0x17b */
case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */
case MSR_IA32_MCx_CTL(0) ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */
case MSR_IA32_MCG_EXT_CTL: /* 0x4d0 */
if ( vmce_wrmsr(msr, val) < 0 )
goto gp_fault;
break;
/*
* This MSR is not enumerated in CPUID. It has been around since the
     * Pentium 4, and is implemented by other vendors.
*
* To match the RAZ semantics, implement as write-discard, except for
* a cpufreq controller dom0 which has full access.
*/
case MSR_IA32_PERF_CTL:
if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
goto gp_fault;
if ( likely(!is_cpufreq_controller(d)) || wrmsr_safe(msr, val) == 0 )
break;
goto gp_fault;
case MSR_PKRS:
if ( !cp->feat.pks || val != (uint32_t)val )
goto gp_fault;
goto set_reg;
case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
if ( !is_hvm_domain(d) || v != curr )
goto gp_fault;
ret = guest_wrmsr_x2apic(v, msr, val);
break;
#ifdef CONFIG_HVM
case MSR_IA32_BNDCFGS:
if ( !cp->feat.mpx || /* Implies Intel HVM only */
!is_canonical_address(val) || (val & IA32_BNDCFGS_RESERVED) )
goto gp_fault;
/*
* While MPX instructions are supposed to be gated on XCR0.BND*, let's
* nevertheless force the relevant XCR0 bits on when the feature is
* being enabled in BNDCFGS.
*/
if ( (val & IA32_BNDCFGS_ENABLE) &&
!(v->arch.xcr0_accum & (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR)) )
{
uint64_t xcr0 = get_xcr0();
if ( v != curr ||
handle_xsetbv(XCR_XFEATURE_ENABLED_MASK,
xcr0 | X86_XCR0_BNDREGS | X86_XCR0_BNDCSR) )
goto gp_fault;
if ( handle_xsetbv(XCR_XFEATURE_ENABLED_MASK, xcr0) )
/* nothing, best effort only */;
}
goto set_reg;
#endif /* CONFIG_HVM */
case MSR_IA32_XSS:
if ( !cp->xstate.xsaves )
goto gp_fault;
/* No XSS features currently supported for guests */
if ( val != 0 )
goto gp_fault;
msrs->xss.raw = val;
break;
case 0x40000000 ... 0x400001ff:
if ( is_viridian_domain(d) )
{
ret = guest_wrmsr_viridian(v, msr, val);
break;
}
/* Fallthrough. */
case 0x40000200 ... 0x400002ff:
ret = guest_wrmsr_xen(v, msr, val);
break;
case MSR_TSC_AUX:
if ( !cp->extd.rdtscp && !cp->feat.rdpid )
goto gp_fault;
if ( val != (uint32_t)val )
goto gp_fault;
msrs->tsc_aux = val;
if ( v == curr )
wrmsr_tsc_aux(val);
break;
case MSR_VIRT_SPEC_CTRL:
if ( !cp->extd.virt_ssbd )
goto gp_fault;
/* Only supports SSBD bit, the rest are ignored. */
if ( cpu_has_amd_ssbd )
{
if ( val & SPEC_CTRL_SSBD )
msrs->spec_ctrl.raw |= SPEC_CTRL_SSBD;
else
msrs->spec_ctrl.raw &= ~SPEC_CTRL_SSBD;
}
else
{
msrs->virt_spec_ctrl.raw = val & SPEC_CTRL_SSBD;
if ( v == curr )
/*
                 * Propagate the value to hardware, as it won't be set on the
                 * guest resume path.
*/
amd_set_legacy_ssbd(val & SPEC_CTRL_SSBD);
}
break;
case MSR_AMD64_DE_CFG:
/*
* OpenBSD 6.7 will panic if writing to DE_CFG triggers a #GP:
* https://www.illumos.org/issues/12998 - drop writes.
*/
if ( !(cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
goto gp_fault;
break;
case MSR_AMD64_DR0_ADDRESS_MASK:
case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
if ( !cp->extd.dbext || val != (uint32_t)val )
goto gp_fault;
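        /*
         * DR0's mask lives in a discontiguous MSR; map it to array slot 0,
         * with DR1-3 following in slots 1-3.
         */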
msrs->dr_mask[
array_index_nospec((msr == MSR_AMD64_DR0_ADDRESS_MASK)
? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
ARRAY_SIZE(msrs->dr_mask))] = val;
if ( v == curr && (curr->arch.dr7 & DR7_ACTIVE_MASK) )
wrmsrl(msr, val);
break;
default:
return X86EMUL_UNHANDLEABLE;
}
/*
* Interim safety check that functions we dispatch to don't alias "Not yet
* handled by the new MSR infrastructure".
*/
ASSERT(ret != X86EMUL_UNHANDLEABLE);
return ret;
set_reg: /* Delegate register access to per-vm-type logic. */
if ( is_pv_domain(d) )
pv_set_reg(v, msr, val);
else
hvm_set_reg(v, msr, val);
return X86EMUL_OKAY;
gp_fault:
return X86EMUL_EXCEPTION;
}

/*
* Local variables:
* mode: C
* c-file-style: "BSD"
* c-basic-offset: 4
* tab-width: 4
* indent-tabs-mode: nil
* End:
*/