summaryrefslogtreecommitdiff
path: root/xen/arch/x86/cpu/mcheck/non-fatal.c
blob: 1c0c32ba0821a4702da0a0fab9795d3f51b0cba5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*
 * Non Fatal Machine Check Exception Reporting
 *
 * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
 *
 * This file contains routines to poll for non-fatal (corrected) MCEs on
 * an adaptive interval: MCE_PERIOD (8s) by default, bounded to the
 * MCE_PERIOD_MIN..MCE_PERIOD_MAX range (2s-16s) on Intel; AMD/Hygon
 * use their own poller (amd_nonfatal_mcheck_init()).
 *
 */

#include <xen/init.h>
#include <xen/types.h>
#include <xen/kernel.h>
#include <xen/smp.h>
#include <xen/timer.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/sched.h>
#include <asm/processor.h> 
#include <asm/system.h>
#include <asm/msr.h>

#include "mce.h"
#include "vmce.h"

static struct timer mce_timer;

#define MCE_PERIOD MILLISECS(8000)
#define MCE_PERIOD_MIN MILLISECS(2000)
#define MCE_PERIOD_MAX MILLISECS(16000)

static uint64_t period = MCE_PERIOD;
static int adjust = 0;
static int variable_period = 1;

static void cf_check mce_checkregs(void *info)
{
	mctelem_cookie_t mctc;
	struct mca_summary bs;
	static uint64_t dumpcount = 0;

	mctc = mcheck_mca_logout(MCA_POLLER, this_cpu(poll_bankmask),
				 &bs, NULL);

	if (bs.errcnt && mctc != NULL) {
		adjust++;

		/* If Dom0 enabled the VIRQ_MCA event, then notify it.
		 * Otherwise, if dom0 has had plenty of time to register
		 * the virq handler but still hasn't then dump telemetry
		 * to the Xen console.  The call count may be incremented
		 * on multiple cpus at once and is indicative only - just
		 * a simple-minded attempt to avoid spamming the console
		 * for corrected errors in early startup.
		 */

		if (dom0_vmce_enabled()) {
			mctelem_commit(mctc);
			send_global_virq(VIRQ_MCA);
		} else if (++dumpcount >= 10) {
			x86_mcinfo_dump((struct mc_info *)mctelem_dataptr(mctc));
			mctelem_dismiss(mctc);
		} else {
			mctelem_dismiss(mctc);
		}
	} else if (mctc != NULL) {
		mctelem_dismiss(mctc);
	}
}

static void cf_check mce_work_fn(void *data)
{ 
	on_each_cpu(mce_checkregs, NULL, 1);

	if (variable_period) {
		if (adjust)
			period /= (adjust + 1);
		else
			period *= 2;
		if (period > MCE_PERIOD_MAX)
			period = MCE_PERIOD_MAX;
		if (period < MCE_PERIOD_MIN)
			period = MCE_PERIOD_MIN;
	}

	set_timer(&mce_timer, NOW() + period);
	adjust = 0;
}

static int __init cf_check init_nonfatal_mce_checker(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* Check for MCE support */
	if (!opt_mce || !mce_available(c))
		return -ENODEV;

	if (!this_cpu(poll_bankmask))
		return -EINVAL;

	/*
	 * Check for non-fatal errors every MCE_RATE s
	 */
	switch (c->x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_HYGON:
		/* Assume we are on K8 or newer AMD or Hygon CPU here */
		amd_nonfatal_mcheck_init(c);
		break;

	case X86_VENDOR_INTEL:
		init_timer(&mce_timer, mce_work_fn, NULL, 0);
		set_timer(&mce_timer, NOW() + MCE_PERIOD);
		break;
	}

	printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n");
	return 0;
}
__initcall(init_nonfatal_mce_checker);