/*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; If not, see <http://www.gnu.org/licenses/>.
*/
#include <xen/sched.h>
#include <xen/iommu.h>
#include <xen/paging.h>
#include <xen/guest_access.h>
#include <xen/event.h>
#include <xen/softirq.h>
#include <xen/keyhandler.h>
#include <xsm/xsm.h>

unsigned int __read_mostly iommu_dev_iotlb_timeout = 1000;
integer_param("iommu_dev_iotlb_timeout", iommu_dev_iotlb_timeout);
bool_t __initdata iommu_enable = 1;
bool_t __read_mostly iommu_enabled;
bool_t __read_mostly force_iommu;
bool_t __read_mostly iommu_verbose;
static bool_t __read_mostly iommu_crash_disable;
#define IOMMU_quarantine_none 0 /* aka false */
#define IOMMU_quarantine_basic 1 /* aka true */
#define IOMMU_quarantine_scratch_page 2
#ifdef CONFIG_HAS_PCI
uint8_t __read_mostly iommu_quarantine =
# if defined(CONFIG_IOMMU_QUARANTINE_NONE)
IOMMU_quarantine_none;
# elif defined(CONFIG_IOMMU_QUARANTINE_BASIC)
IOMMU_quarantine_basic;
# elif defined(CONFIG_IOMMU_QUARANTINE_SCRATCH_PAGE)
IOMMU_quarantine_scratch_page;
# endif
#else
# define iommu_quarantine IOMMU_quarantine_none
#endif /* CONFIG_HAS_PCI */
static bool __hwdom_initdata iommu_hwdom_none;
bool __hwdom_initdata iommu_hwdom_strict;
bool __read_mostly iommu_hwdom_passthrough;
bool __hwdom_initdata iommu_hwdom_inclusive;
int8_t __hwdom_initdata iommu_hwdom_reserved = -1;
#ifndef iommu_hap_pt_share
bool __read_mostly iommu_hap_pt_share = true;
#endif
bool_t __read_mostly iommu_debug;
DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
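
/*
 * Parse the "iommu=" command line option: a comma separated list of
 * (possibly negated) boolean sub-options, e.g.
 * "iommu=force,verbose,quarantine=scratch-page".  An unrecognised element
 * makes the parse return -EINVAL, but all recognised elements still take
 * effect.
 */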
static int __init cf_check parse_iommu_param(const char *s)
{
const char *ss;
int val, rc = 0;
do {
ss = strchr(s, ',');
if ( !ss )
ss = strchr(s, '\0');
if ( (val = parse_bool(s, ss)) >= 0 )
iommu_enable = val;
else if ( (val = parse_boolean("force", s, ss)) >= 0 ||
(val = parse_boolean("required", s, ss)) >= 0 )
force_iommu = val;
#ifdef CONFIG_HAS_PCI
else if ( (val = parse_boolean("quarantine", s, ss)) >= 0 )
iommu_quarantine = val;
else if ( ss == s + 23 && !strncmp(s, "quarantine=scratch-page", 23) )
iommu_quarantine = IOMMU_quarantine_scratch_page;
#endif
else if ( (val = parse_boolean("igfx", s, ss)) >= 0 )
#ifdef CONFIG_INTEL_IOMMU
iommu_igfx = val;
#else
no_config_param("INTEL_IOMMU", "iommu", s, ss);
#endif
else if ( (val = parse_boolean("qinval", s, ss)) >= 0 )
#ifdef CONFIG_INTEL_IOMMU
iommu_qinval = val;
#else
no_config_param("INTEL_IOMMU", "iommu", s, ss);
#endif
#ifdef CONFIG_X86
else if ( (val = parse_boolean("superpages", s, ss)) >= 0 )
iommu_superpages = val;
#endif
else if ( (val = parse_boolean("verbose", s, ss)) >= 0 )
iommu_verbose = val;
#ifndef iommu_snoop
else if ( (val = parse_boolean("snoop", s, ss)) >= 0 )
iommu_snoop = val;
#endif
#ifndef iommu_intremap
else if ( (val = parse_boolean("intremap", s, ss)) >= 0 )
iommu_intremap = val ? iommu_intremap_full : iommu_intremap_off;
#endif
#ifndef iommu_intpost
else if ( (val = parse_boolean("intpost", s, ss)) >= 0 )
iommu_intpost = val;
#endif
#ifdef CONFIG_KEXEC
else if ( (val = parse_boolean("crash-disable", s, ss)) >= 0 )
iommu_crash_disable = val;
#endif
else if ( (val = parse_boolean("debug", s, ss)) >= 0 )
{
iommu_debug = val;
if ( val )
iommu_verbose = 1;
}
else if ( (val = parse_boolean("amd-iommu-perdev-intremap", s, ss)) >= 0 )
#ifdef CONFIG_AMD_IOMMU
amd_iommu_perdev_intremap = val;
#else
no_config_param("AMD_IOMMU", "iommu", s, ss);
#endif
else if ( (val = parse_boolean("dom0-passthrough", s, ss)) >= 0 )
iommu_hwdom_passthrough = val;
else if ( (val = parse_boolean("dom0-strict", s, ss)) >= 0 )
iommu_hwdom_strict = val;
#ifndef iommu_hap_pt_share
else if ( (val = parse_boolean("sharept", s, ss)) >= 0 )
iommu_hap_pt_share = val;
#endif
else
rc = -EINVAL;
s = ss + 1;
} while ( *ss );
return rc;
}
custom_param("iommu", parse_iommu_param);
static int __init cf_check parse_dom0_iommu_param(const char *s)
{
const char *ss;
int rc = 0;
do {
int val;
ss = strchr(s, ',');
if ( !ss )
ss = strchr(s, '\0');
if ( (val = parse_boolean("passthrough", s, ss)) >= 0 )
iommu_hwdom_passthrough = val;
else if ( (val = parse_boolean("strict", s, ss)) >= 0 )
iommu_hwdom_strict = val;
else if ( (val = parse_boolean("map-inclusive", s, ss)) >= 0 )
iommu_hwdom_inclusive = val;
else if ( (val = parse_boolean("map-reserved", s, ss)) >= 0 )
iommu_hwdom_reserved = val;
else if ( !cmdline_strcmp(s, "none") )
iommu_hwdom_none = true;
else
rc = -EINVAL;
s = ss + 1;
} while ( *ss );
return rc;
}
custom_param("dom0-iommu", parse_dom0_iommu_param);
static void __hwdom_init check_hwdom_reqs(struct domain *d)
{
if ( iommu_hwdom_none || !is_hvm_domain(d) )
return;
iommu_hwdom_passthrough = false;
iommu_hwdom_strict = true;
arch_iommu_check_autotranslated_hwdom(d);
}
int iommu_domain_init(struct domain *d, unsigned int opts)
{
struct domain_iommu *hd = dom_iommu(d);
int ret = 0;
if ( is_hardware_domain(d) )
check_hwdom_reqs(d); /* may modify iommu_hwdom_strict */
if ( !is_iommu_enabled(d) )
return 0;
#ifdef CONFIG_NUMA
hd->node = NUMA_NO_NODE;
#endif
ret = arch_iommu_domain_init(d);
if ( ret )
return ret;
hd->platform_ops = iommu_get_ops();
ret = iommu_call(hd->platform_ops, init, d);
if ( ret || is_system_domain(d) )
return ret;
/*
* Use shared page tables for HAP and IOMMU if the global option
* is enabled (from which we can infer the h/w is capable) and
* the domain options do not disallow it. HAP must, of course, also
* be enabled.
*/
hd->hap_pt_share = hap_enabled(d) && iommu_hap_pt_share &&
!(opts & XEN_DOMCTL_IOMMU_no_sharept);
/*
* NB: 'relaxed' h/w domains don't need the IOMMU mappings to be kept
* in-sync with their assigned pages because all host RAM will be
* mapped during hwdom_init().
*/
if ( !is_hardware_domain(d) || iommu_hwdom_strict )
hd->need_sync = !iommu_use_hap_pt(d);
ASSERT(!(hd->need_sync && hd->hap_pt_share));
return 0;
}
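
/*
 * Debug key handler: for each domain with an enabled IOMMU (the hardware
 * domain excepted), either report that it shares the HAP page tables or
 * have the vendor driver dump its IOMMU page tables.
 */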
static void cf_check iommu_dump_page_tables(unsigned char key)
{
struct domain *d;
ASSERT(iommu_enabled);
rcu_read_lock(&domlist_read_lock);
for_each_domain(d)
{
if ( is_hardware_domain(d) || !is_iommu_enabled(d) )
continue;
if ( iommu_use_hap_pt(d) )
{
printk("%pd sharing page tables\n", d);
continue;
}
iommu_vcall(dom_iommu(d)->platform_ops, dump_page_tables, d);
}
rcu_read_unlock(&domlist_read_lock);
}
void __hwdom_init iommu_hwdom_init(struct domain *d)
{
struct domain_iommu *hd = dom_iommu(d);
if ( !is_iommu_enabled(d) )
return;
register_keyhandler('o', &iommu_dump_page_tables,
                    "dump iommu page tables", 0);
iommu_vcall(hd->platform_ops, hwdom_init, d);
}
static void iommu_teardown(struct domain *d)
{
struct domain_iommu *hd = dom_iommu(d);
/*
* During early domain creation failure, we may reach here with the
* ops not yet initialized.
*/
if ( !hd->platform_ops )
return;
iommu_vcall(hd->platform_ops, teardown, d);
}
void iommu_domain_destroy(struct domain *d)
{
if ( !is_iommu_enabled(d) )
return;
iommu_teardown(d);
arch_iommu_domain_destroy(d);
}
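
/*
 * Compute the largest mapping order usable for a mapping starting at
 * dfn/mfn and covering nr pages: walk the hardware's supported page
 * sizes from small to large, stopping as soon as the next size would
 * break the alignment of dfn/mfn or exceed the remaining page count.
 */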
static unsigned int mapping_order(const struct domain_iommu *hd,
dfn_t dfn, mfn_t mfn, unsigned long nr)
{
unsigned long res = dfn_x(dfn) | mfn_x(mfn);
unsigned long sizes = hd->platform_ops->page_sizes;
unsigned int bit = find_first_set_bit(sizes), order = 0;
ASSERT(bit == PAGE_SHIFT);
while ( (sizes = (sizes >> bit) & ~1) )
{
unsigned long mask;
bit = find_first_set_bit(sizes);
mask = (1UL << bit) - 1;
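
        /*
         * Don't move to the next (larger) size if the chunk wouldn't be
         * suitably aligned or the remaining count is insufficient.
         */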
if ( nr <= mask || (res & mask) )
break;
order += bit;
nr >>= bit;
res >>= bit;
}
return order;
}
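
/*
 * Map a contiguous range of pages.  Returns 0 on success, a negative
 * errno on failure (with any partial mapping torn down and the domain
 * crashed unless it is the hardware domain), or, when IOMMUF_preempt is
 * set, the number of pages processed so far if a continuation is needed.
 */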
long iommu_map(struct domain *d, dfn_t dfn0, mfn_t mfn0,
unsigned long page_count, unsigned int flags,
unsigned int *flush_flags)
{
const struct domain_iommu *hd = dom_iommu(d);
unsigned long i;
unsigned int order, j = 0;
int rc = 0;
if ( !is_iommu_enabled(d) )
return 0;
ASSERT(!IOMMUF_order(flags));
for ( i = 0; i < page_count; i += 1UL << order )
{
dfn_t dfn = dfn_add(dfn0, i);
mfn_t mfn = mfn_add(mfn0, i);
order = mapping_order(hd, dfn, mfn, page_count - i);
if ( (flags & IOMMUF_preempt) &&
((!(++j & 0xfff) && general_preempt_check()) ||
i > LONG_MAX - (1UL << order)) )
return i;
rc = iommu_call(hd->platform_ops, map_page, d, dfn, mfn,
flags | IOMMUF_order(order), flush_flags);
if ( likely(!rc) )
continue;
if ( !d->is_shutting_down && printk_ratelimit() )
printk(XENLOG_ERR
"d%d: IOMMU mapping dfn %"PRI_dfn" to mfn %"PRI_mfn" failed: %d\n",
d->domain_id, dfn_x(dfn), mfn_x(mfn), rc);
/* while statement to satisfy __must_check */
while ( iommu_unmap(d, dfn0, i, 0, flush_flags) )
break;
if ( !is_hardware_domain(d) )
domain_crash(d);
break;
}
/*
* Something went wrong so, if we were dealing with more than a single
* page, flush everything and clear flush flags.
*/
if ( page_count > 1 && unlikely(rc) &&
!iommu_iotlb_flush_all(d, *flush_flags) )
*flush_flags = 0;
return rc;
}
int iommu_legacy_map(struct domain *d, dfn_t dfn, mfn_t mfn,
unsigned long page_count, unsigned int flags)
{
unsigned int flush_flags = 0;
int rc;
ASSERT(!(flags & IOMMUF_preempt));
rc = iommu_map(d, dfn, mfn, page_count, flags, &flush_flags);
if ( !this_cpu(iommu_dont_flush_iotlb) && !rc )
rc = iommu_iotlb_flush(d, dfn, page_count, flush_flags);
return rc;
}
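
/*
 * Unmap a contiguous range of pages.  Unlike iommu_map(), an individual
 * failure doesn't stop the walk for the hardware domain; the first error
 * encountered is what gets returned.  IOMMUF_preempt behaves as for
 * iommu_map().
 */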
long iommu_unmap(struct domain *d, dfn_t dfn0, unsigned long page_count,
unsigned int flags, unsigned int *flush_flags)
{
const struct domain_iommu *hd = dom_iommu(d);
unsigned long i;
unsigned int order, j = 0;
int rc = 0;
if ( !is_iommu_enabled(d) )
return 0;
ASSERT(!(flags & ~IOMMUF_preempt));
for ( i = 0; i < page_count; i += 1UL << order )
{
dfn_t dfn = dfn_add(dfn0, i);
int err;
order = mapping_order(hd, dfn, _mfn(0), page_count - i);
if ( (flags & IOMMUF_preempt) &&
((!(++j & 0xfff) && general_preempt_check()) ||
i > LONG_MAX - (1UL << order)) )
return i;
err = iommu_call(hd->platform_ops, unmap_page, d, dfn,
flags | IOMMUF_order(order), flush_flags);
if ( likely(!err) )
continue;
if ( !d->is_shutting_down && printk_ratelimit() )
printk(XENLOG_ERR
"d%d: IOMMU unmapping dfn %"PRI_dfn" failed: %d\n",
d->domain_id, dfn_x(dfn), err);
if ( !rc )
rc = err;
if ( !is_hardware_domain(d) )
{
domain_crash(d);
break;
}
}
/*
* Something went wrong so, if we were dealing with more than a single
* page, flush everything and clear flush flags.
*/
if ( page_count > 1 && unlikely(rc) &&
!iommu_iotlb_flush_all(d, *flush_flags) )
*flush_flags = 0;
return rc;
}
int iommu_legacy_unmap(struct domain *d, dfn_t dfn, unsigned long page_count)
{
unsigned int flush_flags = 0;
int rc = iommu_unmap(d, dfn, page_count, 0, &flush_flags);
if ( !this_cpu(iommu_dont_flush_iotlb) && !rc )
rc = iommu_iotlb_flush(d, dfn, page_count, flush_flags);
return rc;
}
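
/*
 * Look up the MFN and mapping flags a DFN is currently mapped to, if the
 * vendor driver implements a lookup hook.
 */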
int iommu_lookup_page(struct domain *d, dfn_t dfn, mfn_t *mfn,
unsigned int *flags)
{
const struct domain_iommu *hd = dom_iommu(d);
if ( !is_iommu_enabled(d) || !hd->platform_ops->lookup_page )
return -EOPNOTSUPP;
return iommu_call(hd->platform_ops, lookup_page, d, dfn, mfn, flags);
}
int iommu_iotlb_flush(struct domain *d, dfn_t dfn, unsigned long page_count,
unsigned int flush_flags)
{
const struct domain_iommu *hd = dom_iommu(d);
int rc;
if ( !is_iommu_enabled(d) || !hd->platform_ops->iotlb_flush ||
!page_count || !flush_flags )
return 0;
if ( dfn_eq(dfn, INVALID_DFN) )
return -EINVAL;
rc = iommu_call(hd->platform_ops, iotlb_flush, d, dfn, page_count,
flush_flags);
if ( unlikely(rc) )
{
if ( !d->is_shutting_down && printk_ratelimit() )
printk(XENLOG_ERR
"d%d: IOMMU IOTLB flush failed: %d, dfn %"PRI_dfn", page count %lu flags %x\n",
d->domain_id, rc, dfn_x(dfn), page_count, flush_flags);
if ( !is_hardware_domain(d) )
domain_crash(d);
}
return rc;
}
int iommu_iotlb_flush_all(struct domain *d, unsigned int flush_flags)
{
const struct domain_iommu *hd = dom_iommu(d);
int rc;
if ( !is_iommu_enabled(d) || !hd->platform_ops->iotlb_flush ||
!flush_flags )
return 0;
rc = iommu_call(hd->platform_ops, iotlb_flush, d, INVALID_DFN, 0,
flush_flags | IOMMU_FLUSHF_all);
if ( unlikely(rc) )
{
if ( !d->is_shutting_down && printk_ratelimit() )
printk(XENLOG_ERR
"d%d: IOMMU IOTLB flush all failed: %d\n",
d->domain_id, rc);
if ( !is_hardware_domain(d) )
domain_crash(d);
}
return rc;
}
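
/*
 * Set up dom_io's (quarantine) context for a device about to be
 * quarantined.  The vendor hook is told whether scratch-page mode was
 * requested, i.e. (presumably) whether leaf entries should reference a
 * dummy scratch page rather than simply being left non-present.
 */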
int iommu_quarantine_dev_init(device_t *dev)
{
const struct domain_iommu *hd = dom_iommu(dom_io);
if ( !iommu_quarantine || !hd->platform_ops->quarantine_init )
return 0;
return iommu_call(hd->platform_ops, quarantine_init,
dev, iommu_quarantine == IOMMU_quarantine_scratch_page);
}
static int __init iommu_quarantine_init(void)
{
dom_io->options |= XEN_DOMCTL_CDF_iommu;
return iommu_domain_init(dom_io, 0);
}
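
/*
 * Late boot initialisation: probe the hardware, sanity check the
 * driver's supported page sizes (the smallest must be PAGE_SIZE), and
 * resolve the interactions between the "iommu=" sub-options, panicking
 * if a forced ("iommu=required/force") feature couldn't be enabled.
 */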
int __init iommu_setup(void)
{
int rc = -ENODEV;
bool_t force_intremap = force_iommu && iommu_intremap;
if ( iommu_hwdom_strict )
iommu_hwdom_passthrough = false;
if ( iommu_enable )
{
const struct iommu_ops *ops = NULL;
rc = iommu_hardware_setup();
if ( !rc )
ops = iommu_get_ops();
if ( ops && (ops->page_sizes & -ops->page_sizes) != PAGE_SIZE )
{
printk(XENLOG_ERR "IOMMU: page size mask %lx unsupported\n",
ops->page_sizes);
rc = ops->page_sizes ? -EPERM : -ENODATA;
}
iommu_enabled = (rc == 0);
}
#ifndef iommu_intremap
if ( !iommu_enabled )
iommu_intremap = iommu_intremap_off;
#endif
if ( (force_iommu && !iommu_enabled) ||
(force_intremap && !iommu_intremap) )
panic("Couldn't enable %s and iommu=required/force\n",
!iommu_enabled ? "IOMMU" : "Interrupt Remapping");
#ifndef iommu_intpost
if ( !iommu_intremap )
iommu_intpost = false;
#endif
printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
if ( !iommu_enabled )
{
iommu_hwdom_passthrough = false;
iommu_hwdom_strict = false;
}
else
{
if ( iommu_quarantine_init() )
panic("Could not set up quarantine\n");
printk(" - Dom0 mode: %s\n",
iommu_hwdom_passthrough ? "Passthrough" :
iommu_hwdom_strict ? "Strict" : "Relaxed");
#ifndef iommu_intremap
printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis");
#endif
}
return rc;
}
int iommu_suspend(void)
{
if ( iommu_enabled )
return iommu_call(iommu_get_ops(), suspend);
return 0;
}
void iommu_resume(void)
{
if ( iommu_enabled )
iommu_vcall(iommu_get_ops(), resume);
}
int iommu_do_domctl(
struct xen_domctl *domctl, struct domain *d,
XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
int ret = -ENODEV;
if ( !(d ? is_iommu_enabled(d) : iommu_enabled) )
return -EOPNOTSUPP;
#ifdef CONFIG_HAS_PCI
ret = iommu_do_pci_domctl(domctl, d, u_domctl);
#endif
#ifdef CONFIG_HAS_DEVICE_TREE
if ( ret == -ENODEV )
ret = iommu_do_dt_domctl(domctl, d, u_domctl);
#endif
return ret;
}
void iommu_crash_shutdown(void)
{
if ( !iommu_crash_disable )
return;
if ( iommu_enabled )
iommu_vcall(iommu_get_ops(), crash_shutdown);
iommu_enabled = false;
#ifndef iommu_intremap
iommu_intremap = iommu_intremap_off;
#endif
#ifndef iommu_intpost
iommu_intpost = false;
#endif
}
int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
{
const struct iommu_ops *ops;
if ( !iommu_enabled )
return 0;
ops = iommu_get_ops();
if ( !ops->get_reserved_device_memory )
return 0;
return iommu_call(ops, get_reserved_device_memory, func, ctxt);
}
bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
{
return is_iommu_enabled(d) && test_bit(feature, dom_iommu(d)->features);
}
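
/*
 * Additional reserved device memory ranges, registered during (early)
 * boot on top of what firmware tables report, and iterated over via
 * iommu_get_extra_reserved_device_memory().
 */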
#define MAX_EXTRA_RESERVED_RANGES 20
struct extra_reserved_range {
unsigned long start;
unsigned long nr;
pci_sbdf_t sbdf;
};
static unsigned int __initdata nr_extra_reserved_ranges;
static struct extra_reserved_range __initdata
extra_reserved_ranges[MAX_EXTRA_RESERVED_RANGES];
int __init iommu_add_extra_reserved_device_memory(unsigned long start,
unsigned long nr,
pci_sbdf_t sbdf)
{
unsigned int idx;
if ( nr_extra_reserved_ranges >= MAX_EXTRA_RESERVED_RANGES )
return -ENOMEM;
idx = nr_extra_reserved_ranges++;
extra_reserved_ranges[idx].start = start;
extra_reserved_ranges[idx].nr = nr;
extra_reserved_ranges[idx].sbdf = sbdf;
return 0;
}
int __init iommu_get_extra_reserved_device_memory(iommu_grdm_t *func,
void *ctxt)
{
unsigned int idx;
int ret;
for ( idx = 0; idx < nr_extra_reserved_ranges; idx++ )
{
ret = func(extra_reserved_ranges[idx].start,
extra_reserved_ranges[idx].nr,
extra_reserved_ranges[idx].sbdf.sbdf,
ctxt);
if ( ret < 0 )
return ret;
}
return 0;
}
/*
* Local variables:
* mode: C
* c-file-style: "BSD"
* c-basic-offset: 4
* indent-tabs-mode: nil
* End:
*/