Diffstat (limited to 'arch/arm/xen')
-rw-r--r--	arch/arm/xen/Makefile		|   2
-rw-r--r--	arch/arm/xen/enlighten.c	| 450
-rw-r--r--	arch/arm/xen/grant-table.c	|  58
-rw-r--r--	arch/arm/xen/hypercall.S	| 122
-rw-r--r--	arch/arm/xen/mm.c		| 153
-rw-r--r--	arch/arm/xen/p2m.c		| 211
6 files changed, 996 insertions, 0 deletions
diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile
new file mode 100644
index 000000000..c32d04713
--- /dev/null
+++ b/arch/arm/xen/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
new file mode 100644
index 000000000..518c7557f
--- /dev/null
+++ b/arch/arm/xen/enlighten.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <xen/xen.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/hvm.h>
+#include <xen/interface/vcpu.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/hvm/params.h>
+#include <xen/features.h>
+#include <xen/platform_pci.h>
+#include <xen/xenbus.h>
+#include <xen/page.h>
+#include <xen/interface/sched.h>
+#include <xen/xen-ops.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include <asm/system_misc.h>
+#include <asm/efi.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/cpuidle.h>
+#include <linux/cpufreq.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/pvclock_gtod.h>
+#include <linux/time64.h>
+#include <linux/timekeeping.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/acpi.h>
+
+#include <linux/mm.h>
+
+static struct start_info _xen_start_info;
+struct start_info *xen_start_info = &_xen_start_info;
+EXPORT_SYMBOL(xen_start_info);
+
+enum xen_domain_type xen_domain_type = XEN_NATIVE;
+EXPORT_SYMBOL(xen_domain_type);
+
+struct shared_info xen_dummy_shared_info;
+struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
+
+DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+static struct vcpu_info __percpu *xen_vcpu_info;
+
+/* Linux <-> Xen vCPU id mapping */
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
+EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
+
+/* These are unused until we support booting "pre-ballooned" */
+unsigned long xen_released_pages;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+static __read_mostly unsigned int xen_events_irq;
+
+uint32_t xen_start_flags;
+EXPORT_SYMBOL(xen_start_flags);
+
+int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
+			       int nr, struct page **pages)
+{
+	return xen_xlate_unmap_gfn_range(vma, nr, pages);
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
+
+static void xen_read_wallclock(struct timespec64 *ts)
+{
+	u32 version;
+	struct timespec64 now, ts_monotonic;
+	struct shared_info *s = HYPERVISOR_shared_info;
+	struct pvclock_wall_clock *wall_clock = &(s->wc);
+
+	/* get wallclock at system boot */
+	do {
+		version = wall_clock->version;
+		rmb();		/* fetch version before time */
+		now.tv_sec = ((uint64_t)wall_clock->sec_hi << 32) | wall_clock->sec;
+		now.tv_nsec = wall_clock->nsec;
+		rmb();		/* fetch time before checking version */
+	} while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+	/* time since system boot */
+	ktime_get_ts64(&ts_monotonic);
+	*ts = timespec64_add(now, ts_monotonic);
+}
+
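[Note: the retry loop above is the standard pvclock reader protocol: the hypervisor makes wc.version odd before it touches the wallclock fields and even again once they are consistent, which is exactly what the two checks in the while condition detect. For intuition, a sketch of the writer side — illustration only, not part of this patch:

	/*
	 *	wc->version++;		// now odd: update in progress
	 *	wmb();
	 *	update wc->sec_hi, wc->sec, wc->nsec;
	 *	wmb();
	 *	wc->version++;		// even again: snapshot consistent
	 *
	 * A reader must therefore retry on an odd version, or when the
	 * version changed between its two reads.
	 */]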
+static int xen_pvclock_gtod_notify(struct notifier_block *nb,
+				   unsigned long was_set, void *priv)
+{
+	/* Protected by the calling core code serialization */
+	static struct timespec64 next_sync;
+
+	struct xen_platform_op op;
+	struct timespec64 now, system_time;
+	struct timekeeper *tk = priv;
+
+	now.tv_sec = tk->xtime_sec;
+	now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
+	system_time = timespec64_add(now, tk->wall_to_monotonic);
+
+	/*
+	 * We only take the expensive HV call when the clock was set
+	 * or when the 11 minutes RTC synchronization time elapsed.
+	 */
+	if (!was_set && timespec64_compare(&now, &next_sync) < 0)
+		return NOTIFY_OK;
+
+	op.cmd = XENPF_settime64;
+	op.u.settime64.mbz = 0;
+	op.u.settime64.secs = now.tv_sec;
+	op.u.settime64.nsecs = now.tv_nsec;
+	op.u.settime64.system_time = timespec64_to_ns(&system_time);
+	(void)HYPERVISOR_platform_op(&op);
+
+	/*
+	 * Move the next drift compensation time 11 minutes
+	 * ahead. That's emulating the sync_cmos_clock() update for
+	 * the hardware RTC.
+	 */
+	next_sync = now;
+	next_sync.tv_sec += 11 * 60;
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block xen_pvclock_gtod_notifier = {
+	.notifier_call = xen_pvclock_gtod_notify,
+};
+
+static int xen_starting_cpu(unsigned int cpu)
+{
+	struct vcpu_register_vcpu_info info;
+	struct vcpu_info *vcpup;
+	int err;
+
+	/*
+	 * VCPUOP_register_vcpu_info cannot be called twice for the same
+	 * vcpu, so if vcpu_info is already registered, just get out. This
+	 * can happen with cpu-hotplug.
+	 */
+	if (per_cpu(xen_vcpu, cpu) != NULL)
+		goto after_register_vcpu_info;
+
+	pr_info("Xen: initializing cpu%d\n", cpu);
+	vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
+
+	info.mfn = percpu_to_gfn(vcpup);
+	info.offset = xen_offset_in_page(vcpup);
+
+	err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
+				 &info);
+	BUG_ON(err);
+	per_cpu(xen_vcpu, cpu) = vcpup;
+
+after_register_vcpu_info:
+	enable_percpu_irq(xen_events_irq, 0);
+	return 0;
+}
+
+static int xen_dying_cpu(unsigned int cpu)
+{
+	disable_percpu_irq(xen_events_irq);
+	return 0;
+}
+
+void xen_reboot(int reason)
+{
+	struct sched_shutdown r = { .reason = reason };
+	int rc;
+
+	rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
+	BUG_ON(rc);
+}
+
+static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
+{
+	xen_reboot(SHUTDOWN_reboot);
+}
+
+
+static void xen_power_off(void)
+{
+	xen_reboot(SHUTDOWN_poweroff);
+}
+
+static irqreturn_t xen_arm_callback(int irq, void *arg)
+{
+	xen_hvm_evtchn_do_upcall();
+	return IRQ_HANDLED;
+}
+
+static __initdata struct {
+	const char *compat;
+	const char *prefix;
+	const char *version;
+	bool found;
+} hyper_node = {"xen,xen", "xen,xen-", NULL, false};
+
+static int __init fdt_find_hyper_node(unsigned long node, const char *uname,
+				      int depth, void *data)
+{
+	const void *s = NULL;
+	int len;
+
+	if (depth != 1 || strcmp(uname, "hypervisor") != 0)
+		return 0;
+
+	if (of_flat_dt_is_compatible(node, hyper_node.compat))
+		hyper_node.found = true;
+
+	s = of_get_flat_dt_prop(node, "compatible", &len);
+	if (strlen(hyper_node.prefix) + 3 < len &&
+	    !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix)))
+		hyper_node.version = s + strlen(hyper_node.prefix);
+
+	/*
+	 * Check if Xen supports EFI by checking whether the
+	 * "/hypervisor/uefi" node is present in the DT. If so, runtime
+	 * services are available through proxy functions (e.g. in case of
+	 * the Xen dom0 EFI implementation they call a special hypercall
+	 * which executes the relevant EFI functions) and that is why they
+	 * are always enabled.
+	 */
+	if (IS_ENABLED(CONFIG_XEN_EFI)) {
+		if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) &&
+		    !efi_runtime_disabled())
+			set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+	}
+
+	return 0;
+}
+
+/*
+ * see Documentation/devicetree/bindings/arm/xen.txt for the
+ * documentation of the Xen Device Tree format.
+ */
+void __init xen_early_init(void)
+{
+	of_scan_flat_dt(fdt_find_hyper_node, NULL);
+	if (!hyper_node.found) {
+		pr_debug("No Xen support\n");
+		return;
+	}
+
+	if (hyper_node.version == NULL) {
+		pr_debug("Xen version not found\n");
+		return;
+	}
+
+	pr_info("Xen %s support found\n", hyper_node.version);
+
+	xen_domain_type = XEN_HVM_DOMAIN;
+
+	xen_setup_features();
+
+	if (xen_feature(XENFEAT_dom0))
+		xen_start_flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
+
+	if (!console_set_on_cmdline && !xen_initial_domain())
+		add_preferred_console("hvc", 0, NULL);
+}
+
+static void __init xen_acpi_guest_init(void)
+{
+#ifdef CONFIG_ACPI
+	struct xen_hvm_param a;
+	int interrupt, trigger, polarity;
+
+	a.domid = DOMID_SELF;
+	a.index = HVM_PARAM_CALLBACK_IRQ;
+
+	if (HYPERVISOR_hvm_op(HVMOP_get_param, &a)
+	    || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) {
+		xen_events_irq = 0;
+		return;
+	}
+
+	interrupt = a.value & 0xff;
+	trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE
+					 : ACPI_LEVEL_SENSITIVE;
+	polarity = ((a.value >> 8) & 0x2) ? ACPI_ACTIVE_LOW
+					  : ACPI_ACTIVE_HIGH;
+	xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity);
+#endif
+}
+
+static void __init xen_dt_guest_init(void)
+{
+	struct device_node *xen_node;
+
+	xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
+	if (!xen_node) {
+		pr_err("Xen support was detected before, but it has disappeared\n");
+		return;
+	}
+
+	xen_events_irq = irq_of_parse_and_map(xen_node, 0);
+}
+
+static int __init xen_guest_init(void)
+{
+	struct xen_add_to_physmap xatp;
+	struct shared_info *shared_info_page = NULL;
+	int cpu;
+
+	if (!xen_domain())
+		return 0;
+
+	if (!acpi_disabled)
+		xen_acpi_guest_init();
+	else
+		xen_dt_guest_init();
+
+	if (!xen_events_irq) {
+		pr_err("Xen event channel interrupt not found\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * The fdt parsing code has set EFI_RUNTIME_SERVICES if Xen EFI
+	 * parameters are found. Force enable runtime services.
+	 */
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
+		xen_efi_runtime_setup();
+
+	shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
+
+	if (!shared_info_page) {
+		pr_err("not enough memory\n");
+		return -ENOMEM;
+	}
+	xatp.domid = DOMID_SELF;
+	xatp.idx = 0;
+	xatp.space = XENMAPSPACE_shared_info;
+	xatp.gpfn = virt_to_gfn(shared_info_page);
+	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+		BUG();
+
+	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
+
+	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+	 * page, we use it in the event channel upcall and in some pvclock
+	 * related functions.
+	 * The shared info contains exactly 1 CPU (the boot CPU). The guest
+	 * is required to use VCPUOP_register_vcpu_info to place vcpu info
+	 * for secondary CPUs as they are brought up.
+	 * For uniformity we use VCPUOP_register_vcpu_info even on cpu0.
+	 */
+	xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info),
+				       1 << fls(sizeof(struct vcpu_info) - 1));
+	if (xen_vcpu_info == NULL)
+		return -ENOMEM;
+
+	/* Direct vCPU id mapping for ARM guests. */
+	for_each_possible_cpu(cpu)
+		per_cpu(xen_vcpu_id, cpu) = cpu;
+
+	xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
+	if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
+					  &xen_auto_xlat_grant_frames.vaddr,
+					  xen_auto_xlat_grant_frames.count)) {
+		free_percpu(xen_vcpu_info);
+		return -ENOMEM;
+	}
+	gnttab_init();
+
+	/*
+	 * Making sure board specific code will not set up ops for
+	 * cpu idle and cpu freq.
+	 */
+	disable_cpuidle();
+	disable_cpufreq();
+
+	xen_init_IRQ();
+
+	if (request_percpu_irq(xen_events_irq, xen_arm_callback,
+			       "events", &xen_vcpu)) {
+		pr_err("Error request IRQ %d\n", xen_events_irq);
+		return -EINVAL;
+	}
+
+	if (xen_initial_domain())
+		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
+
+	return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING,
+				 "arm/xen:starting", xen_starting_cpu,
+				 xen_dying_cpu);
+}
+early_initcall(xen_guest_init);
+
+static int xen_starting_runstate_cpu(unsigned int cpu)
+{
+	xen_setup_runstate_info(cpu);
+	return 0;
+}
+
+static int __init xen_late_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	pm_power_off = xen_power_off;
+	arm_pm_restart = xen_restart;
+	if (!xen_initial_domain()) {
+		struct timespec64 ts;
+		xen_read_wallclock(&ts);
+		do_settimeofday64(&ts);
+	}
+
+	if (xen_kernel_unmapped_at_usr())
+		return 0;
+
+	xen_time_setup_guest();
+
+	return cpuhp_setup_state(CPUHP_AP_ARM_XEN_RUNSTATE_STARTING,
+				 "arm/xen_runstate:starting",
+				 xen_starting_runstate_cpu, NULL);
+}
+late_initcall(xen_late_init);
+
+
+/* empty stubs */
+void xen_arch_pre_suspend(void) { }
+void xen_arch_post_suspend(int suspend_cancelled) { }
+void xen_timer_resume(void) { }
+void xen_arch_resume(void) { }
+void xen_arch_suspend(void) { }
+
+
+/* In the hypercall.S file. */
+EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_xen_version);
+EXPORT_SYMBOL_GPL(HYPERVISOR_console_io);
+EXPORT_SYMBOL_GPL(HYPERVISOR_sched_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_hvm_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw);
+EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
+EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
+EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op);
+EXPORT_SYMBOL_GPL(privcmd_call);
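[Note: fdt_find_hyper_node() and xen_dt_guest_init() above consume a /hypervisor device-tree node of the shape described in Documentation/devicetree/bindings/arm/xen.txt, which this file cites; interrupt 0 is the event-channel PPI that xen_guest_init() requests. An illustrative node — the version and addresses are placeholders, not taken from this patch:

	hypervisor {
		compatible = "xen,xen-4.11", "xen,xen";	/* version parsed from the "xen,xen-" prefix */
		reg = <0x20000000 0x40000>;		/* region for grant table mapping */
		interrupts = <1 15 0xf08>;		/* event channel upcall PPI */
	};]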
diff --git a/arch/arm/xen/grant-table.c b/arch/arm/xen/grant-table.c
new file mode 100644
index 000000000..91cf08ba1
--- /dev/null
+++ b/arch/arm/xen/grant-table.c
@@ -0,0 +1,58 @@
+/******************************************************************************
+ * grant_table.c
+ * ARM specific part
+ *
+ * Granting foreign access to our memory reservation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <xen/interface/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+
+int arch_gnttab_map_shared(xen_pfn_t *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   void **__shared)
+{
+	return -ENOSYS;
+}
+
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
+{
+	return;
+}
+
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   grant_status_t **__shared)
+{
+	return -ENOSYS;
+}
+
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
+{
+	return 0;
+}
diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S
new file mode 100644
index 000000000..b11bba542
--- /dev/null
+++ b/arch/arm/xen/hypercall.S
@@ -0,0 +1,122 @@
+/******************************************************************************
+ * hypercall.S
+ *
+ * Xen hypercall wrappers
+ *
+ * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * The Xen hypercall calling convention is very similar to the ARM
+ * procedure calling convention: the first parameter is passed in r0, the
+ * second in r1, the third in r2 and the fourth in r3. Considering that
+ * Xen hypercalls have 5 arguments at most, the fifth parameter is passed
+ * in r4, unlike the procedure calling convention, which would use the
+ * stack for that case.
+ *
+ * The hypercall number is passed in r12.
+ *
+ * The return value is in r0.
+ *
+ * The hvc ISS is required to be 0xEA1, which is the Xen-specific ARM
+ * hypercall tag.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/opcodes-virt.h>
+#include <xen/interface/xen.h>
+
+
+#define XEN_IMM 0xEA1
+
+#define HYPERCALL_SIMPLE(hypercall)		\
+ENTRY(HYPERVISOR_##hypercall)			\
+	mov r12, #__HYPERVISOR_##hypercall;	\
+	__HVC(XEN_IMM);				\
+	ret lr;					\
+ENDPROC(HYPERVISOR_##hypercall)
+
+#define HYPERCALL0 HYPERCALL_SIMPLE
+#define HYPERCALL1 HYPERCALL_SIMPLE
+#define HYPERCALL2 HYPERCALL_SIMPLE
+#define HYPERCALL3 HYPERCALL_SIMPLE
+#define HYPERCALL4 HYPERCALL_SIMPLE
+
+#define HYPERCALL5(hypercall)			\
+ENTRY(HYPERVISOR_##hypercall)			\
+	stmdb sp!, {r4}				\
+	ldr r4, [sp, #4]			\
+	mov r12, #__HYPERVISOR_##hypercall;	\
+	__HVC(XEN_IMM);				\
+	ldm sp!, {r4}				\
+	ret lr					\
+ENDPROC(HYPERVISOR_##hypercall)
+
+                .text
+
+HYPERCALL2(xen_version);
+HYPERCALL3(console_io);
+HYPERCALL3(grant_table_op);
+HYPERCALL2(sched_op);
+HYPERCALL2(event_channel_op);
+HYPERCALL2(hvm_op);
+HYPERCALL2(memory_op);
+HYPERCALL2(physdev_op);
+HYPERCALL3(vcpu_op);
+HYPERCALL1(tmem_op);
+HYPERCALL1(platform_op_raw);
+HYPERCALL2(multicall);
+HYPERCALL2(vm_assist);
+HYPERCALL3(dm_op);
+
+ENTRY(privcmd_call)
+	stmdb sp!, {r4}
+	mov r12, r0
+	mov r0, r1
+	mov r1, r2
+	mov r2, r3
+	ldr r3, [sp, #8]
+	/*
+	 * Privcmd calls are issued by userspace. We need to allow the
+	 * kernel to access the userspace memory before issuing the hypercall.
+	 */
+	uaccess_enable r4
+
+	/* r4 is loaded now because we used it as a scratch register above */
+	ldr r4, [sp, #4]
+	__HVC(XEN_IMM)
+
+	/*
+	 * Disable userspace access from kernel. This is fine to do
+	 * unconditionally as no set_fs(KERNEL_DS) is called before.
+	 */
+	uaccess_disable r4
+
+	ldm sp!, {r4}
+	ret lr
+ENDPROC(privcmd_call);
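[Note: on the C side each wrapper generated above is reached through its prototype in asm/xen/hypercall.h, so a caller just uses the normal procedure-call ABI and the stub moves the hypercall number into r12 before hvc #0xEA1. A minimal usage sketch — example_yield is a made-up name for illustration, SCHEDOP_yield comes from xen/interface/sched.h; compare xen_reboot() earlier in this patch, which issues SCHEDOP_shutdown the same way:

	/* Give the physical CPU back to Xen until this vCPU is scheduled again. */
	static void example_yield(void)
	{
		HYPERVISOR_sched_op(SCHEDOP_yield, NULL);	/* result comes back in r0 */
	}]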
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
new file mode 100644
index 000000000..467fa225c
--- /dev/null
+++ b/arch/arm/xen/mm.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/cpu.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <linux/swiotlb.h>
+
+#include <xen/xen.h>
+#include <xen/interface/grant_table.h>
+#include <xen/interface/memory.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
+#include <xen/swiotlb-xen.h>
+
+#include <asm/cacheflush.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/interface.h>
+
+unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+	phys_addr_t base;
+	gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
+	u64 i;
+
+	for_each_mem_range(i, &base, NULL) {
+		if (base < (phys_addr_t)0xffffffff) {
+			if (IS_ENABLED(CONFIG_ZONE_DMA32))
+				flags |= __GFP_DMA32;
+			else
+				flags |= __GFP_DMA;
+			break;
+		}
+	}
+	return __get_free_pages(flags, order);
+}
+
+static bool hypercall_cflush = false;
+
+/* buffers in highmem or foreign pages cannot cross page boundaries */
+static void dma_cache_maint(struct device *dev, dma_addr_t handle,
+			    size_t size, u32 op)
+{
+	struct gnttab_cache_flush cflush;
+
+	cflush.offset = xen_offset_in_page(handle);
+	cflush.op = op;
+	handle &= XEN_PAGE_MASK;
+
+	do {
+		cflush.a.dev_bus_addr = dma_to_phys(dev, handle);
+
+		if (size + cflush.offset > XEN_PAGE_SIZE)
+			cflush.length = XEN_PAGE_SIZE - cflush.offset;
+		else
+			cflush.length = size;
+
+		HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
+
+		cflush.offset = 0;
+		handle += cflush.length;
+		size -= cflush.length;
+	} while (size);
+}
+
+/*
+ * Dom0 is mapped 1:1, and while the Linux page can span across multiple Xen
+ * pages, it is not possible for it to contain a mix of local and foreign Xen
+ * pages. Calling pfn_valid on a foreign mfn will always return false, so if
+ * pfn_valid returns true the page is local and we can use the native
+ * dma-direct functions, otherwise we call the Xen specific version.
+ */
+void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
+			  size_t size, enum dma_data_direction dir)
+{
+	if (dir != DMA_TO_DEVICE)
+		dma_cache_maint(dev, handle, size, GNTTAB_CACHE_INVAL);
+}
+
+void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
+			     size_t size, enum dma_data_direction dir)
+{
+	if (dir == DMA_FROM_DEVICE)
+		dma_cache_maint(dev, handle, size, GNTTAB_CACHE_INVAL);
+	else
+		dma_cache_maint(dev, handle, size, GNTTAB_CACHE_CLEAN);
+}
+
+bool xen_arch_need_swiotlb(struct device *dev,
+			   phys_addr_t phys,
+			   dma_addr_t dev_addr)
+{
+	unsigned int xen_pfn = XEN_PFN_DOWN(phys);
+	unsigned int bfn = XEN_PFN_DOWN(dma_to_phys(dev, dev_addr));
+
+	/*
+	 * The swiotlb buffer should be used if
+	 *	- Xen doesn't have the cache flush hypercall
+	 *	- The Linux page refers to foreign memory
+	 *	- The device doesn't support coherent DMA request
+	 *
+	 * The Linux page may span across multiple Xen pages, although
+	 * it's not possible to have a mix of local and foreign Xen pages.
+	 * Furthermore, range_straddles_page_boundary is already checking
+	 * if the buffer is physically contiguous in the host RAM.
+	 *
+	 * Therefore we only need to check the first Xen page to know if we
+	 * require a bounce buffer because the device doesn't support coherent
+	 * memory and we are not able to flush the cache.
+	 */
+	return (!hypercall_cflush && (xen_pfn != bfn) &&
+		!dev_is_dma_coherent(dev));
+}
+
+int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
+				 unsigned int address_bits,
+				 dma_addr_t *dma_handle)
+{
+	if (!xen_initial_domain())
+		return -EINVAL;
+
+	/* we assume that dom0 is mapped 1:1 for now */
+	*dma_handle = pstart;
+	return 0;
+}
+
+void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
+{
+	return;
+}
+
+static int __init xen_mm_init(void)
+{
+	struct gnttab_cache_flush cflush;
+	if (!xen_initial_domain())
+		return 0;
+	xen_swiotlb_init(1, false);
+
+	cflush.op = 0;
+	cflush.a.dev_bus_addr = 0;
+	cflush.offset = 0;
+	cflush.length = 0;
+	if (HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1) != -ENOSYS)
+		hypercall_cflush = true;
+	return 0;
+}
+arch_initcall(xen_mm_init);
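[Note: the loop in dma_cache_maint() above issues one GNTTABOP_cache_flush per Xen page touched, because the hypercall's offset/length pair must not cross a Xen page boundary. A worked example with hypothetical numbers, assuming XEN_PAGE_SIZE is 4 KiB (0x1000):

	/*
	 * Flushing size = 0x2000 bytes starting at page offset 0x800:
	 *   pass 1: offset = 0x800, length = 0x800   (rest of the first page)
	 *   pass 2: offset = 0,     length = 0x1000  (a whole middle page)
	 *   pass 3: offset = 0,     length = 0x800   (head of the last page)
	 */]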
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
new file mode 100644
index 000000000..4a1991a10
--- /dev/null
+++ b/arch/arm/xen/p2m.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/memblock.h>
+#include <linux/gfp.h>
+#include <linux/export.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/swiotlb.h>
+
+#include <xen/xen.h>
+#include <xen/interface/memory.h>
+#include <xen/page.h>
+#include <xen/swiotlb-xen.h>
+
+#include <asm/cacheflush.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/interface.h>
+
+struct xen_p2m_entry {
+	unsigned long pfn;
+	unsigned long mfn;
+	unsigned long nr_pages;
+	struct rb_node rbnode_phys;
+};
+
+static rwlock_t p2m_lock;
+struct rb_root phys_to_mach = RB_ROOT;
+EXPORT_SYMBOL_GPL(phys_to_mach);
+
+static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new)
+{
+	struct rb_node **link = &phys_to_mach.rb_node;
+	struct rb_node *parent = NULL;
+	struct xen_p2m_entry *entry;
+	int rc = 0;
+
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct xen_p2m_entry, rbnode_phys);
+
+		if (new->pfn == entry->pfn)
+			goto err_out;
+
+		if (new->pfn < entry->pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+	rb_link_node(&new->rbnode_phys, parent, link);
+	rb_insert_color(&new->rbnode_phys, &phys_to_mach);
+	goto out;
+
+err_out:
+	rc = -EINVAL;
+	pr_warn("%s: cannot add pfn=%pa -> mfn=%pa: pfn=%pa -> mfn=%pa already exists\n",
+		__func__, &new->pfn, &new->mfn, &entry->pfn, &entry->mfn);
+out:
+	return rc;
+}
+
+unsigned long __pfn_to_mfn(unsigned long pfn)
+{
+	struct rb_node *n;
+	struct xen_p2m_entry *entry;
+	unsigned long irqflags;
+
+	read_lock_irqsave(&p2m_lock, irqflags);
+	n = phys_to_mach.rb_node;
+	while (n) {
+		entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
+		if (entry->pfn <= pfn &&
+		    entry->pfn + entry->nr_pages > pfn) {
+			unsigned long mfn = entry->mfn + (pfn - entry->pfn);
+			read_unlock_irqrestore(&p2m_lock, irqflags);
+			return mfn;
+		}
+		if (pfn < entry->pfn)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+	read_unlock_irqrestore(&p2m_lock, irqflags);
+
+	return INVALID_P2M_ENTRY;
+}
+EXPORT_SYMBOL_GPL(__pfn_to_mfn);
+
+int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+			    struct gnttab_map_grant_ref *kmap_ops,
+			    struct page **pages, unsigned int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct gnttab_unmap_grant_ref unmap;
+		int rc;
+
+		if (map_ops[i].status)
+			continue;
+		if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
+				    map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT)))
+			continue;
+
+		/*
+		 * Signal an error for this slot. This in turn requires
+		 * immediate unmapping.
+		 */
+		map_ops[i].status = GNTST_general_error;
+		unmap.host_addr = map_ops[i].host_addr,
+		unmap.handle = map_ops[i].handle;
+		map_ops[i].handle = ~0;
+		if (map_ops[i].flags & GNTMAP_device_map)
+			unmap.dev_bus_addr = map_ops[i].dev_bus_addr;
+		else
+			unmap.dev_bus_addr = 0;
+
+		/*
+		 * Pre-populate the status field, to be recognizable in
+		 * the log message below.
+		 */
+		unmap.status = 1;
+
+		rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+					       &unmap, 1);
+		if (rc || unmap.status != GNTST_okay)
+			pr_err_once("gnttab unmap failed: rc=%d st=%d\n",
+				    rc, unmap.status);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
+
+int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+			      struct gnttab_unmap_grant_ref *kunmap_ops,
+			      struct page **pages, unsigned int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		set_phys_to_machine(unmap_ops[i].host_addr >> XEN_PAGE_SHIFT,
+				    INVALID_P2M_ENTRY);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
+
+bool __set_phys_to_machine_multi(unsigned long pfn,
+				 unsigned long mfn, unsigned long nr_pages)
+{
+	int rc;
+	unsigned long irqflags;
+	struct xen_p2m_entry *p2m_entry;
+	struct rb_node *n;
+
+	if (mfn == INVALID_P2M_ENTRY) {
+		write_lock_irqsave(&p2m_lock, irqflags);
+		n = phys_to_mach.rb_node;
+		while (n) {
+			p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
+			if (p2m_entry->pfn <= pfn &&
+			    p2m_entry->pfn + p2m_entry->nr_pages > pfn) {
+				rb_erase(&p2m_entry->rbnode_phys, &phys_to_mach);
+				write_unlock_irqrestore(&p2m_lock, irqflags);
+				kfree(p2m_entry);
+				return true;
+			}
+			if (pfn < p2m_entry->pfn)
+				n = n->rb_left;
+			else
+				n = n->rb_right;
+		}
+		write_unlock_irqrestore(&p2m_lock, irqflags);
+		return true;
+	}
+
+	p2m_entry = kzalloc(sizeof(*p2m_entry), GFP_NOWAIT);
+	if (!p2m_entry)
+		return false;
+
+	p2m_entry->pfn = pfn;
+	p2m_entry->nr_pages = nr_pages;
+	p2m_entry->mfn = mfn;
+
+	write_lock_irqsave(&p2m_lock, irqflags);
+	rc = xen_add_phys_to_mach_entry(p2m_entry);
+	if (rc < 0) {
+		write_unlock_irqrestore(&p2m_lock, irqflags);
+		kfree(p2m_entry);
+		return false;
+	}
+	write_unlock_irqrestore(&p2m_lock, irqflags);
+	return true;
+}
+EXPORT_SYMBOL_GPL(__set_phys_to_machine_multi);
+
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	return __set_phys_to_machine_multi(pfn, mfn, 1);
+}
+EXPORT_SYMBOL_GPL(__set_phys_to_machine);
+
+static int p2m_init(void)
+{
+	rwlock_init(&p2m_lock);
+	return 0;
+}
+arch_initcall(p2m_init);
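[Note: a short usage sketch of the p2m interface above, mirroring what the grant-mapping helpers in this file do — pfn and mfn are hypothetical values:

	/* Record a foreign pfn -> mfn translation in the rbtree ... */
	if (!__set_phys_to_machine(pfn, mfn))		/* allocates an entry */
		return -ENOMEM;

	/* ... translate it back ... */
	if (__pfn_to_mfn(pfn) == INVALID_P2M_ENTRY)
		;	/* no entry found: pfn is plain local memory */

	/* ... and erase it again. */
	__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);]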