 src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c | 501 ++++++++++++++++++++++
 1 file changed, 501 insertions(+), 0 deletions(-)
diff --git a/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c
new file mode 100644
index 00000000..e7240915
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c
@@ -0,0 +1,501 @@
+/* $Id: alloc-r0drv-linux.c $ */
+/** @file
+ * IPRT - Memory Allocation, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/mem.h>
+
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include "r0drv/alloc-r0drv.h"
+
+
+#if (defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)) && !defined(RTMEMALLOC_EXEC_HEAP)
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
+/**
+ * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
+ * memory in the module range. This is preferable to the exec heap below.
+ */
+# define RTMEMALLOC_EXEC_VM_AREA
+# else
+/**
+ * We need memory in the module range (~2GB to ~0), and this can only be
+ * obtained through APIs that are not exported (see module_alloc()).
+ *
+ * So, we'll have to create a quick and dirty heap here using BSS memory.
+ * Very annoying and it's going to restrict us!
+ */
+# define RTMEMALLOC_EXEC_HEAP
+# endif
+#endif
+
+#ifdef RTMEMALLOC_EXEC_HEAP
+# include <iprt/heap.h>
+# include <iprt/spinlock.h>
+# include <iprt/errcore.h>
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+#ifdef RTMEMALLOC_EXEC_VM_AREA
+/**
+ * Extended header used for headers marked with RTMEMHDR_FLAG_EXEC_VM_AREA.
+ *
+ * This is used when allocating executable memory, for things like generated
+ * code and loaded modules.
+ */
+typedef struct RTMEMLNXHDREX
+{
+ /** The VM area for this allocation. */
+ struct vm_struct *pVmArea;
+ void *pvDummy;
+ /** The header we present to the generic API. */
+ RTMEMHDR Hdr;
+} RTMEMLNXHDREX;
+AssertCompileSize(RTMEMLNXHDREX, 32);
+/** Pointer to an extended memory header. */
+typedef RTMEMLNXHDREX *PRTMEMLNXHDREX;
+#endif
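+
+/*
+ * Illustrative layout sketch (added for clarity, not from the original
+ * sources): on 64-bit, the compile-time size assertion above pins the
+ * extended header to 32 bytes, so an RTMEMALLOC_EXEC_VM_AREA allocation
+ * starts out like this:
+ *
+ *     pVmArea->addr +  0: pVmArea  (struct vm_struct *)
+ *     pVmArea->addr +  8: pvDummy  (padding)
+ *     pVmArea->addr + 16: Hdr      (the generic RTMEMHDR, 16 bytes)
+ *     pVmArea->addr + 32: memory handed to the caller by the generic layer
+ */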
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef RTMEMALLOC_EXEC_HEAP
+/** The heap. */
+static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
+/** Spinlock protecting the heap. */
+static RTSPINLOCK g_HeapExecSpinlock = NIL_RTSPINLOCK;
+#endif
+
+
+/**
+ * API for cleaning up the exec heap spinlock on IPRT termination.
+ * Like RTR0MemExecDonate, this is specific to AMD64 Linux/GNU.
+ */
+DECLHIDDEN(void) rtR0MemExecCleanup(void)
+{
+#ifdef RTMEMALLOC_EXEC_HEAP
+ RTSpinlockDestroy(g_HeapExecSpinlock);
+ g_HeapExecSpinlock = NIL_RTSPINLOCK;
+#endif
+}
+
+
+/**
+ * Donate read+write+execute memory to the exec heap.
+ *
+ * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
+ * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
+ * allocated memory in the module if it wishes for GCC generated code to work.
+ * Currently, GCC can only generate module code that works in the address
+ * range ~2GB to ~0.
+ *
+ * The API only accepts a single donation.
+ *
+ * @returns IPRT status code.
+ * @retval VERR_NOT_SUPPORTED if the code isn't enabled.
+ * @param pvMemory Pointer to the memory block.
+ * @param cb The size of the memory block.
+ */
+RTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
+{
+#ifdef RTMEMALLOC_EXEC_HEAP
+ int rc;
+ AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);
+
+ rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecDonate");
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTHeapSimpleInit(&g_HeapExec, pvMemory, cb);
+ if (RT_FAILURE(rc))
+ rtR0MemExecCleanup();
+ }
+ return rc;
+#else
+ RT_NOREF_PV(pvMemory); RT_NOREF_PV(cb);
+ return VERR_NOT_SUPPORTED;
+#endif
+}
+RT_EXPORT_SYMBOL(RTR0MemExecDonate);
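+
+/*
+ * Illustrative usage sketch (added for clarity, not part of IPRT): a kernel
+ * module that wants RTMemExecAlloc to work on AMD64 Linux/GNU donates a
+ * statically allocated buffer once, typically from its init code.  The
+ * buffer name and size below are assumptions made for the example only.
+ *
+ *     static uint8_t g_abExecMemory[2 * 1024 * 1024];  // BSS, inside the module range
+ *
+ *     int rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
+ *     if (RT_FAILURE(rc))   // VERR_NOT_SUPPORTED when the exec heap isn't compiled in
+ *         return rc;
+ */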
+
+
+
+#ifdef RTMEMALLOC_EXEC_VM_AREA
+/**
+ * Allocate executable kernel memory in the module range.
+ *
+ * @returns Pointer to an allocation header on success, NULL on failure.
+ *
+ * @param cb The size the user requested.
+ */
+static PRTMEMHDR rtR0MemAllocExecVmArea(size_t cb)
+{
+ size_t const cbAlloc = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
+ size_t const cPages = cbAlloc >> PAGE_SHIFT;
+ struct page **papPages;
+ struct vm_struct *pVmArea;
+ size_t iPage;
+
+ pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
+ if (!pVmArea)
+ return NULL;
+ pVmArea->nr_pages = 0; /* paranoia? */
+ pVmArea->pages = NULL; /* paranoia? */
+
+ papPages = (struct page **)kmalloc(cPages * sizeof(papPages[0]), GFP_KERNEL | __GFP_NOWARN);
+ if (!papPages)
+ {
+ vunmap(pVmArea->addr);
+ return NULL;
+ }
+
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ papPages[iPage] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN);
+ if (!papPages[iPage])
+ break;
+ }
+ if (iPage == cPages)
+ {
+ /*
+ * Map the pages.
+ *
+ * Not entirely sure we really need to set nr_pages and pages here, but
+ * they provide a very convenient place for storing something we need
+ * in the free function, if nothing else...
+ */
+# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ struct page **papPagesIterator = papPages;
+# endif
+ pVmArea->nr_pages = cPages;
+ pVmArea->pages = papPages;
+ if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC,
+# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ &papPagesIterator
+# else
+ papPages
+# endif
+ ))
+ {
+ PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
+ pHdrEx->pVmArea = pVmArea;
+ pHdrEx->pvDummy = NULL;
+ return &pHdrEx->Hdr;
+ }
+ /* bail out */
+# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ pVmArea->nr_pages = papPagesIterator - papPages;
+# endif
+ }
+
+ vunmap(pVmArea->addr);
+
+ while (iPage-- > 0)
+ __free_page(papPages[iPage]);
+ kfree(papPages);
+
+ return NULL;
+}
+#endif /* RTMEMALLOC_EXEC_VM_AREA */
+
+
+/**
+ * OS specific allocation function.
+ */
+DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
+{
+ PRTMEMHDR pHdr;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Allocate.
+ */
+ if (fFlags & RTMEMHDR_FLAG_EXEC)
+ {
+ if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
+ return VERR_NOT_SUPPORTED;
+
+#if defined(RT_ARCH_AMD64)
+# ifdef RTMEMALLOC_EXEC_HEAP
+ if (g_HeapExec != NIL_RTHEAPSIMPLE)
+ {
+ RTSpinlockAcquire(g_HeapExecSpinlock);
+ pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
+ RTSpinlockRelease(g_HeapExecSpinlock);
+ fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
+ }
+ else
+ pHdr = NULL;
+
+# elif defined(RTMEMALLOC_EXEC_VM_AREA)
+ pHdr = rtR0MemAllocExecVmArea(cb);
+ fFlags |= RTMEMHDR_FLAG_EXEC_VM_AREA;
+
+# else /* !RTMEMALLOC_EXEC_HEAP */
+# error "you do not want to go here..."
+ pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
+# endif /* !RTMEMALLOC_EXEC_HEAP */
+
+#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
+ pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
+#else
+ pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
+#endif
+ }
+ else
+ {
+ if (
+#if 1 /* vmalloc has serious performance issues, avoid it. */
+ cb <= PAGE_SIZE*16 - sizeof(*pHdr)
+#else
+ cb <= PAGE_SIZE
+#endif
+ || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
+ )
+ {
+ fFlags |= RTMEMHDR_FLAG_KMALLOC;
+ pHdr = kmalloc(cb + sizeof(*pHdr),
+ (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? (GFP_ATOMIC | __GFP_NOWARN)
+ : (GFP_KERNEL | __GFP_NOWARN));
+ if (RT_UNLIKELY( !pHdr
+ && cb > PAGE_SIZE
+ && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
+ {
+ fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
+ pHdr = vmalloc(cb + sizeof(*pHdr));
+ }
+ }
+ else
+ pHdr = vmalloc(cb + sizeof(*pHdr));
+ }
+ if (RT_UNLIKELY(!pHdr))
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+ }
+
+ /*
+ * Initialize.
+ */
+ pHdr->u32Magic = RTMEMHDR_MAGIC;
+ pHdr->fFlags = fFlags;
+ pHdr->cb = cb;
+ pHdr->cbReq = cb;
+
+ *ppHdr = pHdr;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
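+
+/*
+ * Caller-side sketch (added for clarity, not part of this file): ring-0 code
+ * does not call rtR0MemAllocEx() directly but goes through the generic IPRT
+ * API in iprt/mem.h, which lands here.  The sizes are illustrative only.
+ *
+ *     void *pvSmall = RTMemAlloc(256);              // small: kmalloc path
+ *     void *pvBig   = RTMemAlloc(4 * 1024 * 1024);  // large: vmalloc path
+ *     if (pvSmall && pvBig)
+ *     {
+ *         // ... use the buffers ...
+ *     }
+ *     RTMemFree(pvBig);     // both variants are released via rtR0MemFree()
+ *     RTMemFree(pvSmall);
+ */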
+
+
+/**
+ * OS specific free function.
+ */
+DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ pHdr->u32Magic += 1;
+ if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
+ kfree(pHdr);
+#ifdef RTMEMALLOC_EXEC_HEAP
+ else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
+ {
+ RTSpinlockAcquire(g_HeapExecSpinlock);
+ RTHeapSimpleFree(g_HeapExec, pHdr);
+ RTSpinlockRelease(g_HeapExecSpinlock);
+ }
+#endif
+#ifdef RTMEMALLOC_EXEC_VM_AREA
+ else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_VM_AREA)
+ {
+ PRTMEMLNXHDREX pHdrEx = RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr);
+ size_t iPage = pHdrEx->pVmArea->nr_pages;
+ struct page **papPages = pHdrEx->pVmArea->pages;
+ void *pvMapping = pHdrEx->pVmArea->addr;
+
+ vunmap(pvMapping);
+
+ while (iPage-- > 0)
+ __free_page(papPages[iPage]);
+ kfree(papPages);
+ }
+#endif
+ else
+ vfree(pHdr);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+
+
+
+/**
+ * Compute order. Some functions allocate 2^order pages.
+ *
+ * @returns order.
+ * @param cPages Number of pages.
+ */
+static int CalcPowerOf2Order(unsigned long cPages)
+{
+ int iOrder;
+ unsigned long cTmp;
+
+ for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
+ ;
+ if (cPages & ~(1 << iOrder))
+ ++iOrder;
+
+ return iOrder;
+}
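+
+/*
+ * Worked example (added for illustration):
+ *   cPages = 4 (100b): the loop gives iOrder = 2 and no bits remain outside
+ *                      1 << 2, so the order stays 2 (a 4 page allocation).
+ *   cPages = 5 (101b): the loop gives iOrder = 2, but bit 0 lies outside
+ *                      1 << 2, so the order is rounded up to 3 (8 pages).
+ */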
+
+
+/**
+ * Allocates physically contiguous memory (below 4GB).
+ * The allocation is page aligned and the content is undefined.
+ *
+ * @returns Pointer to the memory block. This is page aligned.
+ * @param pPhys Where to store the physical address.
+ * @param cb The allocation size in bytes. This is always
+ * rounded up to PAGE_SIZE.
+ */
+RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
+{
+ int cOrder;
+ unsigned cPages;
+ struct page *paPages;
+ void *pvRet;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * validate input.
+ */
+ Assert(VALID_PTR(pPhys));
+ Assert(cb > 0);
+
+ /*
+ * Allocate page pointer array.
+ */
+ cb = RT_ALIGN_Z(cb, PAGE_SIZE);
+ cPages = cb >> PAGE_SHIFT;
+ cOrder = CalcPowerOf2Order(cPages);
+#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
+ /* ZONE_DMA32: 0-4GB */
+ paPages = alloc_pages(GFP_DMA32 | __GFP_NOWARN, cOrder);
+ if (!paPages)
+#endif
+#ifdef RT_ARCH_AMD64
+ /* ZONE_DMA: 0-16MB */
+ paPages = alloc_pages(GFP_DMA | __GFP_NOWARN, cOrder);
+#else
+ /* ZONE_NORMAL: 0-896MB */
+ paPages = alloc_pages(GFP_USER | __GFP_NOWARN, cOrder);
+#endif
+ if (paPages)
+ {
+ /*
+ * Reserve the pages and mark them executable.
+ */
+ unsigned iPage;
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ Assert(!PageHighMem(&paPages[iPage]));
+ if (iPage + 1 < cPages)
+ {
+ AssertMsg( (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
+ == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
+ && page_to_phys(&paPages[iPage]) + PAGE_SIZE
+ == page_to_phys(&paPages[iPage + 1]),
+ ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
+ (long long)page_to_phys(&paPages[iPage]), phys_to_virt(page_to_phys(&paPages[iPage])),
+ (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
+ }
+
+ SetPageReserved(&paPages[iPage]);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
+ MY_SET_PAGES_EXEC(&paPages[iPage], 1);
+#endif
+ }
+ *pPhys = page_to_phys(paPages);
+ pvRet = phys_to_virt(page_to_phys(paPages));
+ }
+ else
+ pvRet = NULL;
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return pvRet;
+}
+RT_EXPORT_SYMBOL(RTMemContAlloc);
+
+
+/**
+ * Frees memory allocated using RTMemContAlloc().
+ *
+ * @param pv Pointer returned by RTMemContAlloc().
+ * @param cb The cb parameter passed to RTMemContAlloc().
+ */
+RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
+{
+ if (pv)
+ {
+ int cOrder;
+ unsigned cPages;
+ unsigned iPage;
+ struct page *paPages;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /* validate */
+ AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
+ Assert(cb > 0);
+
+ /* calc order and get pages */
+ cb = RT_ALIGN_Z(cb, PAGE_SIZE);
+ cPages = cb >> PAGE_SHIFT;
+ cOrder = CalcPowerOf2Order(cPages);
+ paPages = virt_to_page(pv);
+
+ /*
+ * Restore page attributes freeing the pages.
+ */
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ ClearPageReserved(&paPages[iPage]);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
+ MY_SET_PAGES_NOEXEC(&paPages[iPage], 1);
+#endif
+ }
+ __free_pages(paPages, cOrder);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ }
+}
+RT_EXPORT_SYMBOL(RTMemContFree);
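+
+/*
+ * Illustrative usage sketch (added for clarity, not part of IPRT): a typical
+ * caller allocates a physically contiguous, below-4GB buffer for device DMA
+ * and frees it again with the same size.  The size is an assumption made for
+ * the example only.
+ *
+ *     RTCCPHYS PhysAddr;
+ *     void    *pv = RTMemContAlloc(&PhysAddr, 16 * PAGE_SIZE);
+ *     if (pv)
+ *     {
+ *         // hand PhysAddr to the device, access the buffer through pv ...
+ *         RTMemContFree(pv, 16 * PAGE_SIZE);   // must pass the same cb
+ *     }
+ */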
+