Diffstat (limited to 'src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.c')
-rw-r--r-- | src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.c | 1172
1 file changed, 1172 insertions, 0 deletions
diff --git a/src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.c b/src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.c
new file mode 100644
index 00000000..ebf2eae2
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.c
@@ -0,0 +1,1172 @@
+/* $Id: memobj-r0drv-solaris.c $ */
+/** @file
+ * IPRT - Ring-0 Memory Objects, Solaris.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include "the-solaris-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/memobj.h>
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include <iprt/mem.h>
+#include <iprt/param.h>
+#include <iprt/process.h>
+#include "internal/memobj.h"
+#include "memobj-r0drv-solaris.h"
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+#define SOL_IS_KRNL_ADDR(vx)    ((uintptr_t)(vx) >= kernelbase)
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/**
+ * The Solaris version of the memory object structure.
+ */
+typedef struct RTR0MEMOBJSOL
+{
+    /** The core structure. */
+    RTR0MEMOBJINTERNAL  Core;
+    /** Pointer to kernel memory cookie. */
+    ddi_umem_cookie_t   Cookie;
+    /** Shadow locked pages. */
+    void               *pvHandle;
+    /** Access mode used during locking. */
+    int                 fAccess;
+    /** Set if large pages are involved in an RTR0MEMOBJTYPE_PHYS
+     *  allocation. */
+    bool                fLargePage;
+    /** Whether we have individual pages or a kernel-mapped virtual memory
+     *  block in an RTR0MEMOBJTYPE_PHYS_NC allocation. */
+    bool                fIndivPages;
+} RTR0MEMOBJSOL, *PRTR0MEMOBJSOL;
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+static vnode_t          g_PageVnode;
+static kmutex_t         g_OffsetMtx;
+static u_offset_t       g_offPage;
+
+static vnode_t          g_LargePageVnode;
+static kmutex_t         g_LargePageOffsetMtx;
+static u_offset_t       g_offLargePage;
+static bool             g_fLargePageNoReloc;
+
+
+/**
+ * Returns the physical address for a virtual address.
+ *
+ * @param pv        The virtual address.
+ *
+ * @returns The physical address corresponding to @a pv.
+ */
+static uint64_t rtR0MemObjSolVirtToPhys(void *pv)
+{
+    struct hat *pHat         = NULL;
+    pfn_t       PageFrameNum = 0;
+    uintptr_t   uVirtAddr    = (uintptr_t)pv;
+
+    if (SOL_IS_KRNL_ADDR(pv))
+        pHat = kas.a_hat;
+    else
+    {
+        proc_t *pProcess = (proc_t *)RTR0ProcHandleSelf();
+        AssertRelease(pProcess);
+        pHat = pProcess->p_as->a_hat;
+    }
+
+    PageFrameNum = hat_getpfnum(pHat, (caddr_t)(uVirtAddr & PAGEMASK));
+    AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolVirtToPhys failed. pv=%p\n", pv));
+    return (((uint64_t)PageFrameNum << PAGE_SHIFT) | (uVirtAddr & PAGE_OFFSET_MASK));
+}
+
+
+/**
+ * Returns the physical address for a page.
+ *
+ * @param pPage     Pointer to the page.
+ *
+ * @returns The physical address of the page.
+ */
+static inline uint64_t rtR0MemObjSolPagePhys(page_t *pPage)
+{
+    AssertPtr(pPage);
+    pfn_t PageFrameNum = page_pptonum(pPage);
+    AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolPagePhys failed pPage=%p\n", pPage));
+    return (uint64_t)PageFrameNum << PAGE_SHIFT;
+}
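For illustration, a minimal caller sketch (not part of the diff above) showing how the two translation helpers compose; the helper name myDrvDumpPhys is hypothetical and kernel context is assumed:

    /* Hypothetical usage sketch -- illustration only. */
    static void myDrvDumpPhys(void *pv)
    {
        uint64_t uPhys = rtR0MemObjSolVirtToPhys(pv);
        cmn_err(CE_CONT, "virt %p -> phys 0x%llx (pfn 0x%llx, offset 0x%llx)\n",
                pv, (unsigned long long)uPhys,
                (unsigned long long)(uPhys >> PAGE_SHIFT),       /* page frame number */
                (unsigned long long)(uPhys & PAGE_OFFSET_MASK)); /* offset within the page */
    }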
+
+
+/**
+ * Allocates one page.
+ *
+ * @param virtAddr      The virtual address to which this page may be mapped in
+ *                      the future.
+ *
+ * @returns Pointer to the allocated page, NULL on failure.
+ */
+static page_t *rtR0MemObjSolPageAlloc(caddr_t virtAddr)
+{
+    u_offset_t offPage;
+    seg_t      KernelSeg;
+
+    /*
+     * 16777215 terabytes of total memory for all VMs or
+     * restart 8000 1GB VMs 2147483 times until wraparound!
+     */
+    mutex_enter(&g_OffsetMtx);
+    AssertCompileSize(u_offset_t, sizeof(uint64_t));    NOREF(RTASSERTVAR);
+    g_offPage = RT_ALIGN_64(g_offPage, PAGE_SIZE) + PAGE_SIZE;
+    offPage   = g_offPage;
+    mutex_exit(&g_OffsetMtx);
+
+    KernelSeg.s_as = &kas;
+    page_t *pPage = page_create_va(&g_PageVnode, offPage, PAGE_SIZE, PG_WAIT | PG_NORELOC, &KernelSeg, virtAddr);
+    if (RT_LIKELY(pPage))
+    {
+        /*
+         * Lock this page into memory "long term" to prevent this page from being paged out
+         * when we drop the page lock temporarily (during free). Downgrade to a shared lock
+         * to prevent page relocation.
+         */
+        page_pp_lock(pPage, 0 /* COW */, 1 /* Kernel */);
+        page_io_unlock(pPage);
+        page_downgrade(pPage);
+        Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
+    }
+
+    return pPage;
+}
+
+
+/**
+ * Destroys an allocated page.
+ *
+ * @param pPage         Pointer to the page to be destroyed.
+ * @remarks This function expects the page in @c pPage to be shared locked.
+ */
+static void rtR0MemObjSolPageDestroy(page_t *pPage)
+{
+    /*
+     * We need to exclusively lock the pages before freeing them; if upgrading the shared
+     * lock to exclusive fails, drop the page lock and look the page up from the hash.
+     * Record the page offset before we drop the page lock as we cannot touch any page_t
+     * members once the lock is dropped.
+     */
+    AssertPtr(pPage);
+    Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
+
+    u_offset_t offPage = pPage->p_offset;
+    int rc = page_tryupgrade(pPage);
+    if (!rc)
+    {
+        page_unlock(pPage);
+        page_t *pFoundPage = page_lookup(&g_PageVnode, offPage, SE_EXCL);
+
+        /*
+         * Since we allocated the pages as PG_NORELOC we should only get back the exact page always.
+         */
+        AssertReleaseMsg(pFoundPage == pPage, ("Page lookup failed %p:%llx returned %p, expected %p\n",
+                                               &g_PageVnode, offPage, pFoundPage, pPage));
+    }
+    Assert(PAGE_LOCKED_SE(pPage, SE_EXCL));
+    page_pp_unlock(pPage, 0 /* COW */, 1 /* Kernel */);
+    page_destroy(pPage, 0 /* move it to the free list */);
+}
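The shared-to-exclusive upgrade dance above reappears in the large-page free path further down. Distilled into a standalone sketch (hypothetical helper; it assumes the page was created PG_NORELOC on the given vnode/offset, so the lookup finds the same page):

    /* Hypothetical helper -- illustration of the upgrade-or-relookup idiom. */
    static page_t *myDrvUpgradeOrRelookup(page_t *pPage, vnode_t *pVnode, u_offset_t off)
    {
        if (page_tryupgrade(pPage))
            return pPage;                          /* upgraded in place to SE_EXCL */
        page_unlock(pPage);                        /* pPage must not be touched after this */
        return page_lookup(pVnode, off, SE_EXCL);  /* re-find the named page, locked SE_EXCL */
    }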
+
+
+/* Currently not used on 32-bit systems, define it to shut up gcc. */
+#if HC_ARCH_BITS == 64
+/**
+ * Allocates physically non-contiguous memory as individual pages.
+ *
+ * @param puPhys        Where to store the physical address of the first page.
+ *                      Optional, can be NULL.
+ * @param cb            The size of the allocation.
+ *
+ * @returns Array of allocated pages, NULL on failure.
+ */
+static page_t **rtR0MemObjSolPagesAlloc(uint64_t *puPhys, size_t cb)
+{
+    /*
+     * VM1:
+     * The page freelist and cachelist both hold pages that are not mapped into any address space.
+     * The cachelist does not really hold free pages, but when memory is exhausted they'll be moved
+     * to the free lists; it's the total of the free and cache lists that we see in the 'free'
+     * column of vmstat.
+     *
+     * VM2:
+     * @todo Document what happens behind the scenes in VM2 regarding the free and cachelist.
+     */
+
+    /*
+     * Non-pageable memory reservation request for _4K pages, don't sleep.
+     */
+    size_t cPages = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
+    int rc = page_resv(cPages, KM_NOSLEEP);
+    if (rc)
+    {
+        size_t   cbPages = cPages * sizeof(page_t *);
+        page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
+        if (RT_LIKELY(ppPages))
+        {
+            /*
+             * Get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
+             * we don't yet have the 'virtAddr' to which this memory may be mapped.
+             */
+            caddr_t virtAddr = 0;
+            for (size_t i = 0; i < cPages; i++, virtAddr += PAGE_SIZE)
+            {
+                /*
+                 * Get a page from the free list locked exclusively. The page will be named (hashed in)
+                 * and we rely on it during free. The page we get will be shared locked to prevent the page
+                 * from being relocated.
+                 */
+                page_t *pPage = rtR0MemObjSolPageAlloc(virtAddr);
+                if (RT_UNLIKELY(!pPage))
+                {
+                    /*
+                     * No page found, release whatever pages we grabbed so far.
+                     */
+                    for (size_t k = 0; k < i; k++)
+                        rtR0MemObjSolPageDestroy(ppPages[k]);
+                    kmem_free(ppPages, cbPages);
+                    page_unresv(cPages);
+                    return NULL;
+                }
+
+                ppPages[i] = pPage;
+            }
+
+            if (puPhys)
+                *puPhys = rtR0MemObjSolPagePhys(ppPages[0]);
+            return ppPages;
+        }
+
+        page_unresv(cPages);
+    }
+
+    return NULL;
+}
+#endif /* HC_ARCH_BITS == 64 */
+
+
+/**
+ * Frees the allocated pages.
+ *
+ * @param ppPages       Pointer to the page list.
+ * @param cb            Size of the allocation.
+ */
+static void rtR0MemObjSolPagesFree(page_t **ppPages, size_t cb)
+{
+    size_t cPages  = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
+    size_t cbPages = cPages * sizeof(page_t *);
+    for (size_t iPage = 0; iPage < cPages; iPage++)
+        rtR0MemObjSolPageDestroy(ppPages[iPage]);
+
+    kmem_free(ppPages, cbPages);
+    page_unresv(cPages);
+}
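A hedged round trip over the two helpers above (64-bit only; the function name is made up for illustration):

    /* Hypothetical round trip: allocate, inspect per-page physical addresses, free. */
    static int myDrvPagesRoundTrip(size_t cb)
    {
        uint64_t uPhysFirst = 0;
        page_t **ppPages = rtR0MemObjSolPagesAlloc(&uPhysFirst, cb);
        if (!ppPages)
            return ENOMEM;                        /* reservation or page shortage */
        /* Every page is individually addressable, e.g. for building an SG list: */
        size_t cPages = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
        for (size_t i = 0; i < cPages; i++)
            ASSERT(!(rtR0MemObjSolPagePhys(ppPages[i]) & PAGE_OFFSET_MASK));
        rtR0MemObjSolPagesFree(ppPages, cb);      /* unlocks, destroys, unreserves */
        return 0;
    }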
+
+
+/**
+ * Allocates one large page.
+ *
+ * @param puPhys        Where to store the physical address of the allocated
+ *                      page. Optional, can be NULL.
+ * @param cbLargePage   Size of the large page.
+ *
+ * @returns Pointer to a list of pages that cover the large page, NULL on
+ *          failure.
+ */
+static page_t **rtR0MemObjSolLargePageAlloc(uint64_t *puPhys, size_t cbLargePage)
+{
+    /*
+     * Check PG_NORELOC support for large pages. Using this helps prevent _1G page
+     * fragmentation on systems that support it.
+     */
+    static bool fPageNoRelocChecked = false;
+    if (fPageNoRelocChecked == false)
+    {
+        fPageNoRelocChecked = true;
+        g_fLargePageNoReloc = false;
+        if (   g_pfnrtR0Sol_page_noreloc_supported
+            && g_pfnrtR0Sol_page_noreloc_supported(cbLargePage))
+        {
+            g_fLargePageNoReloc = true;
+        }
+    }
+
+    /*
+     * Non-pageable memory reservation request for _4K pages, don't sleep.
+     */
+    size_t cPages  = (cbLargePage + PAGE_SIZE - 1) >> PAGE_SHIFT;
+    size_t cbPages = cPages * sizeof(page_t *);
+    u_offset_t offPage = 0;
+    int rc = page_resv(cPages, KM_NOSLEEP);
+    if (rc)
+    {
+        page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
+        if (RT_LIKELY(ppPages))
+        {
+            mutex_enter(&g_LargePageOffsetMtx);
+            AssertCompileSize(u_offset_t, sizeof(uint64_t));    NOREF(RTASSERTVAR);
+            g_offLargePage = RT_ALIGN_64(g_offLargePage, cbLargePage) + cbLargePage;
+            offPage        = g_offLargePage;
+            mutex_exit(&g_LargePageOffsetMtx);
+
+            seg_t KernelSeg;
+            KernelSeg.s_as = &kas;
+            page_t *pRootPage = page_create_va_large(&g_LargePageVnode, offPage, cbLargePage,
+                                                     PG_EXCL | (g_fLargePageNoReloc ? PG_NORELOC : 0), &KernelSeg,
+                                                     0 /* vaddr */, NULL /* locality group */);
+            if (pRootPage)
+            {
+                /*
+                 * Split it into sub-pages, downgrade each page to a shared lock to prevent page relocation.
+                 */
+                page_t *pPageList = pRootPage;
+                for (size_t iPage = 0; iPage < cPages; iPage++)
+                {
+                    page_t *pPage = pPageList;
+                    AssertPtr(pPage);
+                    AssertMsg(page_pptonum(pPage) == iPage + page_pptonum(pRootPage),
+                              ("%p:%lx %lx+%lx\n", pPage, page_pptonum(pPage), iPage, page_pptonum(pRootPage)));
+                    AssertMsg(pPage->p_szc == pRootPage->p_szc, ("Size code mismatch %p %d %d\n", pPage,
+                                                                 (int)pPage->p_szc, (int)pRootPage->p_szc));
+
+                    /*
+                     * Lock the page into memory "long term". This prevents callers of page_try_demote_pages() (such as the
+                     * pageout scanner) from demoting the large page into smaller pages while we temporarily release the
+                     * exclusive lock (during free). We pass "0, 1" since we've already accounted for availrmem during
+                     * page_resv().
+                     */
+                    page_pp_lock(pPage, 0 /* COW */, 1 /* Kernel */);
+
+                    page_sub(&pPageList, pPage);
+                    page_io_unlock(pPage);
+                    page_downgrade(pPage);
+                    Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
+
+                    ppPages[iPage] = pPage;
+                }
+                Assert(pPageList == NULL);
+                Assert(ppPages[0] == pRootPage);
+
+                uint64_t uPhys = rtR0MemObjSolPagePhys(pRootPage);
+                AssertMsg(!(uPhys & (cbLargePage - 1)), ("%llx %zx\n", uPhys, cbLargePage));
+                if (puPhys)
+                    *puPhys = uPhys;
+                return ppPages;
+            }
+
+            /*
+             * Don't restore offPrev in case of failure (race condition), we have plenty of offset space.
+             * The offset must be unique (for the same vnode) or we'll encounter panics on page_create_va_large().
+             */
+            kmem_free(ppPages, cbPages);
+        }
+
+        page_unresv(cPages);
+    }
+    return NULL;
+}
+
+
+/**
+ * Frees the large page.
+ *
+ * @param ppPages       Pointer to the list of small pages that cover the
+ *                      large page.
+ * @param cbLargePage   Size of the allocation (i.e. size of the large
+ *                      page).
+ */
+static void rtR0MemObjSolLargePageFree(page_t **ppPages, size_t cbLargePage)
+{
+    Assert(ppPages);
+    Assert(cbLargePage > PAGE_SIZE);
+
+    bool   fDemoted = false;
+    size_t cPages   = (cbLargePage + PAGE_SIZE - 1) >> PAGE_SHIFT;
+    size_t cbPages  = cPages * sizeof(page_t *);
+    page_t *pPageList = ppPages[0];
+
+    for (size_t iPage = 0; iPage < cPages; iPage++)
+    {
+        /*
+         * We need the pages exclusively locked, so try upgrading the shared lock.
+         * If it fails, drop the shared page lock (cannot access any page_t members once this is done)
+         * and look the page up from the page hash, locking it exclusively.
+         */
+        page_t    *pPage   = ppPages[iPage];
+        u_offset_t offPage = pPage->p_offset;
+        int rc = page_tryupgrade(pPage);
+        if (!rc)
+        {
+            page_unlock(pPage);
+            page_t *pFoundPage = page_lookup(&g_LargePageVnode, offPage, SE_EXCL);
+            AssertRelease(pFoundPage);
+
+            if (g_fLargePageNoReloc)
+            {
+                /*
+                 * This can only be guaranteed if PG_NORELOC is used while allocating the pages.
+                 */
+                AssertReleaseMsg(pFoundPage == pPage,
+                                 ("lookup failed %p:%llu returned %p, expected %p\n", &g_LargePageVnode, offPage,
+                                  pFoundPage, pPage));
+            }
+
+            /*
+             * Check for page demotion (regardless of relocation). Some places in Solaris (e.g. VM1 page_retire())
+             * could possibly demote the large page to _4K pages between our call to page_unlock() and page_lookup().
+             */
+            if (page_get_pagecnt(pFoundPage->p_szc) == 1)   /* Base size of only _4K associated with this page. */
+                fDemoted = true;
+            pPage          = pFoundPage;
+            ppPages[iPage] = pFoundPage;
+        }
+        Assert(PAGE_LOCKED_SE(pPage, SE_EXCL));
+        page_pp_unlock(pPage, 0 /* COW */, 1 /* Kernel */);
+    }
+
+    if (fDemoted)
+    {
+        for (size_t iPage = 0; iPage < cPages; iPage++)
+        {
+            Assert(page_get_pagecnt(ppPages[iPage]->p_szc) == 1);
+            page_destroy(ppPages[iPage], 0 /* move it to the free list */);
+        }
+    }
+    else
+    {
+        /*
+         * Although we shredded the page linked list during allocation (page_sub), page_destroy_pages()
+         * works on adjacent pages via array increments. So this does indeed free all the pages.
+         */
+        AssertPtr(pPageList);
+        page_destroy_pages(pPageList);
+    }
+    kmem_free(ppPages, cbPages);
+    page_unresv(cPages);
+}
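A minimal round trip over the two large-page helpers, assuming a 2 MB large page is supported (page-size code 1 on x86); myDrvLargePageRoundTrip is a hypothetical name:

    /* Hypothetical sketch; assumes page_num_pagesizes() > 1 was verified. */
    static int myDrvLargePageRoundTrip(void)
    {
        size_t const cbLarge = page_get_pagesize(1);    /* typically _2M on x86 */
        uint64_t     uPhys   = 0;
        page_t     **ppPages = rtR0MemObjSolLargePageAlloc(&uPhys, cbLarge);
        if (!ppPages)
            return ENOMEM;
        ASSERT(!(uPhys & (cbLarge - 1)));               /* physically aligned to its own size */
        rtR0MemObjSolLargePageFree(ppPages, cbLarge);
        return 0;
    }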
+
+
+/**
+ * Unmaps kernel/user-space mapped memory.
+ *
+ * @param pv            Pointer to the mapped memory block.
+ * @param cb            Size of the memory block.
+ */
+static void rtR0MemObjSolUnmap(void *pv, size_t cb)
+{
+    if (SOL_IS_KRNL_ADDR(pv))
+    {
+        hat_unload(kas.a_hat, pv, cb, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
+        vmem_free(heap_arena, pv, cb);
+    }
+    else
+    {
+        struct as *pAddrSpace = ((proc_t *)RTR0ProcHandleSelf())->p_as;
+        AssertPtr(pAddrSpace);
+        as_rangelock(pAddrSpace);
+        as_unmap(pAddrSpace, pv, cb);
+        as_rangeunlock(pAddrSpace);
+    }
+}
+
+
+/**
+ * Locks down memory mappings for a virtual address.
+ *
+ * @param pv            Pointer to the memory to lock down.
+ * @param cb            Size of the memory block.
+ * @param fPageAccess   Page access rights (S_READ, S_WRITE, S_EXEC).
+ *
+ * @returns IPRT status code.
+ */
+static int rtR0MemObjSolLock(void *pv, size_t cb, int fPageAccess)
+{
+    /*
+     * Kernel memory mappings on x86/amd64 are always locked, so only handle user-space memory.
+     */
+    if (!SOL_IS_KRNL_ADDR(pv))
+    {
+        proc_t *pProc = (proc_t *)RTR0ProcHandleSelf();
+        AssertPtr(pProc);
+        faultcode_t rc = as_fault(pProc->p_as->a_hat, pProc->p_as, (caddr_t)pv, cb, F_SOFTLOCK, fPageAccess);
+        if (rc)
+        {
+            LogRel(("rtR0MemObjSolLock failed for pv=%p cb=%lx fPageAccess=%d rc=%d\n", pv, cb, fPageAccess, rc));
+            return VERR_LOCK_FAILED;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Unlocks memory mappings for a virtual address.
+ *
+ * @param pv            Pointer to the locked memory.
+ * @param cb            Size of the memory block.
+ * @param fPageAccess   Page access rights (S_READ, S_WRITE, S_EXEC).
+ */
+static void rtR0MemObjSolUnlock(void *pv, size_t cb, int fPageAccess)
+{
+    if (!SOL_IS_KRNL_ADDR(pv))
+    {
+        proc_t *pProcess = (proc_t *)RTR0ProcHandleSelf();
+        AssertPtr(pProcess);
+        as_fault(pProcess->p_as->a_hat, pProcess->p_as, (caddr_t)pv, cb, F_SOFTUNLOCK, fPageAccess);
+    }
+}
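For illustration, the F_SOFTLOCK/F_SOFTUNLOCK pairing the two helpers rely on, as a hypothetical caller (pvUser must belong to the calling process, and the unlock must mirror the lock's access mode):

    /* Hypothetical sketch of a lock/use/unlock sequence. */
    static int myDrvWithUserBufferLocked(void *pvUser, size_t cb)
    {
        int rc = rtR0MemObjSolLock(pvUser, cb, S_WRITE);   /* fault in + soft-lock */
        if (RT_FAILURE(rc))
            return rc;
        /* ... safe to access the buffer from kernel context here ... */
        rtR0MemObjSolUnlock(pvUser, cb, S_WRITE);          /* same range and access mode */
        return VINF_SUCCESS;
    }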
+
+
+/**
+ * Maps a list of physical pages into user address space.
+ *
+ * @param pVirtAddr     Where to store the virtual address of the mapping.
+ * @param fPageAccess   Page access rights (PROT_READ, PROT_WRITE,
+ *                      PROT_EXEC).
+ * @param paPhysAddrs   Array of physical addresses to pages.
+ * @param cb            Size of the memory being mapped.
+ * @param cbPageSize    The page size used for the mapping.
+ *
+ * @returns IPRT status code.
+ */
+static int rtR0MemObjSolUserMap(caddr_t *pVirtAddr, unsigned fPageAccess, uint64_t *paPhysAddrs, size_t cb, size_t cbPageSize)
+{
+    struct as *pAddrSpace = ((proc_t *)RTR0ProcHandleSelf())->p_as;
+    int rc;
+    SEGVBOX_CRARGS Args;
+
+    Args.paPhysAddrs = paPhysAddrs;
+    Args.fPageAccess = fPageAccess;
+    Args.cbPageSize  = cbPageSize;
+
+    as_rangelock(pAddrSpace);
+    if (g_frtSolOldMapAddr)
+        g_rtSolMapAddr.u.pfnSol_map_addr_old(pVirtAddr, cb, 0 /* offset */, 0 /* vacalign */, MAP_SHARED);
+    else
+        g_rtSolMapAddr.u.pfnSol_map_addr(pVirtAddr, cb, 0 /* offset */, MAP_SHARED);
+    if (*pVirtAddr != NULL)
+        rc = as_map(pAddrSpace, *pVirtAddr, cb, rtR0SegVBoxSolCreate, &Args);
+    else
+        rc = ENOMEM;
+    as_rangeunlock(pAddrSpace);
+
+    return RTErrConvertFromErrno(rc);
+}
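A sketch of how the mapping helper is fed (hypothetical caller; in the real flow, rtR0MemObjNativeMapUser() below builds paPhysAddrs from a memory object):

    /* Hypothetical: map two page-aligned physical pages into the calling process. */
    static int myDrvMapTwoPages(uint64_t uPhys0, uint64_t uPhys1, caddr_t *pUserAddr)
    {
        uint64_t aPhysAddrs[2] = { uPhys0, uPhys1 };
        return rtR0MemObjSolUserMap(pUserAddr, PROT_READ | PROT_WRITE, aPhysAddrs,
                                    2 * PAGE_SIZE, PAGE_SIZE);
    }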
+
+
+DECLHIDDEN(int) rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
+{
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)pMem;
+
+    switch (pMemSolaris->Core.enmType)
+    {
+        case RTR0MEMOBJTYPE_LOW:
+            rtR0SolMemFree(pMemSolaris->Core.pv, pMemSolaris->Core.cb);
+            break;
+
+        case RTR0MEMOBJTYPE_PHYS:
+            if (pMemSolaris->Core.u.Phys.fAllocated)
+            {
+                if (pMemSolaris->fLargePage)
+                    rtR0MemObjSolLargePageFree(pMemSolaris->pvHandle, pMemSolaris->Core.cb);
+                else
+                    rtR0SolMemFree(pMemSolaris->Core.pv, pMemSolaris->Core.cb);
+            }
+            break;
+
+        case RTR0MEMOBJTYPE_PHYS_NC:
+            if (pMemSolaris->fIndivPages)
+                rtR0MemObjSolPagesFree(pMemSolaris->pvHandle, pMemSolaris->Core.cb);
+            else
+                rtR0SolMemFree(pMemSolaris->Core.pv, pMemSolaris->Core.cb);
+            break;
+
+        case RTR0MEMOBJTYPE_PAGE:
+            ddi_umem_free(pMemSolaris->Cookie);
+            break;
+
+        case RTR0MEMOBJTYPE_LOCK:
+            rtR0MemObjSolUnlock(pMemSolaris->Core.pv, pMemSolaris->Core.cb, pMemSolaris->fAccess);
+            break;
+
+        case RTR0MEMOBJTYPE_MAPPING:
+            rtR0MemObjSolUnmap(pMemSolaris->Core.pv, pMemSolaris->Core.cb);
+            break;
+
+        case RTR0MEMOBJTYPE_RES_VIRT:
+        {
+            if (pMemSolaris->Core.u.ResVirt.R0Process == NIL_RTR0PROCESS)
+                vmem_xfree(heap_arena, pMemSolaris->Core.pv, pMemSolaris->Core.cb);
+            else
+                AssertFailed();
+            break;
+        }
+
+        case RTR0MEMOBJTYPE_CONT: /* we don't use this type here. */
+        default:
+            AssertMsgFailed(("enmType=%d\n", pMemSolaris->Core.enmType));
+            return VERR_INTERNAL_ERROR;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
+{
+    /* Create the object. */
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PAGE, NULL, cb);
+    if (RT_UNLIKELY(!pMemSolaris))
+        return VERR_NO_MEMORY;
+
+    void *pvMem = ddi_umem_alloc(cb, DDI_UMEM_SLEEP, &pMemSolaris->Cookie);
+    if (RT_UNLIKELY(!pvMem))
+    {
+        rtR0MemObjDelete(&pMemSolaris->Core);
+        return VERR_NO_PAGE_MEMORY;
+    }
+
+    pMemSolaris->Core.pv  = pvMem;
+    pMemSolaris->pvHandle = NULL;
+    *ppMem = &pMemSolaris->Core;
+    return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
+{
+    NOREF(fExecutable);
+
+    /* Create the object. */
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOW, NULL, cb);
+    if (!pMemSolaris)
+        return VERR_NO_MEMORY;
+
+    /* Allocate physically low page-aligned memory. */
+    uint64_t uPhysHi = _4G - 1;
+    void *pvMem = rtR0SolMemAlloc(uPhysHi, NULL /* puPhys */, cb, PAGE_SIZE, false /* fContig */);
+    if (RT_UNLIKELY(!pvMem))
+    {
+        rtR0MemObjDelete(&pMemSolaris->Core);
+        return VERR_NO_LOW_MEMORY;
+    }
+    pMemSolaris->Core.pv  = pvMem;
+    pMemSolaris->pvHandle = NULL;
+    *ppMem = &pMemSolaris->Core;
+    return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
+{
+    NOREF(fExecutable);
+    return rtR0MemObjNativeAllocPhys(ppMem, cb, _4G - 1, PAGE_SIZE /* alignment */);
+}
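From the IPRT side, the page allocator above is reached via the public API; a minimal hypothetical caller (RTR0MemObjAllocPage() routes to rtR0MemObjNativeAllocPage()):

    /* Hypothetical sketch: allocate a page-backed scratch buffer. */
    static int myDrvAllocScratch(void **ppv, PRTR0MEMOBJ phMemObj)
    {
        int rc = RTR0MemObjAllocPage(phMemObj, PAGE_SIZE, false /* fExecutable */);
        if (RT_SUCCESS(rc))
            *ppv = RTR0MemObjAddress(*phMemObj);   /* kernel virtual address */
        return rc;
    }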
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
+{
+#if HC_ARCH_BITS == 64
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS_NC, NULL, cb);
+    if (RT_UNLIKELY(!pMemSolaris))
+        return VERR_NO_MEMORY;
+
+    if (PhysHighest == NIL_RTHCPHYS)
+    {
+        uint64_t PhysAddr = UINT64_MAX;
+        void *pvPages = rtR0MemObjSolPagesAlloc(&PhysAddr, cb);
+        if (!pvPages)
+        {
+            LogRel(("rtR0MemObjNativeAllocPhysNC: rtR0MemObjSolPagesAlloc failed for cb=%zu.\n", cb));
+            rtR0MemObjDelete(&pMemSolaris->Core);
+            return VERR_NO_MEMORY;
+        }
+        Assert(PhysAddr != UINT64_MAX);
+        Assert(!(PhysAddr & PAGE_OFFSET_MASK));
+
+        pMemSolaris->Core.pv     = NULL;
+        pMemSolaris->pvHandle    = pvPages;
+        pMemSolaris->fIndivPages = true;
+        *ppMem = &pMemSolaris->Core;
+        return VINF_SUCCESS;
+    }
+    else
+    {
+        /*
+         * If we must satisfy an upper limit constraint, it isn't feasible to grab individual pages.
+         * We fall back to using contig_alloc().
+         */
+        uint64_t PhysAddr = UINT64_MAX;
+        void *pvMem = rtR0SolMemAlloc(PhysHighest, &PhysAddr, cb, PAGE_SIZE, false /* fContig */);
+        if (!pvMem)
+        {
+            LogRel(("rtR0MemObjNativeAllocPhysNC: rtR0SolMemAlloc failed for cb=%zu PhysHighest=%RHp.\n", cb, PhysHighest));
+            rtR0MemObjDelete(&pMemSolaris->Core);
+            return VERR_NO_MEMORY;
+        }
+        Assert(PhysAddr != UINT64_MAX);
+        Assert(!(PhysAddr & PAGE_OFFSET_MASK));
+
+        pMemSolaris->Core.pv     = pvMem;
+        pMemSolaris->pvHandle    = NULL;
+        pMemSolaris->fIndivPages = false;
+        *ppMem = &pMemSolaris->Core;
+        return VINF_SUCCESS;
+    }
+
+#else /* 32 bit: */
+    return VERR_NOT_SUPPORTED; /* see the RTR0MemObjAllocPhysNC specs */
+#endif
+}
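A hedged public-API sketch of the individual-pages path above (64-bit, no physical-address ceiling, so NIL_RTHCPHYS selects rtR0MemObjSolPagesAlloc()):

    /* Hypothetical: allocate scattered pages and query each physical address. */
    static int myDrvAllocScatterPages(size_t cb)
    {
        RTR0MEMOBJ hMemObj;
        int rc = RTR0MemObjAllocPhysNC(&hMemObj, cb, NIL_RTHCPHYS);
        if (RT_FAILURE(rc))
            return rc;
        /* Each page can be queried individually, e.g. for device SG programming: */
        for (size_t iPage = 0; iPage < cb >> PAGE_SHIFT; iPage++)
        {
            RTHCPHYS Phys = RTR0MemObjGetPagePhysAddr(hMemObj, iPage);
            Assert(Phys != NIL_RTHCPHYS); NOREF(Phys);
        }
        return RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
    }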
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment)
+{
+    AssertMsgReturn(PhysHighest >= 16 * _1M, ("PhysHighest=%RHp\n", PhysHighest), VERR_NOT_SUPPORTED);
+
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS, NULL, cb);
+    if (RT_UNLIKELY(!pMemSolaris))
+        return VERR_NO_MEMORY;
+
+    /*
+     * Allocating one large page gets special treatment.
+     */
+    static uint32_t s_cbLargePage = UINT32_MAX;
+    if (s_cbLargePage == UINT32_MAX)
+    {
+        if (page_num_pagesizes() > 1)
+            ASMAtomicWriteU32(&s_cbLargePage, page_get_pagesize(1)); /* Page-size code 1 maps to _2M on Solaris x86/amd64. */
+        else
+            ASMAtomicWriteU32(&s_cbLargePage, 0);
+    }
+
+    uint64_t PhysAddr;
+    if (   cb == s_cbLargePage
+        && cb == uAlignment
+        && PhysHighest == NIL_RTHCPHYS)
+    {
+        /*
+         * Allocate one large page (backed by physically contiguous memory).
+         */
+        void *pvPages = rtR0MemObjSolLargePageAlloc(&PhysAddr, cb);
+        if (RT_LIKELY(pvPages))
+        {
+            AssertMsg(!(PhysAddr & (cb - 1)), ("%RHp\n", PhysAddr));
+            pMemSolaris->Core.pv                = NULL;
+            pMemSolaris->Core.u.Phys.PhysBase   = PhysAddr;
+            pMemSolaris->Core.u.Phys.fAllocated = true;
+            pMemSolaris->pvHandle               = pvPages;
+            pMemSolaris->fLargePage             = true;
+
+            *ppMem = &pMemSolaris->Core;
+            return VINF_SUCCESS;
+        }
+    }
+    else
+    {
+        /*
+         * Allocate physically contiguous memory aligned as specified.
+         */
+        AssertCompile(NIL_RTHCPHYS == UINT64_MAX); NOREF(RTASSERTVAR);
+        PhysAddr = PhysHighest;
+        void *pvMem = rtR0SolMemAlloc(PhysHighest, &PhysAddr, cb, uAlignment, true /* fContig */);
+        if (RT_LIKELY(pvMem))
+        {
+            Assert(!(PhysAddr & PAGE_OFFSET_MASK));
+            Assert(PhysAddr < PhysHighest);
+            Assert(PhysAddr + cb <= PhysHighest);
+
+            pMemSolaris->Core.pv                = pvMem;
+            pMemSolaris->Core.u.Phys.PhysBase   = PhysAddr;
+            pMemSolaris->Core.u.Phys.fAllocated = true;
+            pMemSolaris->pvHandle               = NULL;
+            pMemSolaris->fLargePage             = false;
+
+            *ppMem = &pMemSolaris->Core;
+            return VINF_SUCCESS;
+        }
+    }
+    rtR0MemObjDelete(&pMemSolaris->Core);
+    return VERR_NO_CONT_MEMORY;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb, uint32_t uCachePolicy)
+{
+    AssertReturn(uCachePolicy == RTMEM_CACHE_POLICY_DONT_CARE, VERR_NOT_SUPPORTED);
+
+    /* Create the object. */
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS, NULL, cb);
+    if (!pMemSolaris)
+        return VERR_NO_MEMORY;
+
+    /* There is no allocation here; the range needs to be mapped somewhere first. */
+    pMemSolaris->Core.u.Phys.fAllocated   = false;
+    pMemSolaris->Core.u.Phys.PhysBase     = Phys;
+    pMemSolaris->Core.u.Phys.uCachePolicy = uCachePolicy;
+    *ppMem = &pMemSolaris->Core;
+    return VINF_SUCCESS;
+}
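The large-page fast path above triggers only when the size and alignment are both exactly one large page and no physical ceiling is given; a hypothetical caller through the public API:

    /* Hypothetical: request a 2 MB physically contiguous, 2 MB aligned block.
     * cb == uAlignment == _2M and PhysHighest == NIL_RTHCPHYS selects the
     * rtR0MemObjSolLargePageAlloc() path on Solaris, when _2M pages exist. */
    static int myDrvAlloc2MBacking(PRTR0MEMOBJ phMemObj)
    {
        return RTR0MemObjAllocPhysEx(phMemObj, _2M, NIL_RTHCPHYS, _2M /* uAlignment */);
    }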
+
+
+DECLHIDDEN(int) rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess,
+                                         RTR0PROCESS R0Process)
+{
+    AssertReturn(R0Process == RTR0ProcHandleSelf(), VERR_INVALID_PARAMETER);
+
+    /* Create the locking object. */
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
+    if (!pMemSolaris)
+        return VERR_NO_MEMORY;
+
+    /* Lock down user pages. */
+    int fPageAccess = S_READ;
+    if (fAccess & RTMEM_PROT_WRITE)
+        fPageAccess = S_WRITE;
+    if (fAccess & RTMEM_PROT_EXEC)
+        fPageAccess = S_EXEC;
+    int rc = rtR0MemObjSolLock((void *)R3Ptr, cb, fPageAccess);
+    if (RT_FAILURE(rc))
+    {
+        LogRel(("rtR0MemObjNativeLockUser: rtR0MemObjSolLock failed rc=%d\n", rc));
+        rtR0MemObjDelete(&pMemSolaris->Core);
+        return rc;
+    }
+
+    /* Fill in the object attributes and return successfully. */
+    pMemSolaris->Core.u.Lock.R0Process = R0Process;
+    pMemSolaris->pvHandle              = NULL;
+    pMemSolaris->fAccess               = fPageAccess;
+    *ppMem = &pMemSolaris->Core;
+    return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
+{
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOCK, pv, cb);
+    if (!pMemSolaris)
+        return VERR_NO_MEMORY;
+
+    /* Lock down kernel pages. */
+    int fPageAccess = S_READ;
+    if (fAccess & RTMEM_PROT_WRITE)
+        fPageAccess = S_WRITE;
+    if (fAccess & RTMEM_PROT_EXEC)
+        fPageAccess = S_EXEC;
+    int rc = rtR0MemObjSolLock(pv, cb, fPageAccess);
+    if (RT_FAILURE(rc))
+    {
+        LogRel(("rtR0MemObjNativeLockKernel: rtR0MemObjSolLock failed rc=%d\n", rc));
+        rtR0MemObjDelete(&pMemSolaris->Core);
+        return rc;
+    }
+
+    /* Fill in the object attributes and return successfully. */
+    pMemSolaris->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
+    pMemSolaris->pvHandle              = NULL;
+    pMemSolaris->fAccess               = fPageAccess;
+    *ppMem = &pMemSolaris->Core;
+    return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
+{
+    PRTR0MEMOBJSOL pMemSolaris;
+
+    /*
+     * Use xalloc.
+     */
+    void *pv = vmem_xalloc(heap_arena, cb, uAlignment, 0 /* phase */, 0 /* nocross */,
+                           NULL /* minaddr */, NULL /* maxaddr */, VM_SLEEP);
+    if (RT_UNLIKELY(!pv))
+        return VERR_NO_MEMORY;
+
+    /* Create the object. */
+    pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
+    if (!pMemSolaris)
+    {
+        LogRel(("rtR0MemObjNativeReserveKernel failed to alloc memory object.\n"));
+        vmem_xfree(heap_arena, pv, cb);
+        return VERR_NO_MEMORY;
+    }
+
+    pMemSolaris->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
+    *ppMem = &pMemSolaris->Core;
+    return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment,
+                                            RTR0PROCESS R0Process)
+{
+    return VERR_NOT_SUPPORTED;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment,
+                                          unsigned fProt, size_t offSub, size_t cbSub)
+{
+    /* Fail if requested to do something we can't. */
+    AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
+    if (uAlignment > PAGE_SIZE)
+        return VERR_NOT_SUPPORTED;
+
+    /*
+     * Use xalloc to get address space.
+     */
+    if (!cbSub)
+        cbSub = pMemToMap->cb;
+    void *pv = vmem_xalloc(heap_arena, cbSub, uAlignment, 0 /* phase */, 0 /* nocross */,
+                           NULL /* minaddr */, NULL /* maxaddr */, VM_SLEEP);
+    if (RT_UNLIKELY(!pv))
+        return VERR_MAP_FAILED;
+
+    /*
+     * Load the pages from the other object into it.
+     */
+    uint32_t fAttr = HAT_UNORDERED_OK | HAT_MERGING_OK | HAT_LOADCACHING_OK | HAT_STORECACHING_OK;
+    if (fProt & RTMEM_PROT_READ)
+        fAttr |= PROT_READ;
+    if (fProt & RTMEM_PROT_EXEC)
+        fAttr |= PROT_EXEC;
+    if (fProt & RTMEM_PROT_WRITE)
+        fAttr |= PROT_WRITE;
+    fAttr |= HAT_NOSYNC;
+
+    int    rc  = VINF_SUCCESS;
+    size_t off = 0;
+    while (off < cbSub)
+    {
+        RTHCPHYS HCPhys = RTR0MemObjGetPagePhysAddr(pMemToMap, (offSub + off) >> PAGE_SHIFT);
+        AssertBreakStmt(HCPhys != NIL_RTHCPHYS, rc = VERR_INTERNAL_ERROR_2);
+        pfn_t pfn = HCPhys >> PAGE_SHIFT;
+        AssertBreakStmt(((RTHCPHYS)pfn << PAGE_SHIFT) == HCPhys, rc = VERR_INTERNAL_ERROR_3);
+
+        hat_devload(kas.a_hat, (uint8_t *)pv + off, PAGE_SIZE, pfn, fAttr, HAT_LOAD_LOCK);
+
+        /* Advance. */
+        off += PAGE_SIZE;
+    }
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Create a memory object for the mapping.
+         */
+        PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_MAPPING, pv, cbSub);
+        if (pMemSolaris)
+        {
+            pMemSolaris->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
+            *ppMem = &pMemSolaris->Core;
+            return VINF_SUCCESS;
+        }
+
+        LogRel(("rtR0MemObjNativeMapKernel failed to alloc memory object.\n"));
+        rc = VERR_NO_MEMORY;
+    }
+
+    if (off)
+        hat_unload(kas.a_hat, pv, off, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
+    vmem_xfree(heap_arena, pv, cbSub);
+    return rc;
+}
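A sketch of the kernel-mapping path from the public API (hypothetical helper; it enters a physical range and maps it into the kernel, which exercises rtR0MemObjNativeEnterPhys() and rtR0MemObjNativeMapKernel() above):

    /* Hypothetical: map a physical range into kernel space for CPU access. */
    static int myDrvMapPhysIntoKernel(RTHCPHYS Phys, size_t cb, void **ppv)
    {
        RTR0MEMOBJ hPhys, hMap;
        int rc = RTR0MemObjEnterPhys(&hPhys, Phys, cb, RTMEM_CACHE_POLICY_DONT_CARE);
        if (RT_FAILURE(rc))
            return rc;
        rc = RTR0MemObjMapKernel(&hMap, hPhys, (void *)-1 /* no fixed address */,
                                 PAGE_SIZE /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
        if (RT_SUCCESS(rc))
            *ppv = RTR0MemObjAddress(hMap);
        else
            RTR0MemObjFree(hPhys, false /* fFreeMappings */);
        return rc;
    }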
+
+
+DECLHIDDEN(int) rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, PRTR0MEMOBJINTERNAL pMemToMap, RTR3PTR R3PtrFixed,
+                                        size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process, size_t offSub, size_t cbSub)
+{
+    /*
+     * Fend off things we cannot do.
+     */
+    AssertMsgReturn(R3PtrFixed == (RTR3PTR)-1, ("%p\n", R3PtrFixed), VERR_NOT_SUPPORTED);
+    AssertMsgReturn(R0Process == RTR0ProcHandleSelf(), ("%p != %p\n", R0Process, RTR0ProcHandleSelf()), VERR_NOT_SUPPORTED);
+    if (uAlignment != PAGE_SIZE)
+        return VERR_NOT_SUPPORTED;
+
+    /*
+     * Get parameters from the source object and offSub/cbSub.
+     */
+    PRTR0MEMOBJSOL pMemToMapSolaris = (PRTR0MEMOBJSOL)pMemToMap;
+    uint8_t       *pb               = pMemToMapSolaris->Core.pv ? (uint8_t *)pMemToMapSolaris->Core.pv + offSub : NULL;
+    size_t const   cb               = cbSub ? cbSub : pMemToMapSolaris->Core.cb;
+    size_t const   cPages           = cb >> PAGE_SHIFT;
+    Assert(!offSub || cbSub);
+    Assert(!(cb & PAGE_OFFSET_MASK));
+
+    /*
+     * Create the mapping object.
+     */
+    PRTR0MEMOBJSOL pMemSolaris;
+    pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_MAPPING, pb, cb);
+    if (RT_UNLIKELY(!pMemSolaris))
+        return VERR_NO_MEMORY;
+
+    /*
+     * Gather the physical page addresses of the pages to be mapped.
+     */
+    int       rc          = VINF_SUCCESS;
+    uint64_t *paPhysAddrs = kmem_zalloc(sizeof(uint64_t) * cPages, KM_SLEEP);
+    if (RT_LIKELY(paPhysAddrs))
+    {
+        if (   pMemToMapSolaris->Core.enmType == RTR0MEMOBJTYPE_PHYS_NC
+            && pMemToMapSolaris->fIndivPages)
+        {
+            /* Translate individual page_t to physical addresses. */
+            page_t **papPages = pMemToMapSolaris->pvHandle;
+            AssertPtr(papPages);
+            papPages += offSub >> PAGE_SHIFT;
+            for (size_t iPage = 0; iPage < cPages; iPage++)
+                paPhysAddrs[iPage] = rtR0MemObjSolPagePhys(papPages[iPage]);
+        }
+        else if (   pMemToMapSolaris->Core.enmType == RTR0MEMOBJTYPE_PHYS
+                 && pMemToMapSolaris->fLargePage)
+        {
+            /* Split up the large page into page-sized chunks. */
+            RTHCPHYS Phys = pMemToMapSolaris->Core.u.Phys.PhysBase;
+            Phys += offSub;
+            for (size_t iPage = 0; iPage < cPages; iPage++, Phys += PAGE_SIZE)
+                paPhysAddrs[iPage] = Phys;
+        }
+        else
+        {
+            /* We have a kernel mapping, just translate virtual to physical. */
+            AssertPtr(pb);
+            for (size_t iPage = 0; iPage < cPages; iPage++)
+            {
+                paPhysAddrs[iPage] = rtR0MemObjSolVirtToPhys(pb);
+                if (RT_UNLIKELY(paPhysAddrs[iPage] == -(uint64_t)1))
+                {
+                    LogRel(("rtR0MemObjNativeMapUser: no page to map.\n"));
+                    rc = VERR_MAP_FAILED;
+                    break;
+                }
+                pb += PAGE_SIZE;
+            }
+        }
+        if (RT_SUCCESS(rc))
+        {
+            /*
+             * Perform the actual mapping.
+             */
+            unsigned fPageAccess = PROT_READ;
+            if (fProt & RTMEM_PROT_WRITE)
+                fPageAccess |= PROT_WRITE;
+            if (fProt & RTMEM_PROT_EXEC)
+                fPageAccess |= PROT_EXEC;
+
+            caddr_t UserAddr = NULL;
+            rc = rtR0MemObjSolUserMap(&UserAddr, fPageAccess, paPhysAddrs, cb, PAGE_SIZE);
+            if (RT_SUCCESS(rc))
+            {
+                pMemSolaris->Core.u.Mapping.R0Process = R0Process;
+                pMemSolaris->Core.pv                  = UserAddr;
+
+                *ppMem = &pMemSolaris->Core;
+                kmem_free(paPhysAddrs, sizeof(uint64_t) * cPages);
+                return VINF_SUCCESS;
+            }
+
+            LogRel(("rtR0MemObjNativeMapUser: rtR0MemObjSolUserMap failed rc=%d.\n", rc));
+        }
+
+        rc = VERR_MAP_FAILED;
+        kmem_free(paPhysAddrs, sizeof(uint64_t) * cPages);
+    }
+    else
+        rc = VERR_NO_MEMORY;
+    rtR0MemObjDelete(&pMemSolaris->Core);
+    return rc;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
+{
+    NOREF(pMem);
+    NOREF(offSub);
+    NOREF(cbSub);
+    NOREF(fProt);
+    return VERR_NOT_SUPPORTED;
+}
+
+
+DECLHIDDEN(RTHCPHYS) rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
+{
+    PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)pMem;
+
+    switch (pMemSolaris->Core.enmType)
+    {
+        case RTR0MEMOBJTYPE_PHYS_NC:
+            if (   pMemSolaris->Core.u.Phys.fAllocated
+                || !pMemSolaris->fIndivPages)
+            {
+                uint8_t *pb = (uint8_t *)pMemSolaris->Core.pv + ((size_t)iPage << PAGE_SHIFT);
+                return rtR0MemObjSolVirtToPhys(pb);
+            }
+            page_t **ppPages = pMemSolaris->pvHandle;
+            return rtR0MemObjSolPagePhys(ppPages[iPage]);
+
+        case RTR0MEMOBJTYPE_PAGE:
+        case RTR0MEMOBJTYPE_LOW:
+        case RTR0MEMOBJTYPE_LOCK:
+        {
+            uint8_t *pb = (uint8_t *)pMemSolaris->Core.pv + ((size_t)iPage << PAGE_SHIFT);
+            return rtR0MemObjSolVirtToPhys(pb);
+        }
+
+        /*
+         * Although a mapping could be handled by rtR0MemObjSolVirtToPhys(offset) like the case above,
+         * request it from the parent so that we have a clear distinction between CONT/PHYS_NC.
+         */
+        case RTR0MEMOBJTYPE_MAPPING:
+            return rtR0MemObjNativeGetPagePhysAddr(pMemSolaris->Core.uRel.Child.pParent, iPage);
+
+        case RTR0MEMOBJTYPE_CONT:
+        case RTR0MEMOBJTYPE_PHYS:
+            AssertFailed(); /* handled by the caller */
+        case RTR0MEMOBJTYPE_RES_VIRT:
+        default:
+            return NIL_RTHCPHYS;
+    }
+}
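Finally, a hedged sketch of how the per-page query composes at the IPRT level (hypothetical helper; works for any page-backed object type handled above):

    /* Hypothetical: log the physical address of every page in a memory object. */
    static void myDrvLogPages(RTR0MEMOBJ hMemObj)
    {
        size_t const cPages = RTR0MemObjSize(hMemObj) >> PAGE_SHIFT;
        for (size_t iPage = 0; iPage < cPages; iPage++)
            LogRel(("page %zu at %RHp\n", iPage, RTR0MemObjGetPagePhysAddr(hMemObj, iPage)));
    }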