1 files changed, 271 insertions, 0 deletions
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
new file mode 100644
index 000000000..e5fe0a171
--- /dev/null
+++ b/drivers/misc/cxl/cxllib.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2017 IBM Corp.
+ */
+
+#include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
+#include <asm/opal-api.h>
+#include <asm/pnv-pci.h>
+#include <misc/cxllib.h>
+
+#include "cxl.h"
+
+#define CXL_INVALID_DRA                 ~0ull
+#define CXL_DUMMY_READ_SIZE             128
+#define CXL_DUMMY_READ_ALIGN            8
+#define CXL_CAPI_WINDOW_START           0x2000000000000ull
+#define CXL_CAPI_WINDOW_LOG_SIZE        48
+#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1
+
+
+bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
+{
+	int rc;
+	u32 phb_index;
+	u64 chip_id, capp_unit_id;
+
+	/* No flags currently supported */
+	if (flags)
+		return false;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return false;
+
+	if (!cxl_is_power9())
+		return false;
+
+	if (cxl_slot_is_switched(dev))
+		return false;
+
+	/* on p9, some pci slots are not connected to a CAPP unit */
+	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
+	if (rc)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
+
+static DEFINE_MUTEX(dra_mutex);
+static u64 dummy_read_addr = CXL_INVALID_DRA;
+
+static int allocate_dummy_read_buf(void)
+{
+	u64 buf, vaddr;
+	size_t buf_size;
+
+	/*
+	 * Dummy read buffer is 128-byte long, aligned on a
+	 * 256-byte boundary and we need the physical address.
+	 */
+	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
+	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
+					(~0ull << CXL_DUMMY_READ_ALIGN);
+
+	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
+		"Dummy read buffer alignment issue");
+	dummy_read_addr = virt_to_phys((void *) vaddr);
+	return 0;
+}
+
+int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
+{
+	int rc;
+	u32 phb_index;
+	u64 chip_id, capp_unit_id;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return -EINVAL;
+
+	mutex_lock(&dra_mutex);
+	if (dummy_read_addr == CXL_INVALID_DRA) {
+		rc = allocate_dummy_read_buf();
+		if (rc) {
+			mutex_unlock(&dra_mutex);
+			return rc;
+		}
+	}
+	mutex_unlock(&dra_mutex);
+
+	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
+	if (rc)
+		return rc;
+
+	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
+	if (rc)
+		return rc;
+
+	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
+	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
+	cfg->bar_addr = CXL_CAPI_WINDOW_START;
+	cfg->dra = dummy_read_addr;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
+
+int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
+			unsigned long flags)
+{
+	int rc = 0;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return -EINVAL;
+
+	switch (mode) {
+	case CXL_MODE_PCI:
+		/*
+		 * We currently don't support going back to PCI mode
+		 * However, we'll turn the invalidations off, so that
+		 * the firmware doesn't have to ack them and can do
+		 * things like reset, etc.. with no worries.
+		 * So always return EPERM (can't go back to PCI) or
+		 * EBUSY if we couldn't even turn off snooping
+		 */
+		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
+		if (rc)
+			rc = -EBUSY;
+		else
+			rc = -EPERM;
+		break;
+	case CXL_MODE_CXL:
+		/* DMA only supported on TVT1 for the time being */
+		if (flags != CXL_MODE_DMA_TVT1)
+			return -EINVAL;
+		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
+		if (rc)
+			return rc;
+		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
+		break;
+	default:
+		rc = -EINVAL;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
+
+/*
+ * When switching the PHB to capi mode, the TVT#1 entry for
+ * the Partitionable Endpoint is set in bypass mode, like
+ * in PCI mode.
+ * Configure the device dma to use TVT#1, which is done
+ * by calling dma_set_mask() with a mask large enough.
+ */
+int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
+{
+	int rc;
+
+	if (flags)
+		return -EINVAL;
+
+	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
+	return rc;
+}
+EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
+
+int cxllib_get_PE_attributes(struct task_struct *task,
+			     unsigned long translation_mode,
+			     struct cxllib_pe_attributes *attr)
+{
+	if (translation_mode != CXL_TRANSLATED_MODE &&
+		translation_mode != CXL_REAL_MODE)
+		return -EINVAL;
+
+	attr->sr = cxl_calculate_sr(false,
+				task == NULL,
+				translation_mode == CXL_REAL_MODE,
+				true);
+	attr->lpid = mfspr(SPRN_LPID);
+	if (task) {
+		struct mm_struct *mm = get_task_mm(task);
+		if (mm == NULL)
+			return -EINVAL;
+		/*
+		 * Caller is keeping a reference on mm_users for as long
+		 * as XSL uses the memory context
+		 */
+		attr->pid = mm->context.id;
+		mmput(mm);
+		attr->tid = task->thread.tidr;
+	} else {
+		attr->pid = 0;
+		attr->tid = 0;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
+
+static int get_vma_info(struct mm_struct *mm, u64 addr,
+			u64 *vma_start, u64 *vma_end,
+			unsigned long *page_size)
+{
+	struct vm_area_struct *vma = NULL;
+	int rc = 0;
+
+	mmap_read_lock(mm);
+
+	vma = find_vma(mm, addr);
+	if (!vma) {
+		rc = -EFAULT;
+		goto out;
+	}
+	*page_size = vma_kernel_pagesize(vma);
+	*vma_start = vma->vm_start;
+	*vma_end = vma->vm_end;
+out:
+	mmap_read_unlock(mm);
+	return rc;
+}
+
+int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+{
+	int rc;
+	u64 dar, vma_start, vma_end;
+	unsigned long page_size;
+
+	if (mm == NULL)
+		return -EFAULT;
+
+	/*
+	 * The buffer we have to process can extend over several pages
+	 * and may also cover several VMAs.
+	 * We iterate over all the pages. The page size could vary
+	 * between VMAs.
+	 */
+	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
+	if (rc)
+		return rc;
+
+	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
+	     dar += page_size) {
+		if (dar < vma_start || dar >= vma_end) {
+			/*
+			 * We don't hold mm->mmap_lock while iterating, since
+			 * the lock is required by one of the lower-level page
+			 * fault processing functions and it could
+			 * create a deadlock.
+			 *
+			 * It means the VMAs can be altered between 2
+			 * loop iterations and we could theoretically
+			 * miss a page (however unlikely). But that's
+			 * not really a problem, as the driver will
+			 * retry access, get another page fault on the
+			 * missing page and call us again.
+			 */
+			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
+					&page_size);
+			if (rc)
+				return rc;
+		}
+
+		rc = cxl_handle_mm_fault(mm, flags, dar);
+		if (rc)
+			return -EFAULT;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxllib_handle_fault);