author		Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit		ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree		b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/s390/cio/vfio_ccw_cp.c
parent		Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
		linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15. (tag: upstream/6.6.15)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/s390/cio/vfio_ccw_cp.c')
-rw-r--r--	drivers/s390/cio/vfio_ccw_cp.c	956
1 file changed, 956 insertions, 0 deletions
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
new file mode 100644
index 0000000000..aafd66305e
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -0,0 +1,956 @@

// SPDX-License-Identifier: GPL-2.0
/*
 * channel program interfaces
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 */

#include <linux/ratelimit.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/vfio.h>
#include <asm/idals.h>

#include "vfio_ccw_cp.h"
#include "vfio_ccw_private.h"

struct page_array {
	/* Array that stores the iova addresses of the pages to pin. */
	dma_addr_t		*pa_iova;
	/* Array that receives the pinned pages. */
	struct page		**pa_page;
	/* Number of pages pinned from @pa_iova. */
	int			pa_nr;
};

struct ccwchain {
	struct list_head	next;
	struct ccw1		*ch_ccw;
	/* Guest physical address of the current chain. */
	u64			ch_iova;
	/* Count of the valid ccws in chain. */
	int			ch_len;
	/* Pinned pages for the original data. */
	struct page_array	*ch_pa;
};

/*
 * page_array_alloc() - alloc memory for page array
 * @pa: page_array on which to perform the operation
 * @len: number of pages that should be pinned from @iova
 *
 * Attempt to allocate memory for page array.
 *
 * Usage of page_array:
 * We expect (pa_nr == 0) and (pa_iova == NULL); all fields in
 * this structure will be filled in by this function.
 *
 * Returns:
 *         0 if page array is allocated
 *   -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL
 *   -ENOMEM if alloc failed
 */
static int page_array_alloc(struct page_array *pa, unsigned int len)
{
	if (pa->pa_nr || pa->pa_iova)
		return -EINVAL;

	if (len == 0)
		return -EINVAL;

	pa->pa_nr = len;

	pa->pa_iova = kcalloc(len, sizeof(*pa->pa_iova), GFP_KERNEL);
	if (!pa->pa_iova)
		return -ENOMEM;

	pa->pa_page = kcalloc(len, sizeof(*pa->pa_page), GFP_KERNEL);
	if (!pa->pa_page) {
		kfree(pa->pa_iova);
		return -ENOMEM;
	}

	return 0;
}

/*
 * page_array_unpin() - Unpin user pages in memory
 * @pa: page_array on which to perform the operation
 * @vdev: the vfio device to perform the operation
 * @pa_nr: number of user pages to unpin
 * @unaligned: were pages unaligned on the pin request
 *
 * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0;
 * otherwise only clear pa->pa_nr.
 */
static void page_array_unpin(struct page_array *pa,
			     struct vfio_device *vdev, int pa_nr, bool unaligned)
{
	int unpinned = 0, npage = 1;

	while (unpinned < pa_nr) {
		dma_addr_t *first = &pa->pa_iova[unpinned];
		dma_addr_t *last = &first[npage];

		if (unpinned + npage < pa_nr &&
		    *first + npage * PAGE_SIZE == *last &&
		    !unaligned) {
			npage++;
			continue;
		}

		vfio_unpin_pages(vdev, *first, npage);
		unpinned += npage;
		npage = 1;
	}

	pa->pa_nr = 0;
}

/*
 * page_array_pin() - Pin user pages in memory
 * @pa: page_array on which to perform the operation
 * @vdev: the vfio device to perform pin operations
 * @unaligned: true if the pages are not guaranteed to be 4K-aligned
 *
 * Returns number of pages pinned upon success.
 * If the pin request partially succeeds, or fails completely,
 * all pages are left unpinned and a negative error value is returned.
 *
 * Requests to pin "aligned" pages can be coalesced into a single
 * vfio_pin_pages request for the sake of efficiency, based on the
 * expectation of 4K page requests. Unaligned requests are probably
 * dealing with 2K "pages", and cannot be coalesced without
 * reworking this logic to incorporate that math.
 */
static int page_array_pin(struct page_array *pa, struct vfio_device *vdev,
			  bool unaligned)
{
	int pinned = 0, npage = 1;
	int ret = 0;

	while (pinned < pa->pa_nr) {
		dma_addr_t *first = &pa->pa_iova[pinned];
		dma_addr_t *last = &first[npage];

		if (pinned + npage < pa->pa_nr &&
		    *first + npage * PAGE_SIZE == *last &&
		    !unaligned) {
			npage++;
			continue;
		}

		ret = vfio_pin_pages(vdev, *first, npage,
				     IOMMU_READ | IOMMU_WRITE,
				     &pa->pa_page[pinned]);
		if (ret < 0) {
			goto err_out;
		} else if (ret > 0 && ret != npage) {
			pinned += ret;
			ret = -EINVAL;
			goto err_out;
		}
		pinned += npage;
		npage = 1;
	}

	return ret;

err_out:
	page_array_unpin(pa, vdev, pinned, unaligned);
	return ret;
}
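/*
 * Illustrative sketch, not part of the original commit: the pin/unpin
 * loops above coalesce adjacent requests by checking whether the next
 * iova entry continues the current run of 4K pages. A standalone
 * rendering of that contiguity test, with hypothetical ex_* names,
 * compiled out via #if 0:
 */
#if 0
#define EX_PAGE_SIZE	4096UL

/*
 * Return how many entries of @iova, starting at @idx, form one
 * contiguous run; a single vfio_pin_pages() call can cover the
 * whole run.
 */
static int ex_contiguous_run(const unsigned long *iova, int nr, int idx)
{
	int npage = 1;

	while (idx + npage < nr &&
	       iova[idx] + npage * EX_PAGE_SIZE == iova[idx + npage])
		npage++;

	return npage;
}
#endif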
/* Unpin the pages before releasing the memory. */
static void page_array_unpin_free(struct page_array *pa,
				  struct vfio_device *vdev, bool unaligned)
{
	page_array_unpin(pa, vdev, pa->pa_nr, unaligned);
	kfree(pa->pa_page);
	kfree(pa->pa_iova);
}

static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length)
{
	u64 iova_pfn_start = iova >> PAGE_SHIFT;
	u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT;
	u64 pfn;
	int i;

	for (i = 0; i < pa->pa_nr; i++) {
		pfn = pa->pa_iova[i] >> PAGE_SHIFT;
		if (pfn >= iova_pfn_start && pfn <= iova_pfn_end)
			return true;
	}

	return false;
}

/* Create the list of IDAL words for a page_array. */
static inline void page_array_idal_create_words(struct page_array *pa,
						unsigned long *idaws)
{
	int i;

	/*
	 * IDAL words (except the first one) rely on the memory being 4K
	 * aligned. If a user virtual address is 4K aligned, then its
	 * corresponding kernel physical address will also be 4K aligned.
	 * Thus there will be no problem here to simply use the phys to
	 * create an idaw.
	 */

	for (i = 0; i < pa->pa_nr; i++) {
		idaws[i] = page_to_phys(pa->pa_page[i]);

		/* Incorporate any offset from each starting address */
		idaws[i] += pa->pa_iova[i] & (PAGE_SIZE - 1);
	}
}

static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
{
	struct ccw0 ccw0;
	struct ccw1 *pccw1 = source;
	int i;

	for (i = 0; i < len; i++) {
		ccw0 = *(struct ccw0 *)pccw1;
		if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
			pccw1->cmd_code = CCW_CMD_TIC;
			pccw1->flags = 0;
			pccw1->count = 0;
		} else {
			pccw1->cmd_code = ccw0.cmd_code;
			pccw1->flags = ccw0.flags;
			pccw1->count = ccw0.count;
		}
		pccw1->cda = ccw0.cda;
		pccw1++;
	}
}
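/*
 * Illustrative sketch, not part of the original commit:
 * convert_ccw0_to_ccw1() above rewrites each CCW in place. The two
 * formats carry the same fields in a different order and address width;
 * this hedged, out-of-place version uses hypothetical ex_* types to make
 * the field mapping explicit (the TIC special case is omitted):
 */
#if 0
struct ex_ccw0 {			/* Format-0: 24-bit data address */
	unsigned char	cmd_code;
	unsigned int	cda : 24;
	unsigned char	flags;
	unsigned char	reserved;
	unsigned short	count;
} __attribute__((packed));

struct ex_ccw1 {			/* Format-1: 31-bit data address */
	unsigned char	cmd_code;
	unsigned char	flags;
	unsigned short	count;
	unsigned int	cda;
} __attribute__((packed));

static void ex_convert(struct ex_ccw1 *out, const struct ex_ccw0 *in)
{
	out->cmd_code = in->cmd_code;	/* same command code */
	out->flags = in->flags;		/* flags move forward */
	out->count = in->count;		/* byte count moves forward */
	out->cda = in->cda;		/* address widens to 31 bits */
}
#endif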
#define idal_is_2k(_cp) (!(_cp)->orb.cmd.c64 || (_cp)->orb.cmd.i2k)

/*
 * Helpers to operate ccwchain.
 */
#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)

#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)

#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)

#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)

#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))

/*
 * ccw_does_data_transfer()
 *
 * Determine whether a CCW will move any data, such that the guest pages
 * would need to be pinned before performing the I/O.
 *
 * Returns 1 if yes, 0 if no.
 */
static inline int ccw_does_data_transfer(struct ccw1 *ccw)
{
	/* If the count field is zero, then no data will be transferred */
	if (ccw->count == 0)
		return 0;

	/* If the command is a NOP, then no data will be transferred */
	if (ccw_is_noop(ccw))
		return 0;

	/* If the skip flag is off, then data will be transferred */
	if (!ccw_is_skip(ccw))
		return 1;

	/*
	 * If the skip flag is on, it is only meaningful if the command
	 * code is a read, read backward, sense, or sense ID. In those
	 * cases, no data will be transferred.
	 */
	if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
		return 0;

	if (ccw_is_sense(ccw))
		return 0;

	/* The skip flag is on, but it is ignored for this command code. */
	return 1;
}

/*
 * is_cpa_within_range()
 *
 * @cpa: channel program address being questioned
 * @head: address of the beginning of a CCW chain
 * @len: number of CCWs within the chain
 *
 * Determine whether the address of a CCW (whether a new chain,
 * or the target of a TIC) falls within a range (including the end points).
 *
 * Returns 1 if yes, 0 if no.
 */
static inline int is_cpa_within_range(u32 cpa, u32 head, int len)
{
	u32 tail = head + (len - 1) * sizeof(struct ccw1);

	return (head <= cpa && cpa <= tail);
}

static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len)
{
	if (!ccw_is_tic(ccw))
		return 0;

	return is_cpa_within_range(ccw->cda, head, len);
}
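/*
 * A worked example of the range check above (illustrative, not part of
 * the original commit): each ccw1 is 8 bytes, so for head = 0x20000 and
 * len = 4, tail = 0x20000 + 3 * 8 = 0x20018. A cpa of 0x20010 (the third
 * CCW) satisfies head <= cpa <= tail and is "within range"; 0x20020 is
 * the first address past the chain and is not.
 */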
static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
{
	struct ccwchain *chain;

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;

	chain->ch_ccw = kcalloc(len, sizeof(*chain->ch_ccw), GFP_DMA | GFP_KERNEL);
	if (!chain->ch_ccw)
		goto out_err;

	chain->ch_pa = kcalloc(len, sizeof(*chain->ch_pa), GFP_KERNEL);
	if (!chain->ch_pa)
		goto out_err;

	list_add_tail(&chain->next, &cp->ccwchain_list);

	return chain;

out_err:
	kfree(chain->ch_ccw);
	kfree(chain);
	return NULL;
}

static void ccwchain_free(struct ccwchain *chain)
{
	list_del(&chain->next);
	kfree(chain->ch_pa);
	kfree(chain->ch_ccw);
	kfree(chain);
}

/* Free resource for a ccw that allocated memory for its cda. */
static void ccwchain_cda_free(struct ccwchain *chain, int idx)
{
	struct ccw1 *ccw = &chain->ch_ccw[idx];

	if (ccw_is_tic(ccw))
		return;

	kfree(phys_to_virt(ccw->cda));
}

/**
 * ccwchain_calc_length - calculate the length of the ccw chain.
 * @iova: guest physical address of the target ccw chain
 * @cp: channel_program on which to perform the operation
 *
 * This is the chain length not considering any TICs.
 * You need to do a new round for each TIC target.
 *
 * The program is also validated for absence of not yet supported
 * indirect data addressing scenarios.
 *
 * Returns: the length of the ccw chain or -errno.
 */
static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
{
	struct ccw1 *ccw = cp->guest_cp;
	int cnt = 0;

	do {
		cnt++;

		/*
		 * We want to keep counting if the current CCW has the
		 * command-chaining flag enabled, or if it is a TIC CCW
		 * that loops back into the current chain. The latter
		 * is used for device orientation, where the CCW PRIOR to
		 * the TIC can either jump to the TIC or a CCW immediately
		 * after the TIC, depending on the results of its operation.
		 */
		if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt))
			break;

		ccw++;
	} while (cnt < CCWCHAIN_LEN_MAX + 1);

	if (cnt == CCWCHAIN_LEN_MAX + 1)
		cnt = -EINVAL;

	return cnt;
}

static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
{
	struct ccwchain *chain;
	u32 ccw_head;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		ccw_head = chain->ch_iova;
		if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len))
			return 1;
	}

	return 0;
}

static int ccwchain_loop_tic(struct ccwchain *chain,
			     struct channel_program *cp);

static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccwchain *chain;
	int len, ret;

	/* Copy 2K (the most we support today) of possible CCWs */
	ret = vfio_dma_rw(vdev, cda, cp->guest_cp,
			  CCWCHAIN_LEN_MAX * sizeof(struct ccw1), false);
	if (ret)
		return ret;

	/* Convert any Format-0 CCWs to Format-1 */
	if (!cp->orb.cmd.fmt)
		convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);

	/* Count the CCWs in the current chain */
	len = ccwchain_calc_length(cda, cp);
	if (len < 0)
		return len;

	/* Need to allocate a new chain for this one. */
	chain = ccwchain_alloc(cp, len);
	if (!chain)
		return -ENOMEM;

	chain->ch_len = len;
	chain->ch_iova = cda;

	/* Copy the actual CCWs into the new chain */
	memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));

	/* Loop for tics on this new chain. */
	ret = ccwchain_loop_tic(chain, cp);

	if (ret)
		ccwchain_free(chain);

	return ret;
}

/* Loop for TICs. */
static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
{
	struct ccw1 *tic;
	int i, ret;

	for (i = 0; i < chain->ch_len; i++) {
		tic = &chain->ch_ccw[i];

		if (!ccw_is_tic(tic))
			continue;

		/* May transfer to an existing chain. */
		if (tic_target_chain_exists(tic, cp))
			continue;

		/* Build a ccwchain for the next segment */
		ret = ccwchain_handle_ccw(tic->cda, cp);
		if (ret)
			return ret;
	}

	return 0;
}

static int ccwchain_fetch_tic(struct ccw1 *ccw,
			      struct channel_program *cp)
{
	struct ccwchain *iter;
	u32 ccw_head;

	list_for_each_entry(iter, &cp->ccwchain_list, next) {
		ccw_head = iter->ch_iova;
		if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
			ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) +
						     (ccw->cda - ccw_head));
			return 0;
		}
	}

	return -EFAULT;
}
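/*
 * A worked example of the TIC re-targeting above (illustrative, not part
 * of the original commit): if a chain was copied from guest address
 * ch_iova = 0x20000 into a host buffer at ch_ccw = 0x3f8e1000, a TIC
 * whose cda is 0x20010 points 0x10 bytes into the chain, so it is
 * rewritten to 0x3f8e1000 + (0x20010 - 0x20000) = 0x3f8e1010, i.e. the
 * same CCW in the translated copy.
 */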
static unsigned long *get_guest_idal(struct ccw1 *ccw,
				     struct channel_program *cp,
				     int idaw_nr)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	unsigned long *idaws;
	unsigned int *idaws_f1;
	int idal_len = idaw_nr * sizeof(*idaws);
	int idaw_size = idal_is_2k(cp) ? PAGE_SIZE / 2 : PAGE_SIZE;
	int idaw_mask = ~(idaw_size - 1);
	int i, ret;

	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
	if (!idaws)
		return ERR_PTR(-ENOMEM);

	if (ccw_is_idal(ccw)) {
		/* Copy IDAL from guest */
		ret = vfio_dma_rw(vdev, ccw->cda, idaws, idal_len, false);
		if (ret) {
			kfree(idaws);
			return ERR_PTR(ret);
		}
	} else {
		/* Fabricate an IDAL based off CCW data address */
		if (cp->orb.cmd.c64) {
			idaws[0] = ccw->cda;
			for (i = 1; i < idaw_nr; i++)
				idaws[i] = (idaws[i - 1] + idaw_size) & idaw_mask;
		} else {
			idaws_f1 = (unsigned int *)idaws;
			idaws_f1[0] = ccw->cda;
			for (i = 1; i < idaw_nr; i++)
				idaws_f1[i] = (idaws_f1[i - 1] + idaw_size) & idaw_mask;
		}
	}

	return idaws;
}

/*
 * ccw_count_idaws() - Calculate the number of IDAWs needed to transfer
 * a specified amount of data
 *
 * @ccw: The Channel Command Word being translated
 * @cp: Channel Program being processed
 *
 * The ORB is examined, since it specifies what IDAWs could actually be
 * used by any CCW in the channel program, regardless of whether or not
 * the CCW actually does. An ORB that does not specify Format-2-IDAW
 * Control could still contain a CCW with an IDAL, which would be
 * Format-1 and thus only move 2K with each IDAW. Thus all CCWs within
 * the channel program must follow the same size requirements.
 */
static int ccw_count_idaws(struct ccw1 *ccw,
			   struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	u64 iova;
	int size = cp->orb.cmd.c64 ? sizeof(u64) : sizeof(u32);
	int ret;
	int bytes = 1;

	if (ccw->count)
		bytes = ccw->count;

	if (ccw_is_idal(ccw)) {
		/* Read first IDAW to check its starting address. */
		/* All subsequent IDAWs will be 2K- or 4K-aligned. */
		ret = vfio_dma_rw(vdev, ccw->cda, &iova, size, false);
		if (ret)
			return ret;

		/*
		 * Format-1 IDAWs only occupy the first 32 bits,
		 * and bit 0 is always off.
		 */
		if (!cp->orb.cmd.c64)
			iova = iova >> 32;
	} else {
		iova = ccw->cda;
	}

	/* Format-1 IDAWs operate on 2K each */
	if (!cp->orb.cmd.c64)
		return idal_2k_nr_words((void *)iova, bytes);

	/* Using the 2K variant of Format-2 IDAWs? */
	if (cp->orb.cmd.i2k)
		return idal_2k_nr_words((void *)iova, bytes);

	/* The 'usual' case is 4K Format-2 IDAWs */
	return idal_nr_words((void *)iova, bytes);
}
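/*
 * Illustrative sketch, not part of the original commit: the IDAW count
 * is just the number of 2K or 4K blocks spanned by [cda, cda + count).
 * A hedged, standalone model of that arithmetic (ex_* names are
 * hypothetical, not the asm/idals.h implementation):
 */
#if 0
/* Number of @blksz-sized IDAWs needed to cover @count bytes at @addr. */
static int ex_nr_idaws(unsigned long addr, unsigned int count,
		       unsigned long blksz)
{
	unsigned long first = addr & ~(blksz - 1);
	unsigned long last = (addr + count - 1) & ~(blksz - 1);

	return (last - first) / blksz + 1;
}

/*
 * Example: addr = 0x5002, count = 0x3000 with 4K blocks covers
 * [0x5002, 0x8002), touching blocks 0x5000/0x6000/0x7000/0x8000,
 * so four IDAWs are needed.
 */
#endif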
static int ccwchain_fetch_ccw(struct ccw1 *ccw,
			      struct page_array *pa,
			      struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	unsigned long *idaws;
	unsigned int *idaws_f1;
	int ret;
	int idaw_nr;
	int i;

	/* Calculate size of IDAL */
	idaw_nr = ccw_count_idaws(ccw, cp);
	if (idaw_nr < 0)
		return idaw_nr;

	/* Allocate an IDAL from host storage */
	idaws = get_guest_idal(ccw, cp, idaw_nr);
	if (IS_ERR(idaws)) {
		ret = PTR_ERR(idaws);
		goto out_init;
	}

	/*
	 * Allocate an array of pages to pin/translate.
	 * The number of pages is actually the count of the idaws
	 * required for the data transfer, since we only support
	 * 4K IDAWs today.
	 */
	ret = page_array_alloc(pa, idaw_nr);
	if (ret < 0)
		goto out_free_idaws;

	/*
	 * Copy guest IDAWs into page_array, in case the memory they
	 * occupy is not contiguous.
	 */
	idaws_f1 = (unsigned int *)idaws;
	for (i = 0; i < idaw_nr; i++) {
		if (cp->orb.cmd.c64)
			pa->pa_iova[i] = idaws[i];
		else
			pa->pa_iova[i] = idaws_f1[i];
	}

	if (ccw_does_data_transfer(ccw)) {
		ret = page_array_pin(pa, vdev, idal_is_2k(cp));
		if (ret < 0)
			goto out_unpin;
	} else {
		pa->pa_nr = 0;
	}

	ccw->cda = (__u32) virt_to_phys(idaws);
	ccw->flags |= CCW_FLAG_IDA;

	/* Populate the IDAL with pinned/translated addresses from page */
	page_array_idal_create_words(pa, idaws);

	return 0;

out_unpin:
	page_array_unpin_free(pa, vdev, idal_is_2k(cp));
out_free_idaws:
	kfree(idaws);
out_init:
	ccw->cda = 0;
	return ret;
}

/*
 * Fetch one ccw.
 * To reduce memory copy, we'll pin the cda page in memory,
 * and to get rid of the cda 2G limitation of ccw1, we'll translate
 * direct ccws to idal ccws.
 */
static int ccwchain_fetch_one(struct ccw1 *ccw,
			      struct page_array *pa,
			      struct channel_program *cp)
{
	if (ccw_is_tic(ccw))
		return ccwchain_fetch_tic(ccw, cp);

	return ccwchain_fetch_ccw(ccw, pa, cp);
}

/**
 * cp_init() - allocate ccwchains for a channel program.
 * @cp: channel_program on which to perform the operation
 * @orb: control block for the channel program from the guest
 *
 * This creates one or more ccwchain(s), and copies the raw data of
 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
 *
 * Limitations:
 * 1. Supports idal(c64) ccw chaining.
 * 2. Supports 4K idaw.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_init(struct channel_program *cp, union orb *orb)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	/* custom ratelimit used to avoid flood during guest IPL */
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
	int ret;

	/* this is an error in the caller */
	if (cp->initialized)
		return -EBUSY;

	/*
	 * We only support prefetching the channel program. We assume all channel
	 * programs executed by supported guests likewise support prefetching.
	 * Executing a channel program that does not specify prefetching will
	 * typically not cause an error, but a warning is issued to help identify
	 * the problem if something does break.
	 */
	if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
		dev_warn(vdev->dev,
			 "Prefetching channel program even though prefetch not specified in ORB");

	INIT_LIST_HEAD(&cp->ccwchain_list);
	memcpy(&cp->orb, orb, sizeof(*orb));

	/* Build a ccwchain for the first CCW segment */
	ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);

	if (!ret)
		cp->initialized = true;

	return ret;
}
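/*
 * Illustrative sketch, not part of the original commit: cp_init() and
 * several helpers above recover the vfio_device from an embedded
 * channel_program via container_of(). The pattern in miniature, with
 * hypothetical ex_* types standing in for the real structures:
 */
#if 0
#include <stddef.h>

#define ex_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ex_private {
	int	vdev;	/* stands in for struct vfio_device */
	int	cp;	/* stands in for struct channel_program */
};

static int *ex_cp_to_vdev(int *cp)
{
	/* Walk from the member back to its enclosing structure. */
	return &ex_container_of(cp, struct ex_private, cp)->vdev;
}
#endif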
/**
 * cp_free() - free resources for channel program.
 * @cp: channel_program on which to perform the operation
 *
 * This unpins the memory pages and frees the memory space occupied by
 * @cp, which must have been returned by a previous call to cp_init().
 * Otherwise, undefined behavior occurs.
 */
void cp_free(struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccwchain *chain, *temp;
	int i;

	if (!cp->initialized)
		return;

	cp->initialized = false;
	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
		for (i = 0; i < chain->ch_len; i++) {
			page_array_unpin_free(&chain->ch_pa[i], vdev, idal_is_2k(cp));
			ccwchain_cda_free(chain, i);
		}
		ccwchain_free(chain);
	}
}

/**
 * cp_prefetch() - translate a guest physical address channel program to
 *                 a real-device runnable channel program.
 * @cp: channel_program on which to perform the operation
 *
 * This function translates the guest-physical-address channel program
 * and stores the result to ccwchain list. @cp must have been
 * initialized by a previous call with cp_init(). Otherwise, undefined
 * behavior occurs.
 * For each chain composing the channel program:
 * - On entry ch_len holds the count of CCWs to be translated.
 * - On exit ch_len is adjusted to the count of successfully translated CCWs.
 * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
 *
 * The S/390 CCW Translation APIs (prefixed by 'cp_') are introduced
 * as helpers to do ccw chain translation inside the kernel. Basically
 * they accept a channel program issued by a virtual machine, and
 * translate the channel program to a real-device runnable channel
 * program.
 *
 * These APIs will copy the ccws into kernel-space buffers, and update
 * the guest physical addresses with their corresponding host physical
 * addresses. Then channel I/O device drivers could issue the
 * translated channel program to real devices to perform an I/O
 * operation.
 *
 * These interfaces are designed to support translation only for
 * channel programs, which are generated and formatted by a
 * guest. Thus this will make it possible for things like VFIO to
 * leverage the interfaces to passthrough a channel I/O mediated
 * device in QEMU.
 *
 * We support direct ccw chaining by translating them to idal ccws.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_prefetch(struct channel_program *cp)
{
	struct ccwchain *chain;
	struct ccw1 *ccw;
	struct page_array *pa;
	int len, idx, ret;

	/* this is an error in the caller */
	if (!cp->initialized)
		return -EINVAL;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		len = chain->ch_len;
		for (idx = 0; idx < len; idx++) {
			ccw = &chain->ch_ccw[idx];
			pa = &chain->ch_pa[idx];

			ret = ccwchain_fetch_one(ccw, pa, cp);
			if (ret)
				goto out_err;
		}
	}

	return 0;
out_err:
	/* Only cleanup the chain elements that were actually translated. */
	chain->ch_len = idx;
	list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
		chain->ch_len = 0;
	}
	return ret;
}
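/*
 * A note on the error path above (illustrative, not part of the original
 * commit): on a translation failure, cp_prefetch() truncates ch_len of
 * the failing chain to the count of CCWs already translated and zeroes
 * ch_len of every later chain, so the cp_free() loop only unpins and
 * frees elements that were actually populated. For example, if
 * translation fails at idx = 3 of a 10-CCW chain, that chain keeps
 * ch_len = 3 and the remaining chains are freed as empty.
 */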
/**
 * cp_get_orb() - get the orb of the channel program
 * @cp: channel_program on which to perform the operation
 * @sch: subchannel the operation will be performed against
 *
 * This function returns the address of the updated orb of the channel
 * program. Channel I/O device drivers could use this orb to issue a
 * ssch.
 */
union orb *cp_get_orb(struct channel_program *cp, struct subchannel *sch)
{
	union orb *orb;
	struct ccwchain *chain;
	struct ccw1 *cpa;

	/* this is an error in the caller */
	if (!cp->initialized)
		return NULL;

	orb = &cp->orb;

	orb->cmd.intparm = (u32)virt_to_phys(sch);
	orb->cmd.fmt = 1;

	/*
	 * Everything built by vfio-ccw is a Format-2 IDAL.
	 * If the input was a Format-1 IDAL, indicate that
	 * 2K Format-2 IDAWs were created here.
	 */
	if (!orb->cmd.c64)
		orb->cmd.i2k = 1;
	orb->cmd.c64 = 1;

	if (orb->cmd.lpm == 0)
		orb->cmd.lpm = sch->lpm;

	chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
	cpa = chain->ch_ccw;
	orb->cmd.cpa = (__u32)virt_to_phys(cpa);

	return orb;
}

/**
 * cp_update_scsw() - update scsw for a channel program.
 * @cp: channel_program on which to perform the operation
 * @scsw: I/O results of the channel program and also the target to be
 *        updated
 *
 * @scsw contains the I/O results of the channel program that is pointed
 * to by @cp. However, what @scsw->cpa stores is a host physical
 * address, which is meaningless for the guest, which is waiting for
 * the I/O results.
 *
 * This function updates @scsw->cpa to its corresponding guest physical
 * address.
 */
void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
{
	struct ccwchain *chain;
	u32 cpa = scsw->cmd.cpa;
	u32 ccw_head;

	if (!cp->initialized)
		return;

	/*
	 * LATER:
	 * For now, only update the cmd.cpa part. We may need to deal with
	 * other portions of the schib as well, even if we don't return them
	 * in the ioctl directly. Path status changes etc.
	 */
	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		ccw_head = (u32)(u64)chain->ch_ccw;
		/*
		 * On successful execution, cpa points just beyond the end
		 * of the chain.
		 */
		if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
			/*
			 * (cpa - ccw_head) is the offset value of the host
			 * physical ccw to its chain head.
			 * Adding this value to the guest physical ccw chain
			 * head gets us the guest cpa.
			 */
			cpa = chain->ch_iova + (cpa - ccw_head);
			break;
		}
	}

	scsw->cmd.cpa = cpa;
}

/**
 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
 * @cp: channel_program on which to perform the operation
 * @iova: the iova to check
 * @length: the length to check from @iova
 *
 * If the @iova is currently pinned for the ccw chain, return true;
 * else return false.
 */
bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length)
{
	struct ccwchain *chain;
	int i;

	if (!cp->initialized)
		return false;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		for (i = 0; i < chain->ch_len; i++)
			if (page_array_iova_pinned(&chain->ch_pa[i], iova, length))
				return true;
	}

	return false;
}
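/*
 * Illustrative sketch, not part of the original commit: the rough
 * calling sequence a driver such as vfio-ccw's FSM would use around
 * these cp_* helpers. ex_issue_io() is hypothetical and error handling
 * is abbreviated.
 */
#if 0
static int ex_issue_io(struct channel_program *cp, union orb *guest_orb,
		       struct subchannel *sch)
{
	union orb *orb;
	int ret;

	ret = cp_init(cp, guest_orb);	/* copy the guest chains */
	if (ret)
		return ret;

	ret = cp_prefetch(cp);		/* pin pages + translate CCWs */
	if (ret) {
		cp_free(cp);
		return ret;
	}

	orb = cp_get_orb(cp, sch);	/* host-runnable ORB */
	/* ... issue ssch() with @orb; on interrupt, cp_update_scsw() */
	/* rewrites the host cpa back to a guest address; finally,    */
	/* cp_free() unpins pages and releases the chains.            */
	return 0;
}
#endif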