Diffstat
-rw-r--r-- drivers/crypto/cavium/cpt/Kconfig | 17
-rw-r--r-- drivers/crypto/cavium/cpt/Makefile | 3
-rw-r--r-- drivers/crypto/cavium/cpt/cpt_common.h | 156
-rw-r--r-- drivers/crypto/cavium/cpt/cpt_hw_types.h | 658
-rw-r--r-- drivers/crypto/cavium/cpt/cptpf.h | 64
-rw-r--r-- drivers/crypto/cavium/cpt/cptpf_main.c | 675
-rw-r--r-- drivers/crypto/cavium/cpt/cptpf_mbox.c | 163
-rw-r--r-- drivers/crypto/cavium/cpt/cptvf.h | 132
-rw-r--r-- drivers/crypto/cavium/cpt/cptvf_algs.c | 525
-rw-r--r-- drivers/crypto/cavium/cpt/cptvf_algs.h | 120
-rw-r--r-- drivers/crypto/cavium/cpt/cptvf_main.c | 866
-rw-r--r-- drivers/crypto/cavium/cpt/cptvf_mbox.c | 211
-rw-r--r-- drivers/crypto/cavium/cpt/cptvf_reqmanager.c | 594
-rw-r--r-- drivers/crypto/cavium/cpt/request_manager.h | 149
14 files changed, 4333 insertions, 0 deletions
diff --git a/drivers/crypto/cavium/cpt/Kconfig b/drivers/crypto/cavium/cpt/Kconfig new file mode 100644 index 000000000..cbd51b1aa --- /dev/null +++ b/drivers/crypto/cavium/cpt/Kconfig @@ -0,0 +1,17 @@ +# +# Cavium crypto device configuration +# + +config CRYPTO_DEV_CPT + tristate + +config CAVIUM_CPT + tristate "Cavium Cryptographic Accelerator driver" + depends on ARCH_THUNDER || COMPILE_TEST + depends on PCI_MSI && 64BIT + select CRYPTO_DEV_CPT + help + Support for Cavium CPT block found in octeon-tx series of + processors. + + To compile this as a module, choose M here. diff --git a/drivers/crypto/cavium/cpt/Makefile b/drivers/crypto/cavium/cpt/Makefile new file mode 100644 index 000000000..dbf055e14 --- /dev/null +++ b/drivers/crypto/cavium/cpt/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_CAVIUM_CPT) += cptpf.o cptvf.o +cptpf-objs := cptpf_main.o cptpf_mbox.o +cptvf-objs := cptvf_main.o cptvf_reqmanager.o cptvf_mbox.o cptvf_algs.o diff --git a/drivers/crypto/cavium/cpt/cpt_common.h b/drivers/crypto/cavium/cpt/cpt_common.h new file mode 100644 index 000000000..225078d03 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cpt_common.h @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPT_COMMON_H +#define __CPT_COMMON_H + +#include <asm/byteorder.h> +#include <linux/delay.h> +#include <linux/pci.h> + +#include "cpt_hw_types.h" + +/* Device ID */ +#define CPT_81XX_PCI_PF_DEVICE_ID 0xa040 +#define CPT_81XX_PCI_VF_DEVICE_ID 0xa041 + +/* flags to indicate the features supported */ +#define CPT_FLAG_SRIOV_ENABLED BIT(1) +#define CPT_FLAG_VF_DRIVER BIT(2) +#define CPT_FLAG_DEVICE_READY BIT(3) + +#define cpt_sriov_enabled(cpt) ((cpt)->flags & CPT_FLAG_SRIOV_ENABLED) +#define cpt_vf_driver(cpt) ((cpt)->flags & CPT_FLAG_VF_DRIVER) +#define cpt_device_ready(cpt) ((cpt)->flags & CPT_FLAG_DEVICE_READY) + +#define CPT_MBOX_MSG_TYPE_ACK 1 +#define CPT_MBOX_MSG_TYPE_NACK 2 +#define CPT_MBOX_MSG_TIMEOUT 2000 +#define VF_STATE_DOWN 0 +#define VF_STATE_UP 1 + +/* + * CPT Registers map for 81xx + */ + +/* PF registers */ +#define CPTX_PF_CONSTANTS(a) (0x0ll + ((u64)(a) << 36)) +#define CPTX_PF_RESET(a) (0x100ll + ((u64)(a) << 36)) +#define CPTX_PF_DIAG(a) (0x120ll + ((u64)(a) << 36)) +#define CPTX_PF_BIST_STATUS(a) (0x160ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_CTL(a) (0x200ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_FLIP(a) (0x210ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_INT(a) (0x220ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_INT_W1S(a) (0x230ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_ENA_W1S(a) (0x240ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_ENA_W1C(a) (0x250ll + ((u64)(a) << 36)) +#define CPTX_PF_MBOX_INTX(a, b) \ + (0x400ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_INT_W1SX(a, b) \ + (0x420ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_ENA_W1CX(a, b) \ + (0x440ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_ENA_W1SX(a, b) \ + (0x460ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXEC_INT(a) (0x500ll + 0x1000000000ll * ((a) & 0x1)) +#define CPTX_PF_EXEC_INT_W1S(a) (0x520ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_ENA_W1C(a) (0x540ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_ENA_W1S(a) (0x560ll + ((u64)(a) << 36)) +#define CPTX_PF_GX_EN(a, b) \ + (0x600ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXEC_INFO(a) (0x700ll + ((u64)(a) << 
36)) +#define CPTX_PF_EXEC_BUSY(a) (0x800ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_INFO0(a) (0x900ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_INFO1(a) (0x910ll + ((u64)(a) << 36)) +#define CPTX_PF_INST_REQ_PC(a) (0x10000ll + ((u64)(a) << 36)) +#define CPTX_PF_INST_LATENCY_PC(a) \ + (0x10020ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_REQ_PC(a) (0x10040ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_LATENCY_PC(a) (0x10060ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_UC_PC(a) (0x10080ll + ((u64)(a) << 36)) +#define CPTX_PF_ACTIVE_CYCLES_PC(a) (0x10100ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_CTL(a) (0x4000000ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_STATUS(a) (0x4000008ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_CLK(a) (0x4000010ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_CTL(a) (0x4000018ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_DATA(a) (0x4000020ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_BIST_STATUS(a) (0x4000028ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_REQ_TIMER(a) (0x4000030ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_MEM_CTL(a) (0x4000038ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_PERF_CTL(a) (0x4001000ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_CNTX(a, b) \ + (0x4001100ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXE_PERF_EVENT_CNT(a) (0x4001180ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_EPCI_INBX_CNT(a, b) \ + (0x4001200ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXE_EPCI_OUTBX_CNT(a, b) \ + (0x4001240ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_ENGX_UCODE_BASE(a, b) \ + (0x4002000ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_QX_CTL(a, b) \ + (0x8000000ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_QX_GMCTL(a, b) \ + (0x8000020ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_QX_CTL2(a, b) \ + (0x8000100ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_VFX_MBOXX(a, b, c) \ + (0x8001000ll + ((u64)(a) << 36) + ((b) << 20) + ((c) << 8)) + +/* VF registers */ +#define CPTX_VQX_CTL(a, b) (0x100ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_SADDR(a, b) (0x200ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_WAIT(a, b) (0x400ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_INPROG(a, b) (0x410ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE(a, b) (0x420ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ACK(a, b) (0x440ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_INT_W1S(a, b) (0x460ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_INT_W1C(a, b) (0x468ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ENA_W1S(a, b) (0x470ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ENA_W1C(a, b) (0x478ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_INT(a, b) (0x500ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_INT_W1S(a, b) (0x508ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_ENA_W1S(a, b) (0x510ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_ENA_W1C(a, b) (0x518ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DOORBELL(a, b) (0x600ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VFX_PF_MBOXX(a, b, c) \ + (0x1000ll + ((u64)(a) << 36) + ((b) << 20) + ((c) << 3)) + +enum vftype { + AE_TYPES = 1, + SE_TYPES = 2, + BAD_CPT_TYPES, +}; + +/* Max CPT devices supported */ +enum cpt_mbox_opcode { + CPT_MSG_VF_UP = 1, + CPT_MSG_VF_DOWN, + CPT_MSG_READY, + CPT_MSG_QLEN, + CPT_MSG_QBIND_GRP, + CPT_MSG_VQ_PRIORITY, +}; + +/* CPT mailbox structure */ +struct cpt_mbox { + u64 msg; /* Message type MBOX[0] */ + u64 
data; /* Data MBOX[1] */ +}; + +/* Register read/write APIs */ +static inline void cpt_write_csr64(u8 __iomem *hw_addr, u64 offset, + u64 val) +{ + writeq(val, hw_addr + offset); +} + +static inline u64 cpt_read_csr64(u8 __iomem *hw_addr, u64 offset) +{ + return readq(hw_addr + offset); +} +#endif /* __CPT_COMMON_H */
diff --git a/drivers/crypto/cavium/cpt/cpt_hw_types.h b/drivers/crypto/cavium/cpt/cpt_hw_types.h new file mode 100644 index 000000000..279669494 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cpt_hw_types.h @@ -0,0 +1,658 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPT_HW_TYPES_H +#define __CPT_HW_TYPES_H + +#include "cpt_common.h"
+ +/** + * Enumeration cpt_comp_e + * + * CPT Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_LAST_ENTRY = 0xFF +};
+ +/** + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented, and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127:64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work to SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO. + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. + * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 2. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 3. Passed to the AE/SE.
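+ *
+ * Submission sketch (illustrative only; res_iova and cmd0..cmd3 are
+ * hypothetical names, but the request manager added later in this patch,
+ * cptvf_reqmanager.c, fills the same fields before ringing the doorbell):
+ *
+ *	union cpt_inst_s inst = { {0} };
+ *
+ *	inst.s.doneint = 1;		(request a DONE interrupt)
+ *	inst.s.res_addr = res_iova;	(16-byte-aligned CPT_RES_S IOVA)
+ *	inst.s.ei0 = cmd0;
+ *	inst.s.ei1 = cmd1;
+ *	inst.s.ei2 = cmd2;
+ *	inst.s.ei3 = cmd3;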
+ * + */ +union cpt_inst_s { + u64 u[8]; + struct cpt_inst_s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_0_15:16; +#else /* Word 0 - Little Endian */ + u64 reserved_0_15:16; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 res_addr; +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */ + u64 reserved_172_191:20; + u64 grp:10; + u64 tt:2; + u64 tag:32; +#else /* Word 2 - Little Endian */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_191:20; +#endif /* Word 2 - End */ + u64 wq_ptr; + u64 ei0; + u64 ei1; + u64 ei2; + u64 ei3; + } s; +};
+ +/** + * Structure cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. + * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case, the CPT + * coprocessor will have also completed L2/DRAM write operations. + * Word 1 + * reserved + * + */ +union cpt_res_s { + u64 u[2]; + struct cpt_res_s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_8_15:8; + u64 compcode:8; +#else /* Word 0 - Little Endian */ + u64 compcode:8; + u64 reserved_8_15:8; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 reserved_64_127; + } s; +};
+ +/** + * Register (NCB) cpt#_pf_bist_status + * + * CPT PF Control Bist Status Register + * This register has the BIST status of memories. Each bit is the BIST result + * of an individual memory (per bit, 0 = pass and 1 = fail). + * cptx_pf_bist_status_s + * Word0 + * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by + * CPT_RAMS_E. + */ +union cptx_pf_bist_status { + u64 u; + struct cptx_pf_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_30_63:34; + u64 bstatus:30; +#else /* Word 0 - Little Endian */ + u64 bstatus:30; + u64 reserved_30_63:34; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_pf_constants + * + * CPT PF Constants Register + * This register contains implementation-related parameters of CPT in CNXXXX. + * cptx_pf_constants_s + * Word 0 + * reserved_40_63:24 [63:40] Reserved. + * epcis:8 [39:32](RO) Number of EPCI busses. + * grps:8 [31:24](RO) Number of engine groups implemented. + * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0, + * for CPT1 returns 0x18, or less if there are fuse-disables. + * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30, + * or less if there are fuse-disables, for CPT1 returns 0x0. + * vq:8 [7:0](RO) Number of VQs.
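+ *
+ * Usage sketch: this mirrors what cpt_find_max_enabled_cores() in
+ * cptpf_main.c below actually does at probe time to discover the
+ * SE/AE core counts:
+ *
+ *	union cptx_pf_constants pf_cnsts = {0};
+ *
+ *	pf_cnsts.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_CONSTANTS(0));
+ *	cpt->max_se_cores = pf_cnsts.s.se;
+ *	cpt->max_ae_cores = pf_cnsts.s.ae;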
+ */ +union cptx_pf_constants { + u64 u; + struct cptx_pf_constants_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_40_63:24; + u64 epcis:8; + u64 grps:8; + u64 ae:8; + u64 se:8; + u64 vq:8; +#else /* Word 0 - Little Endian */ + u64 vq:8; + u64 se:8; + u64 ae:8; + u64 grps:8; + u64 epcis:8; + u64 reserved_40_63:24; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_pf_exe_bist_status + * + * CPT PF Engine Bist Status Register + * This register has the BIST status of each engine. Each bit is the + * BIST result of an individual engine (per bit, 0 = pass and 1 = fail). + * cptx_pf_exe_bist_status_s + * Word0 + * reserved_48_63:16 [63:48] reserved + * bstatus:48 [47:0](RO/H) BIST status. One bit per engine. + * + */ +union cptx_pf_exe_bist_status { + u64 u; + struct cptx_pf_exe_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 bstatus:48; +#else /* Word 0 - Little Endian */ + u64 bstatus:48; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_pf_q#_ctl + * + * CPT Queue Control Register + * This register configures queues. This register should be changed only + * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_pf_qx_ctl_s + * Word0 + * reserved_60_63:4 [63:60] reserved. + * aura:12; [59:48](R/W) Guest-aura for returning this queue's + * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set. + * For the FPA to not discard the request, FPA_PF_MAP() must map + * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * reserved_45_47:3 [47:45] reserved. + * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per + * command buffer segment. Must be 8*n + 1, where n is the number of + * instructions per buffer segment. + * reserved_11_31:21 [31:11] Reserved. + * cont_err:1 [10:10](R/W) Continue on error. + * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or + * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via + * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to + * pipelining, additional instructions may have been processed between the + * instruction causing the error and the next instruction in the disabled + * queue (the instruction at CPT()_VQ()_SADDR). + * 1 = Ignore errors and continue processing instructions. + * For diagnostic use only. + * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the + * end of an instruction chunk, that chunk will be freed to the FPA. + * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions, + * instruction next chunk pointers, and result structures are stored in + * big-endian format in memory. + * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back. + * 0 = The hardware issues NCB transient load (LDT) towards the cache, + * which, if the line hits and is dirty, will cause the line to be + * written back before being replaced. + * 1 = The hardware issues NCB LDWB read-and-invalidate command towards + * the cache when fetching the last word of instructions; as a result the + * line will not be written back when replaced. This improves + * performance, but software must not read the instructions after they are + * posted to the hardware. Reads that do not consume the last word of a + * cache line always use LDI. + * reserved_4_6:3 [6:4] Reserved. + * grp:3; [3:1](R/W) Engine group. + * pri:1; [0:0](R/W) Queue priority. + * 1 = This queue has higher priority. Round-robin between higher + * priority queues. + * 0 = This queue has lower priority. Round-robin between lower + * priority queues. + */
+union cptx_pf_qx_ctl { + u64 u; + struct cptx_pf_qx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_60_63:4; + u64 aura:12; + u64 reserved_45_47:3; + u64 size:13; + u64 reserved_11_31:21; + u64 cont_err:1; + u64 inst_free:1; + u64 inst_be:1; + u64 iqb_ldwb:1; + u64 reserved_4_6:3; + u64 grp:3; + u64 pri:1; +#else /* Word 0 - Little Endian */ + u64 pri:1; + u64 grp:3; + u64 reserved_4_6:3; + u64 iqb_ldwb:1; + u64 inst_be:1; + u64 inst_free:1; + u64 cont_err:1; + u64 reserved_11_31:21; + u64 size:13; + u64 reserved_45_47:3; + u64 aura:12; + u64 reserved_60_63:4; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_saddr + * + * CPT Queue Starting Buffer Address Registers + * These registers set the instruction buffer starting address. + * cptx_vqx_saddr_s + * Word0 + * reserved_49_63:15 [63:49] Reserved. + * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned). + * When written, it is the initial buffer starting address; when read, + * it is the next read pointer to be requested from L2C. The PTR field + * is overwritten with the next pointer each time that the command buffer + * segment is exhausted. New commands will then be read from the newly + * specified command buffer pointer. + * reserved_0_5:6 [5:0] Reserved. + * + */ +union cptx_vqx_saddr { + u64 u; + struct cptx_vqx_saddr_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_49_63:15; + u64 ptr:43; + u64 reserved_0_5:6; +#else /* Word 0 - Little Endian */ + u64 reserved_0_5:6; + u64 ptr:43; + u64 reserved_49_63:15; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_misc_ena_w1s + * + * CPT Queue Misc Interrupt Enable Set Register + * This register sets interrupt enable bits. + * cptx_vqx_misc_ena_w1s_s + * Word0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR]. + * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP]. + * irde:1 [2:2](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE]. + * dovf:1 [1:1](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF]. + * mbox:1 [0:0](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX]. + * + */ +union cptx_vqx_misc_ena_w1s { + u64 u; + struct cptx_vqx_misc_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_doorbell + * + * CPT Queue Doorbell Registers + * Doorbells for the CPT instruction queues. + * cptx_vqx_doorbell_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add + * to the CPT instruction doorbell count. Readback value is the + * current number of pending doorbell requests. If the counter overflows, + * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to + * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], + * then write a value of 2^20 minus the read [DBELL_CNT], then write one + * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and + * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8. + * All CPT instructions are 8 words and require a doorbell count that is + * a multiple of 8. + */ +union cptx_vqx_doorbell { + u64 u; + struct cptx_vqx_doorbell_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 dbell_cnt:20; +#else /* Word 0 - Little Endian */ + u64 dbell_cnt:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_inprog + * + * CPT Queue In Progress Count Registers + * These registers contain the per-queue instruction in-flight count. + * cptx_vqx_inprog_s + * Word0 + * reserved_8_63:56 [63:8] Reserved. + * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions + * for the VF for which CPT is fetching, executing or responding to + * instructions. However, this does not include any interrupts that are + * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0). + * A queue may not be reconfigured until: + * 1. CPT()_VQ()_CTL[ENA] is cleared by software. + * 2. [INFLIGHT] is polled until it equals zero. + */ +union cptx_vqx_inprog { + u64 u; + struct cptx_vqx_inprog_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_8_63:56; + u64 inflight:8; +#else /* Word 0 - Little Endian */ + u64 inflight:8; + u64 reserved_8_63:56; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_misc_int + * + * CPT Queue Misc Interrupt Register + * These registers contain the per-queue miscellaneous interrupts. + * cptx_vqx_misc_int_s + * Word 0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1C/H) Software error from engines. + * nwrp:1 [3:3](R/W1C/H) NCB result write response error. + * irde:1 [2:2](R/W1C/H) Instruction NCB read response error. + * dovf:1 [1:1](R/W1C/H) Doorbell overflow. + * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when + * CPT()_VF()_PF_MBOX(0) is written. + * + */ +union cptx_vqx_misc_int { + u64 u; + struct cptx_vqx_misc_int_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_done_ack + * + * CPT Queue Done Count Ack Registers + * This register is written by software to acknowledge interrupts. + * cptx_vqx_done_ack_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE]. + * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge + * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero, the interrupt + * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE] + * are satisfied.
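+ *
+ * Interrupt-handler sketch (illustrative; base here stands for the VF's
+ * reg_base, and the VF driver added by this patch follows the same
+ * read-then-acknowledge pattern in cptvf_main.c, which is not shown in
+ * this hunk):
+ *
+ *	u64 done;
+ *
+ *	done = cpt_read_csr64(base, CPTX_VQX_DONE(0, 0)) & 0xFFFFF;
+ *	if (done) {
+ *		(hand the completed instructions to the request manager)
+ *		cpt_write_csr64(base, CPTX_VQX_DONE_ACK(0, 0), done);
+ *	}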
+ * + */ +union cptx_vqx_done_ack { + u64 u; + struct cptx_vqx_done_ack_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done_ack:20; +#else /* Word 0 - Little Endian */ + u64 done_ack:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_done + * + * CPT Queue Done Count Registers + * These registers contain the per-queue instruction done count. + * cptx_vqx_done_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] is set and that + * instruction completes, CPT()_VQ()_DONE[DONE] is incremented. Writes to + * this field are for diagnostic use only; instead, software writes + * CPT()_VQ()_DONE_ACK with the number of decrements for this field. + * Interrupts are sent as follows: + * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the + * interrupt coalescing timer is held to zero, and an interrupt is not + * sent. + * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer + * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or + * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough + * time has passed or enough results have arrived, then the interrupt is + * sent. + * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written, + * but this is not typical), the interrupt coalescing timer restarts. + * Note that after decrementing, this interrupt equation is recomputed; + * for example, if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT] + * and because the timer is zero, the interrupt will be resent immediately. + * (This covers the race case between software acknowledging an interrupt + * and a result returning.) + * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent, + * but the counting described above still occurs. + * Since CPT instructions complete out-of-order, if software is using + * completion interrupts the suggested scheme is to request a DONEINT on + * each request, and when an interrupt arrives perform a "greedy" scan for + * completions; even if a later command is acknowledged first this will + * not result in missing a completion. + * Software is responsible for making sure [DONE] does not overflow; + * for example, by ensuring there are not more than 2^20-1 instructions in + * flight that may request interrupts. + * + */
+union cptx_vqx_done { + u64 u; + struct cptx_vqx_done_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done:20; +#else /* Word 0 - Little Endian */ + u64 done:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_done_wait + * + * CPT Queue Done Interrupt Coalescing Wait Registers + * Specifies the per-queue interrupt coalescing settings. + * cptx_vqx_done_wait_s + * Word0 + * reserved_48_63:16 [63:48] Reserved. + * time_wait:16; [47:32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] = 0 + * or CPT()_VQ()_DONE_ACK is written, a timer is cleared. When the timer + * reaches [TIME_WAIT]*1024, interrupt coalescing ends; + * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled. + * reserved_20_31:12 [31:20] Reserved. + * num_wait:20 [19:0](R/W) Number of messages hold-off. + * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT], interrupt coalescing ends; + * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1.
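+ *
+ * Configuration sketch (the values are illustrative and base stands for
+ * the VF's reg_base; the VF driver in this patch programs
+ * CPT_TIMER_THOLD from cptvf.h as its time threshold):
+ *
+ *	union cptx_vqx_done_wait dw = {0};
+ *
+ *	dw.s.time_wait = CPT_TIMER_THOLD;
+ *	dw.s.num_wait = 1;
+ *	cpt_write_csr64(base, CPTX_VQX_DONE_WAIT(0, 0), dw.u);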
+ * + */ +union cptx_vqx_done_wait { + u64 u; + struct cptx_vqx_done_wait_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 time_wait:16; + u64 reserved_20_31:12; + u64 num_wait:20; +#else /* Word 0 - Little Endian */ + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_done_ena_w1s + * + * CPT Queue Done Interrupt Enable Set Registers + * Writing 1 to these registers enables the DONEINT interrupt for the queue. + * cptx_vqx_done_ena_w1s_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * done:1 [0:0](R/W1S/H) Writing 1 enables DONEINT for this queue; + * writing 0 has no effect. Reads return the enable bit. + */ +union cptx_vqx_done_ena_w1s { + u64 u; + struct cptx_vqx_done_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 done:1; +#else /* Word 0 - Little Endian */ + u64 done:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +};
+ +/** + * Register (NCB) cpt#_vq#_ctl + * + * CPT VF Queue Control Registers + * This register configures queues. This register should be changed (other than + * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_vqx_ctl_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * ena:1 [0:0](R/W/H) Enables the logical instruction queue. + * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT]. + * 1 = Queue is enabled. + * 0 = Queue is disabled. + */ +union cptx_vqx_ctl { + u64 u; + struct cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; +#endif /*__CPT_HW_TYPES_H*/
diff --git a/drivers/crypto/cavium/cpt/cptpf.h b/drivers/crypto/cavium/cpt/cptpf.h new file mode 100644 index 000000000..c0556c5f6 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation.
+ */ + +#ifndef __CPTPF_H +#define __CPTPF_H + +#include "cpt_common.h" + +#define CSR_DELAY 30 +#define CPT_MAX_CORE_GROUPS 8 +#define CPT_MAX_SE_CORES 10 +#define CPT_MAX_AE_CORES 6 +#define CPT_MAX_TOTAL_CORES (CPT_MAX_SE_CORES + CPT_MAX_AE_CORES) +#define CPT_MAX_VF_NUM 16 +#define CPT_PF_MSIX_VECTORS 3 +#define CPT_PF_INT_VEC_E_MBOXX(a) (0x02 + (a)) +#define CPT_UCODE_VERSION_SZ 32 + +struct cpt_device;
+ +struct microcode { + u8 is_mc_valid; + u8 is_ae; + u8 group; + u8 num_cores; + u32 code_size; + u64 core_mask; + u8 version[CPT_UCODE_VERSION_SZ]; + /* Base info */ + dma_addr_t phys_base; + void *code; +}; + +struct cpt_vf_info { + u8 state; + u8 priority; + u8 id; + u32 qlen; +};
+ +/** + * cpt device structure + */ +struct cpt_device { + u16 flags; /* Flags to hold device status bits */ + u8 num_vf_en; /* Number of VFs enabled (0...CPT_MAX_VF_NUM) */ + struct cpt_vf_info vfinfo[CPT_MAX_VF_NUM]; /* Per VF info */ + + void __iomem *reg_base; /* Register start address */ + struct pci_dev *pdev; /* pci device handle */ + + struct microcode mcode[CPT_MAX_CORE_GROUPS]; + u8 next_mc_idx; /* next microcode index */ + u8 next_group; + u8 max_se_cores; + u8 max_ae_cores; +}; + +void cpt_mbox_intr_handler(struct cpt_device *cpt, int mbx); +#endif /* __CPTPF_H */
diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c new file mode 100644 index 000000000..06ad85ab5 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf_main.c @@ -0,0 +1,675 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include <linux/device.h> +#include <linux/firmware.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> +#include <linux/printk.h> +#include <linux/version.h> + +#include "cptpf.h" + +#define DRV_NAME "thunder-cpt" +#define DRV_VERSION "1.0" + +static u32 num_vfs = 4; /* Default: 4 VFs enabled */ +module_param(num_vfs, uint, 0444); +MODULE_PARM_DESC(num_vfs, "Number of VFs to enable (4-16)");
+ +/* + * Disable cores specified by coremask + */ +static void cpt_disable_cores(struct cpt_device *cpt, u64 coremask, + u8 type, u8 grp) +{ + u64 pf_exe_ctl; + u32 timeout = 100; + u64 grpmask = 0; + struct device *dev = &cpt->pdev->dev; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + /* Disengage the cores from groups */ + grpmask = cpt_read_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), + (grpmask & ~coremask)); + udelay(CSR_DELAY); + grp = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXEC_BUSY(0)); + while (grp & coremask) { + dev_err(dev, "Cores still busy %llx", coremask); + grp = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXEC_BUSY(0)); + if (!timeout--) + break; + + udelay(CSR_DELAY); + } + + /* Disable the cores */ + pf_exe_ctl = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), + (pf_exe_ctl & ~coremask)); + udelay(CSR_DELAY); +}
+ +/* + * Enable cores specified by coremask + */ +static void cpt_enable_cores(struct cpt_device *cpt, u64 coremask, + u8 type) +{ + u64 pf_exe_ctl; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + pf_exe_ctl = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), + (pf_exe_ctl | coremask)); + udelay(CSR_DELAY); +} + +static void cpt_configure_group(struct cpt_device *cpt, u8 grp, + u64 coremask, u8 type) +{ + u64 pf_gx_en = 0; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + pf_gx_en = cpt_read_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), + (pf_gx_en | coremask)); + udelay(CSR_DELAY); +}
+ +static void cpt_disable_mbox_interrupts(struct cpt_device *cpt) +{ + /* Clear mbox(0) interrupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_ENA_W1CX(0, 0), ~0ull); +} + +static void cpt_disable_ecc_interrupts(struct cpt_device *cpt) +{ + /* Clear ecc(0) interrupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_ECC0_ENA_W1C(0), ~0ull); +} + +static void cpt_disable_exec_interrupts(struct cpt_device *cpt) +{ + /* Clear exec interrupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXEC_ENA_W1C(0), ~0ull); +} + +static void cpt_disable_all_interrupts(struct cpt_device *cpt) +{ + cpt_disable_mbox_interrupts(cpt); + cpt_disable_ecc_interrupts(cpt); + cpt_disable_exec_interrupts(cpt); +} + +static void cpt_enable_mbox_interrupts(struct cpt_device *cpt) +{ + /* Set mbox(0) interrupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_ENA_W1SX(0, 0), ~0ull); +}
+ +static int cpt_load_microcode(struct cpt_device *cpt, struct microcode *mcode) +{ + int ret = 0, core = 0, shift = 0; + u32 total_cores = 0; + struct device *dev = &cpt->pdev->dev; + + if (!mcode || !mcode->code) { + dev_err(dev, "Either the mcode is NULL or data is NULL\n"); + return -EINVAL; + } + + if (mcode->code_size == 0) { + dev_err(dev, "microcode size is 0\n"); + return -EINVAL; + } + + /* Assumes 0-9 are SE cores for UCODE_BASE registers and + * AE core bases follow + */ + if (mcode->is_ae) { + core = CPT_MAX_SE_CORES; /* start counting from 10 */ + total_cores = CPT_MAX_TOTAL_CORES; /* up to 15 */ + } else { + core = 0; /* start counting from 0 */ + total_cores = CPT_MAX_SE_CORES; /* up to 9 */ + } + + /* Point to microcode for each core of the group */ + for (; core < total_cores; core++, shift++) { + if (mcode->core_mask & (1 << shift)) { + cpt_write_csr64(cpt->reg_base, + CPTX_PF_ENGX_UCODE_BASE(0, core), + (u64)mcode->phys_base); + } + } + return ret; +}
+ +static int do_cpt_init(struct cpt_device *cpt, struct microcode *mcode) +{ + int ret = 0; + struct device *dev = &cpt->pdev->dev; + + /* Make device not ready */ + cpt->flags &= ~CPT_FLAG_DEVICE_READY; + /* Disable All PF interrupts */ + cpt_disable_all_interrupts(cpt); + /* Calculate mcode group and coremasks */ + if (mcode->is_ae) { + if (mcode->num_cores > cpt->max_ae_cores) { + dev_err(dev, "Requested more cores than available AE cores\n"); + ret = -EINVAL; + goto cpt_init_fail; + } + + if (cpt->next_group >= CPT_MAX_CORE_GROUPS) { + dev_err(dev, "Can't load, all eight microcode groups in use"); + return -ENFILE; + } + + mcode->group = cpt->next_group; + /* Convert requested cores to mask */ + mcode->core_mask = GENMASK(mcode->num_cores, 0); + cpt_disable_cores(cpt, mcode->core_mask, AE_TYPES, + mcode->group); + /* Load microcode for AE engines */ + ret = cpt_load_microcode(cpt, mcode); + if (ret) { + dev_err(dev, "Microcode load failed for %s\n", + mcode->version); + goto cpt_init_fail; + } + cpt->next_group++; + /* Configure group mask for the mcode */ + cpt_configure_group(cpt, mcode->group, mcode->core_mask, + AE_TYPES); + /* Enable AE cores for the group mask */ + cpt_enable_cores(cpt, mcode->core_mask, AE_TYPES); + } else { + if (mcode->num_cores > cpt->max_se_cores) { + dev_err(dev, "Requested more cores than available SE cores\n"); + ret = -EINVAL; + goto cpt_init_fail; + } + if (cpt->next_group >= CPT_MAX_CORE_GROUPS) { + dev_err(dev, "Can't load, all eight microcode groups in use"); + return -ENFILE; + } + + mcode->group = cpt->next_group; + /* Convert requested cores to mask */ + mcode->core_mask = GENMASK(mcode->num_cores, 0); + cpt_disable_cores(cpt, mcode->core_mask, SE_TYPES, + mcode->group); + /* Load microcode for SE engines */ + ret = cpt_load_microcode(cpt, mcode); + if (ret) { + dev_err(dev, "Microcode load failed for %s\n", + mcode->version); + goto cpt_init_fail; + } + cpt->next_group++; + /* Configure group mask for the mcode */ + cpt_configure_group(cpt, mcode->group, mcode->core_mask, + SE_TYPES); + /* Enable SE cores for the group mask */ + cpt_enable_cores(cpt, mcode->core_mask, SE_TYPES); + } + + /* Enable PF mailbox interrupts */ + cpt_enable_mbox_interrupts(cpt); + cpt->flags |= CPT_FLAG_DEVICE_READY; + + return ret; + +cpt_init_fail: + /* Enable PF mailbox interrupts */ + cpt_enable_mbox_interrupts(cpt); + + return ret; +}
+ +struct ucode_header { + u8 version[CPT_UCODE_VERSION_SZ]; + u32 code_length; + u32 data_length; + u64 sram_address; +}; + +static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) +{ + const struct firmware *fw_entry; + struct device *dev = &cpt->pdev->dev; + struct ucode_header *ucode; + struct microcode *mcode; + int j, ret = 0; + + ret = request_firmware(&fw_entry, fw, dev); + if (ret) + return ret; + + ucode = (struct ucode_header *)fw_entry->data; + mcode = &cpt->mcode[cpt->next_mc_idx]; + memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ); + mcode->code_size = ntohl(ucode->code_length) * 2; + if (!mcode->code_size) { + ret = -EINVAL; + goto fw_release; + } + + mcode->is_ae = is_ae; + mcode->core_mask = 0ULL; + mcode->num_cores = is_ae ? 6 : 10; + + /* Allocate DMAable space */ + mcode->code = dma_zalloc_coherent(&cpt->pdev->dev, mcode->code_size, + &mcode->phys_base, GFP_KERNEL); + if (!mcode->code) { + dev_err(dev, "Unable to allocate space for microcode"); + ret = -ENOMEM; + goto fw_release; + } + + memcpy((void *)mcode->code, (void *)(fw_entry->data + sizeof(*ucode)), + mcode->code_size); + + /* Byte swap 64-bit */ + for (j = 0; j < (mcode->code_size / 8); j++) + ((u64 *)mcode->code)[j] = cpu_to_be64(((u64 *)mcode->code)[j]); + /* MC needs 16-bit swap */ + for (j = 0; j < (mcode->code_size / 2); j++) + ((u16 *)mcode->code)[j] = cpu_to_be16(((u16 *)mcode->code)[j]); + + dev_dbg(dev, "mcode->code_size = %u\n", mcode->code_size); + dev_dbg(dev, "mcode->is_ae = %u\n", mcode->is_ae); + dev_dbg(dev, "mcode->num_cores = %u\n", mcode->num_cores); + dev_dbg(dev, "mcode->code = %llx\n", (u64)mcode->code); + dev_dbg(dev, "mcode->phys_base = %llx\n", mcode->phys_base); + + ret = do_cpt_init(cpt, mcode); + if (ret) { + dev_err(dev, "do_cpt_init failed with ret: %d\n", ret); + goto fw_release; + } + + dev_info(dev, "Microcode loaded %s\n", mcode->version); + mcode->is_mc_valid = 1; + cpt->next_mc_idx++; + +fw_release: + release_firmware(fw_entry); + + return ret; +}
+ +static int cpt_ucode_load(struct cpt_device *cpt) +{ + int ret = 0; + struct device *dev = &cpt->pdev->dev; + + ret = cpt_ucode_load_fw(cpt, "cpt8x-mc-ae.out", true); + if (ret) { + dev_err(dev, "ae:cpt_ucode_load failed with ret: %d\n", ret); + return ret; + } + ret = cpt_ucode_load_fw(cpt, "cpt8x-mc-se.out", false); + if (ret) { + dev_err(dev, "se:cpt_ucode_load failed with ret: %d\n", ret); + return ret; + } + + return ret; +} + +static irqreturn_t cpt_mbx0_intr_handler(int irq, void *cpt_irq) +{ + struct cpt_device *cpt = (struct cpt_device *)cpt_irq; + + cpt_mbox_intr_handler(cpt, 0); + + return IRQ_HANDLED; +} + +static void cpt_reset(struct cpt_device *cpt) +{ + cpt_write_csr64(cpt->reg_base, CPTX_PF_RESET(0), 1); +} + +static void cpt_find_max_enabled_cores(struct cpt_device *cpt) +{ + union cptx_pf_constants pf_cnsts = {0}; + + pf_cnsts.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_CONSTANTS(0)); + cpt->max_se_cores = pf_cnsts.s.se; + cpt->max_ae_cores = pf_cnsts.s.ae; +} + +static u32 cpt_check_bist_status(struct cpt_device *cpt) +{ + union cptx_pf_bist_status bist_sts = {0}; + + bist_sts.u = cpt_read_csr64(cpt->reg_base, + CPTX_PF_BIST_STATUS(0)); + + return bist_sts.u; +} + +static u64 cpt_check_exe_bist_status(struct cpt_device *cpt) +{ + union cptx_pf_exe_bist_status bist_sts = {0}; + + bist_sts.u = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXE_BIST_STATUS(0)); + + return bist_sts.u; +}
+ +static void cpt_disable_all_cores(struct cpt_device *cpt) +{ + u32 grp, timeout = 100; + struct device *dev = &cpt->pdev->dev; + + /* Disengage the cores from groups */ + for (grp = 0; grp < CPT_MAX_CORE_GROUPS; grp++) { + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), 0); + udelay(CSR_DELAY); + } + + grp = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXEC_BUSY(0)); + while (grp) { + dev_err(dev, "Cores still busy"); + grp = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXEC_BUSY(0)); + if (!timeout--) + break; + + udelay(CSR_DELAY); + } + /* Disable the cores */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), 0); +} + +/** + * Ensure all cores are disengaged from all groups by + * calling cpt_disable_all_cores() before calling this + * function.
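+ *
+ * For reference, cpt_remove() below tears the device down in exactly
+ * this order:
+ *
+ *	cpt_disable_all_cores(cpt);
+ *	cpt_unload_microcode(cpt);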
+ */ +static void cpt_unload_microcode(struct cpt_device *cpt) +{ + u32 grp = 0, core; + + /* Free microcode bases and reset group masks */ + for (grp = 0; grp < CPT_MAX_CORE_GROUPS; grp++) { + struct microcode *mcode = &cpt->mcode[grp]; + + if (cpt->mcode[grp].code) + dma_free_coherent(&cpt->pdev->dev, mcode->code_size, + mcode->code, mcode->phys_base); + mcode->code = NULL; + } + /* Clear UCODE_BASE registers for all engines */ + for (core = 0; core < CPT_MAX_TOTAL_CORES; core++) + cpt_write_csr64(cpt->reg_base, + CPTX_PF_ENGX_UCODE_BASE(0, core), 0ull); +} + +static int cpt_device_init(struct cpt_device *cpt) +{ + u64 bist; + struct device *dev = &cpt->pdev->dev; + + /* Reset the PF when probed first */ + cpt_reset(cpt); + msleep(100); + + /*Check BIST status*/ + bist = (u64)cpt_check_bist_status(cpt); + if (bist) { + dev_err(dev, "RAM BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + bist = cpt_check_exe_bist_status(cpt); + if (bist) { + dev_err(dev, "Engine BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + /*Get CLK frequency*/ + /*Get max enabled cores */ + cpt_find_max_enabled_cores(cpt); + /*Disable all cores*/ + cpt_disable_all_cores(cpt); + /*Reset device parameters*/ + cpt->next_mc_idx = 0; + cpt->next_group = 0; + /* PF is ready */ + cpt->flags |= CPT_FLAG_DEVICE_READY; + + return 0; +} + +static int cpt_register_interrupts(struct cpt_device *cpt) +{ + int ret; + struct device *dev = &cpt->pdev->dev; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cpt->pdev, CPT_PF_MSIX_VECTORS, + CPT_PF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cpt->pdev->dev, "Request for #%d msix vectors failed\n", + CPT_PF_MSIX_VECTORS); + return ret; + } + + /* Register mailbox interrupt handlers */ + ret = request_irq(pci_irq_vector(cpt->pdev, CPT_PF_INT_VEC_E_MBOXX(0)), + cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); + if (ret) + goto fail; + + /* Enable mailbox interrupt */ + cpt_enable_mbox_interrupts(cpt); + return 0; + +fail: + dev_err(dev, "Request irq failed\n"); + pci_disable_msix(cpt->pdev); + return ret; +} + +static void cpt_unregister_interrupts(struct cpt_device *cpt) +{ + free_irq(pci_irq_vector(cpt->pdev, CPT_PF_INT_VEC_E_MBOXX(0)), cpt); + pci_disable_msix(cpt->pdev); +} + +static int cpt_sriov_init(struct cpt_device *cpt, int num_vfs) +{ + int pos = 0; + int err; + u16 total_vf_cnt; + struct pci_dev *pdev = cpt->pdev; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) { + dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n"); + return -ENODEV; + } + + cpt->num_vf_en = num_vfs; /* User requested VFs */ + pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt); + if (total_vf_cnt < cpt->num_vf_en) + cpt->num_vf_en = total_vf_cnt; + + if (!total_vf_cnt) + return 0; + + /*Enabled the available VFs */ + err = pci_enable_sriov(pdev, cpt->num_vf_en); + if (err) { + dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n", + cpt->num_vf_en); + cpt->num_vf_en = 0; + return err; + } + + /* TODO: Optionally enable static VQ priorities feature */ + + dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n", + cpt->num_vf_en); + + cpt->flags |= CPT_FLAG_SRIOV_ENABLED; + + return 0; +} + +static int cpt_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct cpt_device *cpt; + int err; + + if (num_vfs > 16 || num_vfs < 4) { + dev_warn(dev, "Invalid vf count %d, Resetting it to 4(default)\n", + num_vfs); + num_vfs = 4; + } + + cpt = 
devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL); + if (!cpt) + return -ENOMEM; + + pci_set_drvdata(pdev, cpt); + cpt->pdev = pdev; + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto cpt_err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto cpt_err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto cpt_err_release_regions; + } + + /* MAP PF's configuration registers */ + cpt->reg_base = pcim_iomap(pdev, 0, 0); + if (!cpt->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto cpt_err_release_regions; + } + + /* CPT device HW initialization */ + cpt_device_init(cpt); + + /* Register interrupts */ + err = cpt_register_interrupts(cpt); + if (err) + goto cpt_err_release_regions; + + err = cpt_ucode_load(cpt); + if (err) + goto cpt_err_unregister_interrupts; + + /* Configure SRIOV */ + err = cpt_sriov_init(cpt, num_vfs); + if (err) + goto cpt_err_unregister_interrupts; + + return 0; + +cpt_err_unregister_interrupts: + cpt_unregister_interrupts(cpt); +cpt_err_release_regions: + pci_release_regions(pdev); +cpt_err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static void cpt_remove(struct pci_dev *pdev) +{ + struct cpt_device *cpt = pci_get_drvdata(pdev); + + /* Disengage SE and AE cores from all groups*/ + cpt_disable_all_cores(cpt); + /* Unload microcodes */ + cpt_unload_microcode(cpt); + cpt_unregister_interrupts(cpt); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static void cpt_shutdown(struct pci_dev *pdev) +{ + struct cpt_device *cpt = pci_get_drvdata(pdev); + + if (!cpt) + return; + + dev_info(&pdev->dev, "Shutdown device %x:%x.\n", + (u32)pdev->vendor, (u32)pdev->device); + + cpt_unregister_interrupts(cpt); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CPT_81XX_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver cpt_pci_driver = { + .name = DRV_NAME, + .id_table = cpt_id_table, + .probe = cpt_probe, + .remove = cpt_remove, + .shutdown = cpt_shutdown, +}; + +module_pci_driver(cpt_pci_driver); + +MODULE_AUTHOR("George Cherian <george.cherian@cavium.com>"); +MODULE_DESCRIPTION("Cavium Thunder CPT Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, cpt_id_table); diff --git a/drivers/crypto/cavium/cpt/cptpf_mbox.c b/drivers/crypto/cavium/cpt/cptpf_mbox.c new file mode 100644 index 000000000..20f2c6ee4 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf_mbox.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ +#include <linux/module.h> +#include "cptpf.h" + +static void cpt_send_msg_to_vf(struct cpt_device *cpt, int vf, + struct cpt_mbox *mbx) +{ + /* Writing mbox(0) causes interrupt */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 1), + mbx->data); + cpt_write_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 0), mbx->msg); +} + +/* ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void cpt_mbox_send_ack(struct cpt_device *cpt, int vf, + struct cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = CPT_MBOX_MSG_TYPE_ACK; + cpt_send_msg_to_vf(cpt, vf, mbx); +} + +static void cpt_clear_mbox_intr(struct cpt_device *cpt, u32 vf) +{ + /* W1C for the VF */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_INTX(0, 0), (1 << vf)); +} + +/* + * Configure QLEN/Chunk sizes for VF + */ +static void cpt_cfg_qlen_for_vf(struct cpt_device *cpt, int vf, u32 size) +{ + union cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf)); + pf_qx_ctl.s.size = size; + pf_qx_ctl.s.cont_err = true; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf), pf_qx_ctl.u); +} + +/* + * Configure VQ priority + */ +static void cpt_cfg_vq_priority(struct cpt_device *cpt, int vf, u32 pri) +{ + union cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf)); + pf_qx_ctl.s.pri = pri; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf), pf_qx_ctl.u); +} + +static int cpt_bind_vq_to_grp(struct cpt_device *cpt, u8 q, u8 grp) +{ + struct microcode *mcode = cpt->mcode; + union cptx_pf_qx_ctl pf_qx_ctl; + struct device *dev = &cpt->pdev->dev; + + if (q >= CPT_MAX_VF_NUM) { + dev_err(dev, "Queues are more than cores in the group"); + return -EINVAL; + } + if (grp >= CPT_MAX_CORE_GROUPS) { + dev_err(dev, "Request group is more than possible groups"); + return -EINVAL; + } + if (grp >= cpt->next_mc_idx) { + dev_err(dev, "Request group is higher than available functional groups"); + return -EINVAL; + } + pf_qx_ctl.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, q)); + pf_qx_ctl.s.grp = mcode[grp].group; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, q), pf_qx_ctl.u); + dev_dbg(dev, "VF %d TYPE %s", q, (mcode[grp].is_ae ? "AE" : "SE")); + + return mcode[grp].is_ae ? 
AE_TYPES : SE_TYPES; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void cpt_handle_mbox_intr(struct cpt_device *cpt, int vf) +{ + struct cpt_vf_info *vfx = &cpt->vfinfo[vf]; + struct cpt_mbox mbx = {}; + int vftype; + struct device *dev = &cpt->pdev->dev; + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = cpt_read_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 0)); + mbx.data = cpt_read_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 1)); + dev_dbg(dev, "%s: Mailbox msg 0x%llx from VF%d", __func__, mbx.msg, vf); + switch (mbx.msg) { + case CPT_MSG_VF_UP: + vfx->state = VF_STATE_UP; + try_module_get(THIS_MODULE); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_READY: + mbx.msg = CPT_MSG_READY; + mbx.data = vf; + cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case CPT_MSG_VF_DOWN: + /* First msg in VF teardown sequence */ + vfx->state = VF_STATE_DOWN; + module_put(THIS_MODULE); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_QLEN: + vfx->qlen = mbx.data; + cpt_cfg_qlen_for_vf(cpt, vf, vfx->qlen); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_QBIND_GRP: + vftype = cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data); + if ((vftype != AE_TYPES) && (vftype != SE_TYPES)) + dev_err(dev, "Queue %d binding to group %llu failed", + vf, mbx.data); + else { + dev_dbg(dev, "Queue %d binding to group %llu successful", + vf, mbx.data); + mbx.msg = CPT_MSG_QBIND_GRP; + mbx.data = vftype; + cpt_send_msg_to_vf(cpt, vf, &mbx); + } + break; + case CPT_MSG_VQ_PRIORITY: + vfx->priority = mbx.data; + cpt_cfg_vq_priority(cpt, vf, vfx->priority); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + default: + dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 0x%llx\n", + vf, mbx.msg); + break; + } +} + +void cpt_mbox_intr_handler (struct cpt_device *cpt, int mbx) +{ + u64 intr; + u8 vf; + + intr = cpt_read_csr64(cpt->reg_base, CPTX_PF_MBOX_INTX(0, 0)); + dev_dbg(&cpt->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr); + for (vf = 0; vf < CPT_MAX_VF_NUM; vf++) { + if (intr & (1ULL << vf)) { + dev_dbg(&cpt->pdev->dev, "Intr from VF %d\n", vf); + cpt_handle_mbox_intr(cpt, vf); + cpt_clear_mbox_intr(cpt, vf); + } + } +} diff --git a/drivers/crypto/cavium/cpt/cptvf.h b/drivers/crypto/cavium/cpt/cptvf.h new file mode 100644 index 000000000..0a835a07d --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf.h @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPTVF_H +#define __CPTVF_H + +#include <linux/list.h> +#include "cpt_common.h" + +/* Default command queue length */ +#define CPT_CMD_QLEN 2046 +#define CPT_CMD_QCHUNK_SIZE 1023 + +/* Default command timeout in seconds */ +#define CPT_COMMAND_TIMEOUT 4 +#define CPT_TIMER_THOLD 0xFFFF +#define CPT_NUM_QS_PER_VF 1 +#define CPT_INST_SIZE 64 +#define CPT_NEXT_CHUNK_PTR_SIZE 8 + +#define CPT_VF_MSIX_VECTORS 2 +#define CPT_VF_INTR_MBOX_MASK BIT(0) +#define CPT_VF_INTR_DOVF_MASK BIT(1) +#define CPT_VF_INTR_IRDE_MASK BIT(2) +#define CPT_VF_INTR_NWRP_MASK BIT(3) +#define CPT_VF_INTR_SERR_MASK BIT(4) +#define DMA_DIRECT_DIRECT 0 /* Input DIRECT, Output DIRECT */ +#define DMA_GATHER_SCATTER 1 +#define FROM_DPTR 1 + +/** + * Enumeration cpt_vf_int_vec_e + * + * CPT VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. 
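+ *
+ * Wiring sketch (illustrative; the actual request_irq() calls live in
+ * cptvf_main.c, which is not part of this hunk, and the handler names
+ * here are hypothetical):
+ *
+ *	request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC),
+ *		    cptvf_misc_intr_handler, 0, "CPT VF misc", cptvf);
+ *	request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE),
+ *		    cptvf_done_intr_handler, 0, "CPT VF done", cptvf);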
+ */ +enum cpt_vf_int_vec_e { + CPT_VF_INT_VEC_E_MISC = 0x00, + CPT_VF_INT_VEC_E_DONE = 0x01 +};
+ +struct command_chunk { + u8 *head; + dma_addr_t dma_addr; + u32 size; /* Chunk size, max CPT_INST_CHUNK_MAX_SIZE */ + struct hlist_node nextchunk; +}; + +struct command_queue { + spinlock_t lock; /* command queue lock */ + u32 idx; /* Command queue host write idx */ + u32 nchunks; /* Number of command chunks */ + struct command_chunk *qhead; /* Command queue head, instructions + * are inserted here + */ + struct hlist_head chead; +}; + +struct command_qinfo { + u32 cmd_size; + u32 qchunksize; /* Command queue chunk size */ + struct command_queue queue[CPT_NUM_QS_PER_VF]; +};
+ +struct pending_entry { + u8 busy; /* Entry status (free/busy) */ + + volatile u64 *completion_addr; /* Completion address */ + void *post_arg; + void (*callback)(int, void *); /* Kernel ASYNC request callback */ + void *callback_arg; /* Kernel ASYNC request callback arg */ +}; + +struct pending_queue { + struct pending_entry *head; /* head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + atomic64_t pending_count; + spinlock_t lock; /* Queue lock */ +}; + +struct pending_qinfo { + u32 nr_queues; /* Number of queues supported */ + u32 qlen; /* Queue length */ + struct pending_queue queue[CPT_NUM_QS_PER_VF]; +}; + +#define for_each_pending_queue(qinfo, q, i) \ + for (i = 0, q = &qinfo->queue[i]; i < qinfo->nr_queues; i++, \ + q = &qinfo->queue[i])
+ +struct cpt_vf { + u16 flags; /* Flags to hold device status bits */ + u8 vfid; /* Device Index 0...CPT_MAX_VF_NUM */ + u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */ + u8 vfgrp; /* VF group (0 - 7) */ + u8 node; /* Operating node: Bits (46:44) in BAR0 address */ + u8 priority; /* VF priority ring: 1 - high priority round + * robin ring; 0 - low priority round robin ring + */ + struct pci_dev *pdev; /* pci device handle */ + void __iomem *reg_base; /* Register start address */ + void *wqe_info; /* BH worker info */ + /* MSI-X */ + cpumask_var_t affinity_mask[CPT_VF_MSIX_VECTORS]; + /* Command and Pending queues */ + u32 qsize; + u32 nr_queues; + struct command_qinfo cqinfo; /* Command queue information */ + struct pending_qinfo pqinfo; /* Pending queue information */ + /* VF-PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +};
+ +int cptvf_send_vf_up(struct cpt_vf *cptvf); +int cptvf_send_vf_down(struct cpt_vf *cptvf); +int cptvf_send_vf_to_grp_msg(struct cpt_vf *cptvf); +int cptvf_send_vf_priority_msg(struct cpt_vf *cptvf); +int cptvf_send_vq_size_msg(struct cpt_vf *cptvf); +int cptvf_check_pf_ready(struct cpt_vf *cptvf); +void cptvf_handle_mbox_intr(struct cpt_vf *cptvf); +void cvm_crypto_exit(void); +int cvm_crypto_init(struct cpt_vf *cptvf); +void vq_post_process(struct cpt_vf *cptvf, u32 qno); +void cptvf_write_vq_doorbell(struct cpt_vf *cptvf, u32 val); +#endif /* __CPTVF_H */
diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c new file mode 100644 index 000000000..cd4d60d31 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_algs.c @@ -0,0 +1,525 @@ + +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation.
+ */ + +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/authenc.h> +#include <crypto/cryptd.h> +#include <crypto/crypto_wq.h> +#include <crypto/des.h> +#include <crypto/xts.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/list.h> +#include <linux/scatterlist.h> + +#include "cptvf.h" +#include "cptvf_algs.h" + +struct cpt_device_handle { + void *cdev[MAX_DEVICES]; + u32 dev_count; +}; + +static struct cpt_device_handle dev_handle; + +static void cvm_callback(u32 status, void *arg) +{ + struct crypto_async_request *req = (struct crypto_async_request *)arg; + + req->complete(req, !status); +} + +static inline void update_input_iv(struct cpt_request_info *req_info, + u8 *iv, u32 enc_iv_len, + u32 *argcnt) +{ + /* Setting the iv information */ + req_info->in[*argcnt].vptr = (void *)iv; + req_info->in[*argcnt].size = enc_iv_len; + req_info->req.dlen += enc_iv_len; + + ++(*argcnt); +} + +static inline void update_output_iv(struct cpt_request_info *req_info, + u8 *iv, u32 enc_iv_len, + u32 *argcnt) +{ + /* Setting the iv information */ + req_info->out[*argcnt].vptr = (void *)iv; + req_info->out[*argcnt].size = enc_iv_len; + req_info->rlen += enc_iv_len; + + ++(*argcnt); +} + +static inline void update_input_data(struct cpt_request_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + + ++(*argcnt); + ++inp_sg; + } +} + +static inline void update_output_data(struct cpt_request_info *req_info, + struct scatterlist *outp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *)ptr; + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + ++outp_sg; + } +} + +static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct cvm_enc_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct fc_context *fctx = &rctx->fctx; + u64 *offset_control = &rctx->control_word; + u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm); + struct cpt_request_info *req_info = &rctx->cpt_req; + u64 *ctrl_flags = NULL; + + req_info->ctrl.s.grp = 0; + req_info->ctrl.s.dma_mode = DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = SE_CORE_REQ; + + req_info->req.opcode.s.major = MAJOR_OP_FC | + DMA_MODE_FLAG(DMA_GATHER_SCATTER); + if (enc) + req_info->req.opcode.s.minor = 2; + else + req_info->req.opcode.s.minor = 3; + + req_info->req.param1 = req->nbytes; /* Encryption Data length */ + req_info->req.param2 = 0; /*Auth data length */ + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.iv_source = FROM_DPTR; + + if (ctx->cipher_type == AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags; + *ctrl_flags = cpu_to_be64(*ctrl_flags); + + *offset_control = cpu_to_be64(((u64)(enc_iv_len) << 16)); + /* Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)offset_control; + req_info->in[*argcnt].size = 
CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct fc_context); + req_info->req.dlen += sizeof(struct fc_context); + + ++(*argcnt); + + return 0; +} + +static inline u32 create_input_list(struct ablkcipher_request *req, u32 enc, + u32 enc_iv_len) +{ + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct cpt_request_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + create_ctx_hdr(req, enc, &argcnt); + update_input_iv(req_info, req->info, enc_iv_len, &argcnt); + update_input_data(req_info, req->src, req->nbytes, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline void store_cb_info(struct ablkcipher_request *req, + struct cpt_request_info *req_info) +{ + req_info->callback = (void *)cvm_callback; + req_info->callback_arg = (void *)&req->base; +} + +static inline void create_output_list(struct ablkcipher_request *req, + u32 enc_iv_len) +{ + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct cpt_request_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + /* Reading IV information */ + update_output_iv(req_info, req->info, enc_iv_len, &argcnt); + update_output_data(req_info, req->dst, req->nbytes, &argcnt); + req_info->outcnt = argcnt; +} + +static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm); + struct fc_context *fctx = &rctx->fctx; + struct cpt_request_info *req_info = &rctx->cpt_req; + void *cdev = NULL; + int status; + + memset(req_info, 0, sizeof(struct cpt_request_info)); + req_info->may_sleep = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) != 0; + memset(fctx, 0, sizeof(struct fc_context)); + create_input_list(req, enc, enc_iv_len); + create_output_list(req, enc_iv_len); + store_cb_info(req, req_info); + cdev = dev_handle.cdev[smp_processor_id()]; + status = cptvf_do_request(cdev, req_info); + /* We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + + if (status) + return status; + else + return -EINPROGRESS; +} + +static int cvm_encrypt(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, true); +} + +static int cvm_decrypt(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, false); +} + +static int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + const u8 *key1 = key; + const u8 *key2 = key + (keylen / 2); + + err = xts_check_key(tfm, key, keylen); + if (err) + return err; + ctx->key_len = keylen; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + ctx->cipher_type = AES_XTS; + switch (ctx->key_len) { + case 32: + ctx->key_type = AES_128_BIT; + break; + case 64: + ctx->key_type = AES_256_BIT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int cvm_validate_keylen(struct cvm_enc_ctx *ctx, u32 keylen) +{ + if ((keylen == 16) || (keylen == 24) || (keylen == 32)) { + ctx->key_len = 
keylen; + switch (ctx->key_len) { + case 16: + ctx->key_type = AES_128_BIT; + break; + case 24: + ctx->key_type = AES_192_BIT; + break; + case 32: + ctx->key_type = AES_256_BIT; + break; + default: + return -EINVAL; + } + + if (ctx->cipher_type == DES3_CBC) + ctx->key_type = 0; + + return 0; + } + + return -EINVAL; +} + +static int cvm_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->cipher_type = cipher_type; + if (!cvm_validate_keylen(ctx, keylen)) { + memcpy(ctx->enc_key, key, keylen); + return 0; + } else { + crypto_ablkcipher_set_flags(cipher, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } +} + +static int cvm_cbc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + return cvm_setkey(cipher, key, keylen, AES_CBC); +} + +static int cvm_ecb_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + return cvm_setkey(cipher, key, keylen, AES_ECB); +} + +static int cvm_cfb_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + return cvm_setkey(cipher, key, keylen, AES_CFB); +} + +static int cvm_cbc_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + return cvm_setkey(cipher, key, keylen, DES3_CBC); +} + +static int cvm_ecb_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + return cvm_setkey(cipher, key, keylen, DES3_ECB); +} + +static int cvm_enc_dec_init(struct crypto_tfm *tfm) +{ + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + tfm->crt_ablkcipher.reqsize = sizeof(struct cvm_req_ctx) + + sizeof(struct ablkcipher_request); + /* Additional memory for ablkcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + + return 0; +} + +static struct crypto_alg algs[] = { { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_enc_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "xts(aes)", + .cra_driver_name = "cavium-xts-aes", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = cvm_xts_setkey, + .encrypt = cvm_encrypt, + .decrypt = cvm_decrypt, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_enc_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "cbc(aes)", + .cra_driver_name = "cavium-cbc-aes", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = cvm_cbc_aes_setkey, + .encrypt = cvm_encrypt, + .decrypt = cvm_decrypt, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_enc_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "ecb(aes)", + .cra_driver_name = "cavium-ecb-aes", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = 
AES_MAX_KEY_SIZE, + .setkey = cvm_ecb_aes_setkey, + .encrypt = cvm_encrypt, + .decrypt = cvm_decrypt, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_enc_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "cfb(aes)", + .cra_driver_name = "cavium-cfb-aes", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = cvm_cfb_aes_setkey, + .encrypt = cvm_encrypt, + .decrypt = cvm_decrypt, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_des3_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cavium-cbc-des3_ede", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = cvm_cbc_des3_setkey, + .encrypt = cvm_encrypt, + .decrypt = cvm_decrypt, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_des3_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "cavium-ecb-des3_ede", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = cvm_ecb_des3_setkey, + .encrypt = cvm_encrypt, + .decrypt = cvm_decrypt, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +} }; + +static inline int cav_register_algs(void) +{ + int err = 0; + + err = crypto_register_algs(algs, ARRAY_SIZE(algs)); + if (err) + return err; + + return 0; +} + +static inline void cav_unregister_algs(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +int cvm_crypto_init(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 dev_count; + + dev_count = dev_handle.dev_count; + dev_handle.cdev[dev_count] = cptvf; + dev_handle.dev_count++; + + if (dev_count == 3) { + if (cav_register_algs()) { + dev_err(&pdev->dev, "Error in registering crypto algorithms\n"); + return -EINVAL; + } + } + + return 0; +} + +void cvm_crypto_exit(void) +{ + u32 dev_count; + + dev_count = --dev_handle.dev_count; + if (!dev_count) + cav_unregister_algs(); +} diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.h b/drivers/crypto/cavium/cpt/cptvf_algs.h new file mode 100644 index 000000000..902f25751 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_algs.h @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
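+ *
+ * Shared definitions for cptvf_algs.c: opcode and cipher/key-type
+ * enumerations, the 64-bit encryption control word and the fc_context
+ * that is DMAed to the CPT cores, plus the per-tfm and per-request
+ * context structures.
+ *
+ * Note the endianness handling: encr_ctrl is built as a host bitfield
+ * and byte-swapped with cpu_to_be64() in create_ctx_hdr() before it is
+ * handed to hardware.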
+ */ + +#ifndef _CPTVF_ALGS_H_ +#define _CPTVF_ALGS_H_ + +#include "request_manager.h" + +#define MAX_DEVICES 16 +#define MAJOR_OP_FC 0x33 +#define MAX_ENC_KEY_SIZE 32 +#define MAX_HASH_KEY_SIZE 64 +#define MAX_KEY_SIZE (MAX_ENC_KEY_SIZE + MAX_HASH_KEY_SIZE) +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 + +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == DMA_GATHER_SCATTER) ? (1 << 7) : 0) + +enum req_type { + AE_CORE_REQ, + SE_CORE_REQ, +}; + +enum cipher_type { + DES3_CBC = 0x1, + DES3_ECB = 0x2, + AES_CBC = 0x3, + AES_ECB = 0x4, + AES_CFB = 0x5, + AES_CTR = 0x6, + AES_GCM = 0x7, + AES_XTS = 0x8 +}; + +enum aes_type { + AES_128_BIT = 0x1, + AES_192_BIT = 0x2, + AES_256_BIT = 0x3 +}; + +union encr_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved1:1; + u64 aes_key:2; + u64 iv_source:1; + u64 hash_type:4; + u64 reserved2:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved3:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved3:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved2:3; + u64 hash_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved1:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct cvm_cipher { + const char *name; + u8 value; +}; + +struct enc_context { + union encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +struct fchmac_context { + u8 ipad[64]; + u8 opad[64]; /* or OPAD */ +}; + +struct fc_context { + struct enc_context enc; + struct fchmac_context hmac; +}; + +struct cvm_enc_ctx { + u32 key_len; + u8 enc_key[MAX_KEY_SIZE]; + u8 cipher_type:4; + u8 key_type:2; +}; + +struct cvm_des3_ctx { + u32 key_len; + u8 des3_key[MAX_KEY_SIZE]; +}; + +struct cvm_req_ctx { + struct cpt_request_info cpt_req; + u64 control_word; + struct fc_context fctx; +}; + +int cptvf_do_request(void *cptvf, struct cpt_request_info *req); +#endif /*_CPTVF_ALGS_H_*/ diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c new file mode 100644 index 000000000..5c796ed55 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -0,0 +1,866 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
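+ *
+ * cptvf_main.c: per-VF bring-up and teardown - PCI probe, command and
+ * pending queue allocation, MSI-X vector setup, and the mailbox
+ * handshake that reports this VF's queue length, group and priority to
+ * the PF before declaring the VF up.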
+ */ + +#include <linux/interrupt.h> +#include <linux/module.h> + +#include "cptvf.h" + +#define DRV_NAME "thunder-cptvf" +#define DRV_VERSION "1.0" + +struct cptvf_wqe { + struct tasklet_struct twork; + void *cptvf; + u32 qno; +}; + +struct cptvf_wqe_info { + struct cptvf_wqe vq_wqe[CPT_NUM_QS_PER_VF]; +}; + +static void vq_work_handler(unsigned long data) +{ + struct cptvf_wqe_info *cwqe_info = (struct cptvf_wqe_info *)data; + struct cptvf_wqe *cwqe = &cwqe_info->vq_wqe[0]; + + vq_post_process(cwqe->cptvf, cwqe->qno); +} + +static int init_worker_threads(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL); + if (!cwqe_info) + return -ENOMEM; + + if (cptvf->nr_queues) { + dev_info(&pdev->dev, "Creating VQ worker threads (%d)\n", + cptvf->nr_queues); + } + + for (i = 0; i < cptvf->nr_queues; i++) { + tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler, + (u64)cwqe_info); + cwqe_info->vq_wqe[i].qno = i; + cwqe_info->vq_wqe[i].cptvf = cptvf; + } + + cptvf->wqe_info = cwqe_info; + + return 0; +} + +static void cleanup_worker_threads(struct cpt_vf *cptvf) +{ + struct cptvf_wqe_info *cwqe_info; + struct pci_dev *pdev = cptvf->pdev; + int i; + + cwqe_info = (struct cptvf_wqe_info *)cptvf->wqe_info; + if (!cwqe_info) + return; + + if (cptvf->nr_queues) { + dev_info(&pdev->dev, "Cleaning VQ worker threads (%u)\n", + cptvf->nr_queues); + } + + for (i = 0; i < cptvf->nr_queues; i++) + tasklet_kill(&cwqe_info->vq_wqe[i].twork); + + kzfree(cwqe_info); + cptvf->wqe_info = NULL; +} + +static void free_pending_queues(struct pending_qinfo *pqinfo) +{ + int i; + struct pending_queue *queue; + + for_each_pending_queue(pqinfo, queue, i) { + if (!queue->head) + continue; + + /* free single queue */ + kzfree((queue->head)); + + queue->front = 0; + queue->rear = 0; + + return; + } + + pqinfo->qlen = 0; + pqinfo->nr_queues = 0; +} + +static int alloc_pending_queues(struct pending_qinfo *pqinfo, u32 qlen, + u32 nr_queues) +{ + u32 i; + size_t size; + int ret; + struct pending_queue *queue = NULL; + + pqinfo->nr_queues = nr_queues; + pqinfo->qlen = qlen; + + size = (qlen * sizeof(struct pending_entry)); + + for_each_pending_queue(pqinfo, queue, i) { + queue->head = kzalloc((size), GFP_KERNEL); + if (!queue->head) { + ret = -ENOMEM; + goto pending_qfail; + } + + queue->front = 0; + queue->rear = 0; + atomic64_set((&queue->pending_count), (0)); + + /* init queue spin lock */ + spin_lock_init(&queue->lock); + } + + return 0; + +pending_qfail: + free_pending_queues(pqinfo); + + return ret; +} + +static int init_pending_queues(struct cpt_vf *cptvf, u32 qlen, u32 nr_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + if (!nr_queues) + return 0; + + ret = alloc_pending_queues(&cptvf->pqinfo, qlen, nr_queues); + if (ret) { + dev_err(&pdev->dev, "failed to setup pending queues (%u)\n", + nr_queues); + return ret; + } + + return 0; +} + +static void cleanup_pending_queues(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->nr_queues) + return; + + dev_info(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->nr_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct cpt_vf *cptvf, + struct command_qinfo *cqinfo) +{ + int i; + struct command_queue *queue = NULL; + struct command_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + struct hlist_node *node; + + /* clean up for each queue */ + for (i = 0; i < 
cptvf->nr_queues; i++) { + queue = &cqinfo->queue[i]; + if (hlist_empty(&cqinfo->queue[i].chead)) + continue; + + hlist_for_each_entry_safe(chunk, node, &cqinfo->queue[i].chead, + nextchunk) { + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + hlist_del(&chunk->nextchunk); + kzfree(chunk); + } + + queue->nchunks = 0; + queue->idx = 0; + } + + /* common cleanup */ + cqinfo->cmd_size = 0; +} + +static int alloc_command_queues(struct cpt_vf *cptvf, + struct command_qinfo *cqinfo, size_t cmd_size, + u32 qlen) +{ + int i; + size_t q_size; + struct command_queue *queue = NULL; + struct pci_dev *pdev = cptvf->pdev; + + /* common init */ + cqinfo->cmd_size = cmd_size; + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * cqinfo->cmd_size; + + /* per queue initialization */ + for (i = 0; i < cptvf->nr_queues; i++) { + size_t c_size = 0; + size_t rem_q_size = q_size; + struct command_chunk *curr = NULL, *first = NULL, *last = NULL; + u32 qcsize_bytes = cqinfo->qchunksize * cqinfo->cmd_size; + + queue = &cqinfo->queue[i]; + INIT_HLIST_HEAD(&cqinfo->queue[i].chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes : + rem_q_size; + curr->head = (u8 *)dma_zalloc_coherent(&pdev->dev, + c_size + CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->nchunks); + kfree(curr); + goto cmd_qfail; + } + + curr->size = c_size; + if (queue->nchunks == 0) { + hlist_add_head(&curr->nextchunk, + &cqinfo->queue[i].chead); + first = curr; + } else { + hlist_add_behind(&curr->nextchunk, + &last->nextchunk); + } + + queue->nchunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* Make the queue circular */ + /* Tie back last chunk entry to head */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + spin_lock_init(&queue->lock); + } + return 0; + +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct cpt_vf *cptvf, u32 qlen) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* setup AE command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, CPT_INST_SIZE, + qlen); + if (ret) { + dev_err(&pdev->dev, "failed to allocate AE command queues (%u)\n", + cptvf->nr_queues); + return ret; + } + + return ret; +} + +static void cleanup_command_queues(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->nr_queues) + return; + + dev_info(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->nr_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct cpt_vf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct cpt_vf *cptvf, u32 qlen, u32 nr_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret = 0; + u32 max_dev_queues = 0; + + max_dev_queues = CPT_NUM_QS_PER_VF; + /* possible cpus */ + nr_queues = min_t(u32, nr_queues, max_dev_queues); + cptvf->nr_queues = nr_queues; + + ret = init_command_queues(cptvf, qlen); + if 
(ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + nr_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, nr_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + nr_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_free_irq_affinity(struct cpt_vf *cptvf, int vec) +{ + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_ctl(struct cpt_vf *cptvf, bool val) +{ + union cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_CTL(0, 0)); + vqx_ctl.s.ena = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_CTL(0, 0), vqx_ctl.u); +} + +void cptvf_write_vq_doorbell(struct cpt_vf *cptvf, u32 val) +{ + union cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DOORBELL(0, 0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DOORBELL(0, 0), + vqx_dbell.u); +} + +static void cptvf_write_vq_inprog(struct cpt_vf *cptvf, u8 val) +{ + union cptx_vqx_inprog vqx_inprg; + + vqx_inprg.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_INPROG(0, 0)); + vqx_inprg.s.inflight = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_INPROG(0, 0), vqx_inprg.u); +} + +static void cptvf_write_vq_done_numwait(struct cpt_vf *cptvf, u32 val) +{ + union cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_WAIT(0, 0)); + vqx_dwait.s.num_wait = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_WAIT(0, 0), + vqx_dwait.u); +} + +static void cptvf_write_vq_done_timewait(struct cpt_vf *cptvf, u16 time) +{ + union cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_WAIT(0, 0)); + vqx_dwait.s.time_wait = time; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_WAIT(0, 0), + vqx_dwait.u); +} + +static void cptvf_enable_swerr_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_ENA_W1S(0, 0)); + /* Set mbox(0) interupts for the requested vf */ + vqx_misc_ena.s.swerr = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_ENA_W1S(0, 0), + vqx_misc_ena.u); +} + +static void cptvf_enable_mbox_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_ENA_W1S(0, 0)); + /* Set mbox(0) interupts for the requested vf */ + vqx_misc_ena.s.mbox = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_ENA_W1S(0, 0), + vqx_misc_ena.u); +} + +static void cptvf_enable_done_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_ENA_W1S(0, 0)); + /* Set DONE interrupt for the requested vf */ + vqx_done_ena.s.done = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_ENA_W1S(0, 0), + vqx_done_ena.u); +} + +static void cptvf_clear_dovf_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + 
CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_irde_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_nwrp_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + cpt_write_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u); +} + +static void cptvf_clear_mbox_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_swerr_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static u64 cptvf_read_vf_misc_intr_status(struct cpt_vf *cptvf) +{ + return cpt_read_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int irq, void *cptvf_irq) +{ + struct cpt_vf *cptvf = (struct cpt_vf *)cptvf_irq; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /*Check for MISC interrupt types*/ + if (likely(intr & CPT_VF_INTR_MBOX_MASK)) { + dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /*Clear doorbell count*/ + cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, "NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct cptvf_wqe *get_cptvf_vq_wqe(struct cpt_vf *cptvf, + int qno) +{ + struct cptvf_wqe_info *nwqe_info; + + if (unlikely(qno >= cptvf->nr_queues)) + return NULL; + nwqe_info = (struct cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct cpt_vf *cptvf) +{ + union cptx_vqx_done vqx_done; + + vqx_done.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_DONE(0, 0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct cpt_vf *cptvf, + u32 ackcnt) +{ + union cptx_vqx_done_ack 
vqx_dack_cnt; + + vqx_dack_cnt.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_ACK(0, 0)); + vqx_dack_cnt.s.done_ack = ackcnt; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_ACK(0, 0), + vqx_dack_cnt.u); +} + +static irqreturn_t cptvf_done_intr_handler(int irq, void *cptvf_irq) +{ + struct cpt_vf *cptvf = (struct cpt_vf *)cptvf_irq; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct cptvf_wqe *wqe; + + /* Acknowledge the number of + * scheduled completions for processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static void cptvf_set_irq_affinity(struct cpt_vf *cptvf, int vec) +{ + struct pci_dev *pdev = cptvf->pdev; + int cpu; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, "Allocation failed for affinity_mask for VF %d", + cptvf->vfid); + return; + } + + cpu = cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_saddr(struct cpt_vf *cptvf, u64 val) +{ + union cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_SADDR(0, 0), vqx_saddr.u); +} + +void cptvf_device_init(struct cpt_vf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + /* TODO: for now only one queue, so hard coded */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, CPT_TIMER_THOLD); + cptvf_write_vq_done_numwait(cptvf, 1); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= CPT_FLAG_DEVICE_READY; +} + +static int cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct cpt_vf *cptvf; + int err; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto cptvf_err_disable_device; + } + /* Mark as VF driver */ + cptvf->flags |= CPT_FLAG_VF_DRIVER; + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto cptvf_err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto cptvf_err_release_regions; + } + + /* MAP PF's configuration registers */ + cptvf->reg_base = pcim_iomap(pdev, 0, 0); + if (!cptvf->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto cptvf_err_release_regions; + } + + cptvf->node = dev_to_node(&pdev->dev); + err = 
pci_alloc_irq_vectors(pdev, CPT_VF_MSIX_VECTORS, + CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for #%d msix vectors failed\n", + CPT_VF_MSIX_VECTORS); + goto cptvf_err_release_regions; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (err) { + dev_err(dev, "Request misc irq failed"); + goto cptvf_free_vectors; + } + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); + + /* Check ready with PF */ + /* Gets chip ID / device Id from PF if ready */ + err = cptvf_check_pf_ready(cptvf); + if (err) { + dev_err(dev, "PF not responding to READY msg"); + goto cptvf_free_misc_irq; + } + + /* CPT VF software resources initialization */ + cptvf->cqinfo.qchunksize = CPT_CMD_QCHUNK_SIZE; + err = cptvf_sw_init(cptvf, CPT_CMD_QLEN, CPT_NUM_QS_PER_VF); + if (err) { + dev_err(dev, "cptvf_sw_init() failed"); + goto cptvf_free_misc_irq; + } + /* Convey VQ LEN to PF */ + err = cptvf_send_vq_size_msg(cptvf); + if (err) { + dev_err(dev, "PF not responding to QLEN msg"); + goto cptvf_free_misc_irq; + } + + /* CPT VF device initialization */ + cptvf_device_init(cptvf); + /* Send msg to PF to assign currnet Q to required group */ + cptvf->vfgrp = 1; + err = cptvf_send_vf_to_grp_msg(cptvf); + if (err) { + dev_err(dev, "PF not responding to VF_GRP msg"); + goto cptvf_free_misc_irq; + } + + cptvf->priority = 1; + err = cptvf_send_vf_priority_msg(cptvf); + if (err) { + dev_err(dev, "PF not responding to VF_PRIO msg"); + goto cptvf_free_misc_irq; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), + cptvf_done_intr_handler, 0, "CPT VF done intr", + cptvf); + if (err) { + dev_err(dev, "Request done irq failed\n"); + goto cptvf_free_misc_irq; + } + + /* Enable mailbox interrupt */ + cptvf_enable_done_interrupts(cptvf); + + /* Set irq affinity masks */ + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + + err = cptvf_send_vf_up(cptvf); + if (err) { + dev_err(dev, "PF not responding to UP msg"); + goto cptvf_free_irq_affinity; + } + err = cvm_crypto_init(cptvf); + if (err) { + dev_err(dev, "Algorithm register failed\n"); + goto cptvf_free_irq_affinity; + } + return 0; + +cptvf_free_irq_affinity: + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); +cptvf_free_misc_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); +cptvf_free_vectors: + pci_free_irq_vectors(cptvf->pdev); +cptvf_err_release_regions: + pci_release_regions(pdev); +cptvf_err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void cptvf_remove(struct pci_dev *pdev) +{ + struct cpt_vf *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } + + /* Convey DOWN to PF */ + if (cptvf_send_vf_down(cptvf)) { + dev_err(&pdev->dev, "PF not responding to DOWN msg"); + } else { + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); + pci_free_irq_vectors(cptvf->pdev); + cptvf_sw_cleanup(cptvf); + pci_set_drvdata(pdev, NULL); + pci_release_regions(pdev); + pci_disable_device(pdev); + cvm_crypto_exit(); + } +} + +static void cptvf_shutdown(struct 
pci_dev *pdev)
+{
+	cptvf_remove(pdev);
+}
+
+/* Supported devices */
+static const struct pci_device_id cptvf_id_table[] = {
+	{PCI_VDEVICE(CAVIUM, CPT_81XX_PCI_VF_DEVICE_ID), 0},
+	{ 0, } /* end of table */
+};
+
+static struct pci_driver cptvf_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = cptvf_id_table,
+	.probe = cptvf_probe,
+	.remove = cptvf_remove,
+	.shutdown = cptvf_shutdown,
+};
+
+module_pci_driver(cptvf_pci_driver);
+
+MODULE_AUTHOR("George Cherian <george.cherian@cavium.com>");
+MODULE_DESCRIPTION("Cavium Thunder CPT Virtual Function Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, cptvf_id_table);
diff --git a/drivers/crypto/cavium/cpt/cptvf_mbox.c b/drivers/crypto/cavium/cpt/cptvf_mbox.c
new file mode 100644
index 000000000..d5ec3b8a9
--- /dev/null
+++ b/drivers/crypto/cavium/cpt/cptvf_mbox.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include "cptvf.h"
+
+static void cptvf_send_msg_to_pf(struct cpt_vf *cptvf, struct cpt_mbox *mbx)
+{
+	/* Writing mbox(1) causes interrupt */
+	cpt_write_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 0),
+			mbx->msg);
+	cpt_write_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 1),
+			mbx->data);
+}
+
+/* ACKs PF's mailbox message */
+void cptvf_mbox_send_ack(struct cpt_vf *cptvf, struct cpt_mbox *mbx)
+{
+	mbx->msg = CPT_MBOX_MSG_TYPE_ACK;
+	cptvf_send_msg_to_pf(cptvf, mbx);
+}
+
+/* NACKs PF's mailbox message to indicate that the VF is unable to
+ * complete the requested action
+ */
+void cptvf_mbox_send_nack(struct cpt_vf *cptvf, struct cpt_mbox *mbx)
+{
+	mbx->msg = CPT_MBOX_MSG_TYPE_NACK;
+	cptvf_send_msg_to_pf(cptvf, mbx);
+}
+
+/* Interrupt handler for mailbox messages from the PF */
+void cptvf_handle_mbox_intr(struct cpt_vf *cptvf)
+{
+	struct cpt_mbox mbx = {};
+
+	/*
+	 * MBOX[0] contains msg
+	 * MBOX[1] contains data
+	 */
+	mbx.msg = cpt_read_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 0));
+	mbx.data = cpt_read_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 1));
+	dev_dbg(&cptvf->pdev->dev, "%s: Mailbox msg 0x%llx from PF\n",
+		__func__, mbx.msg);
+	switch (mbx.msg) {
+	case CPT_MSG_READY:
+	{
+		cptvf->pf_acked = true;
+		cptvf->vfid = mbx.data;
+		dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid);
+		break;
+	}
+	case CPT_MSG_QBIND_GRP:
+		cptvf->pf_acked = true;
+		cptvf->vftype = mbx.data;
+		dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n",
+			cptvf->vfid, ((mbx.data == SE_TYPES) ?
"SE" : "AE"), + cptvf->vfgrp); + break; + case CPT_MBOX_MSG_TYPE_ACK: + cptvf->pf_acked = true; + break; + case CPT_MBOX_MSG_TYPE_NACK: + cptvf->pf_nacked = true; + break; + default: + dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n", + mbx.msg); + break; + } +} + +static int cptvf_send_msg_to_pf_timeout(struct cpt_vf *cptvf, + struct cpt_mbox *mbx) +{ + int timeout = CPT_MBOX_MSG_TIMEOUT; + int sleep = 10; + + cptvf->pf_acked = false; + cptvf->pf_nacked = false; + cptvf_send_msg_to_pf(cptvf, mbx); + /* Wait for previous message to be acked, timeout 2sec */ + while (!cptvf->pf_acked) { + if (cptvf->pf_nacked) + return -EINVAL; + msleep(sleep); + if (cptvf->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + dev_err(&cptvf->pdev->dev, "PF didn't ack to mbox msg %llx from VF%u\n", + (mbx->msg & 0xFF), cptvf->vfid); + return -EBUSY; + } + } + + return 0; +} + +/* + * Checks if VF is able to comminicate with PF + * and also gets the CPT number this VF is associated to. + */ +int cptvf_check_pf_ready(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_READY; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to READY msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF. + * Must be ACKed. + */ +int cptvf_send_vq_size_msg(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_QLEN; + mbx.data = cptvf->qsize; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to vq_size msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate VF group required to PF and get the VQ binded to that group + */ +int cptvf_send_vf_to_grp_msg(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_QBIND_GRP; + /* Convey group of the VF */ + mbx.data = cptvf->vfgrp; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to vf_type msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate VF group required to PF and get the VQ binded to that group + */ +int cptvf_send_vf_priority_msg(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_VQ_PRIORITY; + /* Convey group of the VF */ + mbx.data = cptvf->priority; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to vf_type msg\n"); + return -EBUSY; + } + return 0; +} + +/* + * Communicate to PF that VF is UP and running + */ +int cptvf_send_vf_up(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_VF_UP; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to UP msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate to PF that VF is DOWN and running + */ +int cptvf_send_vf_down(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_VF_DOWN; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to DOWN msg\n"); + return -EBUSY; + } + + return 0; +} diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c new file mode 100644 index 000000000..43fe69d09 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c @@ 
-0,0 +1,594 @@
+/*
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include "cptvf.h"
+#include "request_manager.h"
+
+/**
+ * get_free_pending_entry - get a free entry from the pending queue
+ * @q: pending queue to take the entry from
+ * @qlen: queue length
+ */
+static struct pending_entry *get_free_pending_entry(struct pending_queue *q,
+						    int qlen)
+{
+	struct pending_entry *ent = NULL;
+
+	ent = &q->head[q->rear];
+	if (unlikely(ent->busy)) {
+		ent = NULL;
+		goto no_free_entry;
+	}
+
+	q->rear++;
+	if (unlikely(q->rear == qlen))
+		q->rear = 0;
+
+no_free_entry:
+	return ent;
+}
+
+static inline void pending_queue_inc_front(struct pending_qinfo *pqinfo,
+					   int qno)
+{
+	struct pending_queue *queue = &pqinfo->queue[qno];
+
+	queue->front++;
+	if (unlikely(queue->front == pqinfo->qlen))
+		queue->front = 0;
+}
+
+static int setup_sgio_components(struct cpt_vf *cptvf, struct buf_ptr *list,
+				 int buf_count, u8 *buffer)
+{
+	int ret = 0, i, j;
+	int components;
+	struct sglist_component *sg_ptr = NULL;
+	struct pci_dev *pdev = cptvf->pdev;
+
+	if (unlikely(!list)) {
+		dev_err(&pdev->dev, "Input List pointer is NULL\n");
+		return -EFAULT;
+	}
+
+	for (i = 0; i < buf_count; i++) {
+		if (likely(list[i].vptr)) {
+			list[i].dma_addr = dma_map_single(&pdev->dev,
+							  list[i].vptr,
+							  list[i].size,
+							  DMA_BIDIRECTIONAL);
+			if (unlikely(dma_mapping_error(&pdev->dev,
+						       list[i].dma_addr))) {
+				dev_err(&pdev->dev, "DMA map kernel buffer failed for component: %d\n",
+					i);
+				ret = -EIO;
+				goto sg_cleanup;
+			}
+		}
+	}
+
+	components = buf_count / 4;
+	sg_ptr = (struct sglist_component *)buffer;
+	for (i = 0; i < components; i++) {
+		sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
+		sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
+		sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
+		sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size);
+		sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
+		sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
+		sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
+		sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr);
+		sg_ptr++;
+	}
+
+	components = buf_count % 4;
+
+	switch (components) {
+	case 3:
+		sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
+		sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
+		/* Fall through */
+	case 2:
+		sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
+		sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
+		/* Fall through */
+	case 1:
+		sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
+		sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+
+sg_cleanup:
+	/* Unmap every component mapped so far (note: list[j], not the
+	 * failed index)
+	 */
+	for (j = 0; j < i; j++) {
+		if (list[j].dma_addr) {
+			dma_unmap_single(&pdev->dev, list[j].dma_addr,
+					 list[j].size, DMA_BIDIRECTIONAL);
+		}
+
+		list[j].dma_addr = 0;
+	}
+
+	return ret;
+}
+
+static inline int setup_sgio_list(struct cpt_vf *cptvf,
+				  struct cpt_info_buffer *info,
+				  struct cpt_request_info *req)
+{
+	u16 g_sz_bytes = 0, s_sz_bytes = 0;
+	int ret = 0;
+	struct pci_dev *pdev = cptvf->pdev;
+
+	if (req->incnt > MAX_SG_IN_CNT || req->outcnt > MAX_SG_OUT_CNT) {
+		dev_err(&pdev->dev, "Request SG components are higher than supported\n");
+		ret = -EINVAL;
+		goto scatter_gather_clean;
+	}
+
+	/* Setup gather (input) components */
+	g_sz_bytes = ((req->incnt + 3) / 4) * sizeof(struct sglist_component);
+
info->gather_components = kzalloc(g_sz_bytes, req->may_sleep ? GFP_KERNEL : GFP_ATOMIC); + if (!info->gather_components) { + ret = -ENOMEM; + goto scatter_gather_clean; + } + + ret = setup_sgio_components(cptvf, req->in, + req->incnt, + info->gather_components); + if (ret) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + ret = -EFAULT; + goto scatter_gather_clean; + } + + /* Setup scatter (output) components */ + s_sz_bytes = ((req->outcnt + 3) / 4) * sizeof(struct sglist_component); + info->scatter_components = kzalloc(s_sz_bytes, req->may_sleep ? GFP_KERNEL : GFP_ATOMIC); + if (!info->scatter_components) { + ret = -ENOMEM; + goto scatter_gather_clean; + } + + ret = setup_sgio_components(cptvf, req->out, + req->outcnt, + info->scatter_components); + if (ret) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + ret = -EFAULT; + goto scatter_gather_clean; + } + + /* Create and initialize DPTR */ + info->dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; + info->in_buffer = kzalloc(info->dlen, req->may_sleep ? GFP_KERNEL : GFP_ATOMIC); + if (!info->in_buffer) { + ret = -ENOMEM; + goto scatter_gather_clean; + } + + ((u16 *)info->in_buffer)[0] = req->outcnt; + ((u16 *)info->in_buffer)[1] = req->incnt; + ((u16 *)info->in_buffer)[2] = 0; + ((u16 *)info->in_buffer)[3] = 0; + *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer); + + memcpy(&info->in_buffer[8], info->gather_components, + g_sz_bytes); + memcpy(&info->in_buffer[8 + g_sz_bytes], + info->scatter_components, s_sz_bytes); + + info->dptr_baddr = dma_map_single(&pdev->dev, + (void *)info->in_buffer, + info->dlen, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, info->dptr_baddr)) { + dev_err(&pdev->dev, "Mapping DPTR Failed %d\n", info->dlen); + ret = -EIO; + goto scatter_gather_clean; + } + + /* Create and initialize RPTR */ + info->out_buffer = kzalloc(COMPLETION_CODE_SIZE, req->may_sleep ? 
GFP_KERNEL : GFP_ATOMIC); + if (!info->out_buffer) { + ret = -ENOMEM; + goto scatter_gather_clean; + } + + *((u64 *)info->out_buffer) = ~((u64)COMPLETION_CODE_INIT); + info->alternate_caddr = (u64 *)info->out_buffer; + info->rptr_baddr = dma_map_single(&pdev->dev, + (void *)info->out_buffer, + COMPLETION_CODE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, info->rptr_baddr)) { + dev_err(&pdev->dev, "Mapping RPTR Failed %d\n", + COMPLETION_CODE_SIZE); + ret = -EIO; + goto scatter_gather_clean; + } + + return 0; + +scatter_gather_clean: + return ret; +} + +int send_cpt_command(struct cpt_vf *cptvf, union cpt_inst_s *cmd, + u32 qno) +{ + struct pci_dev *pdev = cptvf->pdev; + struct command_qinfo *qinfo = NULL; + struct command_queue *queue; + struct command_chunk *chunk; + u8 *ent; + int ret = 0; + + if (unlikely(qno >= cptvf->nr_queues)) { + dev_err(&pdev->dev, "Invalid queue (qno: %d, nr_queues: %d)\n", + qno, cptvf->nr_queues); + return -EINVAL; + } + + qinfo = &cptvf->cqinfo; + queue = &qinfo->queue[qno]; + /* lock commad queue */ + spin_lock(&queue->lock); + ent = &queue->qhead->head[queue->idx * qinfo->cmd_size]; + memcpy(ent, (void *)cmd, qinfo->cmd_size); + + if (++queue->idx >= queue->qhead->size / 64) { + struct hlist_node *node; + + hlist_for_each(node, &queue->chead) { + chunk = hlist_entry(node, struct command_chunk, + nextchunk); + if (chunk == queue->qhead) { + continue; + } else { + queue->qhead = chunk; + break; + } + } + queue->idx = 0; + } + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + cptvf_write_vq_doorbell(cptvf, 1); + /* unlock command queue */ + spin_unlock(&queue->lock); + + return ret; +} + +void do_request_cleanup(struct cpt_vf *cptvf, + struct cpt_info_buffer *info) +{ + int i; + struct pci_dev *pdev = cptvf->pdev; + struct cpt_request_info *req; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dlen, DMA_BIDIRECTIONAL); + + if (info->rptr_baddr) + dma_unmap_single(&pdev->dev, info->rptr_baddr, + COMPLETION_CODE_SIZE, DMA_BIDIRECTIONAL); + + if (info->comp_baddr) + dma_unmap_single(&pdev->dev, info->comp_baddr, + sizeof(union cpt_res_s), DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->outcnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->incnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + + if (info->scatter_components) + kzfree(info->scatter_components); + + if (info->gather_components) + kzfree(info->gather_components); + + if (info->out_buffer) + kzfree(info->out_buffer); + + if (info->in_buffer) + kzfree(info->in_buffer); + + if (info->completion_addr) + kzfree((void *)info->completion_addr); + + kzfree(info); +} + +void do_post_process(struct cpt_vf *cptvf, struct cpt_info_buffer *info) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!info) { + dev_err(&pdev->dev, "incorrect cpt_info_buffer for post processing\n"); + return; + } + + do_request_cleanup(cptvf, info); +} + +static inline void process_pending_queue(struct cpt_vf *cptvf, + struct pending_qinfo *pqinfo, + int qno) +{ + struct pci_dev *pdev = cptvf->pdev; + struct pending_queue *pqueue = &pqinfo->queue[qno]; + struct pending_entry *pentry = NULL; + struct cpt_info_buffer *info = NULL; + union cpt_res_s *status = NULL; + unsigned char ccode; + + while (1) { + 
spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + info = (struct cpt_info_buffer *)pentry->post_arg; + if (unlikely(!info)) { + dev_err(&pdev->dev, "Pending Entry post arg NULL\n"); + pending_queue_inc_front(pqinfo, qno); + spin_unlock_bh(&pqueue->lock); + continue; + } + + status = (union cpt_res_s *)pentry->completion_addr; + ccode = status->s.compcode; + if ((status->s.compcode == CPT_COMP_E_FAULT) || + (status->s.compcode == CPT_COMP_E_SWERR)) { + dev_err(&pdev->dev, "Request failed with %s\n", + (status->s.compcode == CPT_COMP_E_FAULT) ? + "DMA Fault" : "Software error"); + pentry->completion_addr = NULL; + pentry->busy = false; + atomic64_dec((&pqueue->pending_count)); + pentry->post_arg = NULL; + pending_queue_inc_front(pqinfo, qno); + do_request_cleanup(cptvf, info); + spin_unlock_bh(&pqueue->lock); + break; + } else if (status->s.compcode == COMPLETION_CODE_INIT) { + /* check for timeout */ + if (time_after_eq(jiffies, + (info->time_in + + (CPT_COMMAND_TIMEOUT * HZ)))) { + dev_err(&pdev->dev, "Request timed out"); + pentry->completion_addr = NULL; + pentry->busy = false; + atomic64_dec((&pqueue->pending_count)); + pentry->post_arg = NULL; + pending_queue_inc_front(pqinfo, qno); + do_request_cleanup(cptvf, info); + spin_unlock_bh(&pqueue->lock); + break; + } else if ((*info->alternate_caddr == + (~COMPLETION_CODE_INIT)) && + (info->extra_time < TIME_IN_RESET_COUNT)) { + info->time_in = jiffies; + info->extra_time++; + spin_unlock_bh(&pqueue->lock); + break; + } + } + + pentry->completion_addr = NULL; + pentry->busy = false; + pentry->post_arg = NULL; + atomic64_dec((&pqueue->pending_count)); + pending_queue_inc_front(pqinfo, qno); + spin_unlock_bh(&pqueue->lock); + + do_post_process(info->cptvf, info); + /* + * Calling callback after we find + * that the request has been serviced + */ + pentry->callback(ccode, pentry->callback_arg); + } +} + +int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req) +{ + int ret = 0, clear = 0, queue = 0; + struct cpt_info_buffer *info = NULL; + struct cptvf_request *cpt_req = NULL; + union ctrl_info *ctrl = NULL; + union cpt_res_s *result = NULL; + struct pending_entry *pentry = NULL; + struct pending_queue *pqueue = NULL; + struct pci_dev *pdev = cptvf->pdev; + u8 group = 0; + struct cpt_vq_command vq_cmd; + union cpt_inst_s cptinst; + + info = kzalloc(sizeof(*info), req->may_sleep ? GFP_KERNEL : GFP_ATOMIC); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Unable to allocate memory for info_buffer\n"); + return -ENOMEM; + } + + cpt_req = (struct cptvf_request *)&req->req; + ctrl = (union ctrl_info *)&req->ctrl; + + info->cptvf = cptvf; + group = ctrl->s.grp; + ret = setup_sgio_list(cptvf, info, req); + if (ret) { + dev_err(&pdev->dev, "Setting up SG list failed"); + goto request_cleanup; + } + + cpt_req->dlen = info->dlen; + /* + * Get buffer for union cpt_res_s response + * structure and its physical address + */ + info->completion_addr = kzalloc(sizeof(union cpt_res_s), req->may_sleep ? 
GFP_KERNEL : GFP_ATOMIC); + if (unlikely(!info->completion_addr)) { + dev_err(&pdev->dev, "Unable to allocate memory for completion_addr\n"); + ret = -ENOMEM; + goto request_cleanup; + } + + result = (union cpt_res_s *)info->completion_addr; + result->s.compcode = COMPLETION_CODE_INIT; + info->comp_baddr = dma_map_single(&pdev->dev, + (void *)info->completion_addr, + sizeof(union cpt_res_s), + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, info->comp_baddr)) { + dev_err(&pdev->dev, "mapping compptr Failed %lu\n", + sizeof(union cpt_res_s)); + ret = -EFAULT; + goto request_cleanup; + } + + /* Fill the VQ command */ + vq_cmd.cmd.u64 = 0; + vq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + vq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + vq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + vq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + /* 64-bit swap for microcode data reads, not needed for addresses*/ + vq_cmd.cmd.u64 = cpu_to_be64(vq_cmd.cmd.u64); + vq_cmd.dptr = info->dptr_baddr; + vq_cmd.rptr = info->rptr_baddr; + vq_cmd.cptr.u64 = 0; + vq_cmd.cptr.s.grp = group; + /* Get Pending Entry to submit command */ + /* Always queue 0, because 1 queue per VF */ + queue = 0; + pqueue = &cptvf->pqinfo.queue[queue]; + + if (atomic64_read(&pqueue->pending_count) > PENDING_THOLD) { + dev_err(&pdev->dev, "pending threshold reached\n"); + process_pending_queue(cptvf, &cptvf->pqinfo, queue); + } + +get_pending_entry: + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, cptvf->pqinfo.qlen); + if (unlikely(!pentry)) { + spin_unlock_bh(&pqueue->lock); + if (clear == 0) { + process_pending_queue(cptvf, &cptvf->pqinfo, queue); + clear = 1; + goto get_pending_entry; + } + dev_err(&pdev->dev, "Get free entry failed\n"); + dev_err(&pdev->dev, "queue: %d, rear: %d, front: %d\n", + queue, pqueue->rear, pqueue->front); + ret = -EFAULT; + goto request_cleanup; + } + + pentry->completion_addr = info->completion_addr; + pentry->post_arg = (void *)info; + pentry->callback = req->callback; + pentry->callback_arg = req->callback_arg; + info->pentry = pentry; + pentry->busy = true; + atomic64_inc(&pqueue->pending_count); + + /* Send CPT command */ + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Create the CPT_INST_S type command for HW intrepretation */ + cptinst.s.doneint = true; + cptinst.s.res_addr = (u64)info->comp_baddr; + cptinst.s.tag = 0; + cptinst.s.grp = 0; + cptinst.s.wq_ptr = 0; + cptinst.s.ei0 = vq_cmd.cmd.u64; + cptinst.s.ei1 = vq_cmd.dptr; + cptinst.s.ei2 = vq_cmd.rptr; + cptinst.s.ei3 = vq_cmd.cptr.u64; + + ret = send_cpt_command(cptvf, &cptinst, queue); + spin_unlock_bh(&pqueue->lock); + if (unlikely(ret)) { + dev_err(&pdev->dev, "Send command failed for AE\n"); + ret = -EFAULT; + goto request_cleanup; + } + + return 0; + +request_cleanup: + dev_dbg(&pdev->dev, "Failed to submit CPT command\n"); + do_request_cleanup(cptvf, info); + + return ret; +} + +void vq_post_process(struct cpt_vf *cptvf, u32 qno) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (unlikely(qno > cptvf->nr_queues)) { + dev_err(&pdev->dev, "Request for post processing on invalid pending queue: %u\n", + qno); + return; + } + + process_pending_queue(cptvf, &cptvf->pqinfo, qno); +} + +int cptvf_do_request(void *vfdev, struct cpt_request_info *req) +{ + struct cpt_vf *cptvf = (struct cpt_vf *)vfdev; + struct pci_dev *pdev = cptvf->pdev; + + if (!cpt_device_ready(cptvf)) { + dev_err(&pdev->dev, "CPT Device is not ready"); + return -ENODEV; + } + + if 
((cptvf->vftype == SE_TYPES) && (!req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request", + cptvf->vfid); + return -EINVAL; + } else if ((cptvf->vftype == AE_TYPES) && (req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request", + cptvf->vfid); + return -EINVAL; + } + + return process_request(cptvf, req); +} diff --git a/drivers/crypto/cavium/cpt/request_manager.h b/drivers/crypto/cavium/cpt/request_manager.h new file mode 100644 index 000000000..09930d95a --- /dev/null +++ b/drivers/crypto/cavium/cpt/request_manager.h @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __REQUEST_MANAGER_H +#define __REQUEST_MANAGER_H + +#include "cpt_common.h" + +#define TIME_IN_RESET_COUNT 5 +#define COMPLETION_CODE_SIZE 8 +#define COMPLETION_CODE_INIT 0 +#define PENDING_THOLD 100 +#define MAX_SG_IN_CNT 12 +#define MAX_SG_OUT_CNT 13 +#define SG_LIST_HDR_SIZE 8 +#define MAX_BUF_CNT 16 + +union ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved0:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1;/* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved0:26; +#endif + } s; +}; + +union opcode_info { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct cptvf_request { + union opcode_info opcode; + u16 param1; + u16 param2; + u16 dlen; +}; + +struct buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +struct cpt_request_info { + u8 incnt; /* Number of input buffers */ + u8 outcnt; /* Number of output buffers */ + u16 rlen; /* Output length */ + union ctrl_info ctrl; /* User control information */ + struct cptvf_request req; /* Request Information (Core specific) */ + + bool may_sleep; + + struct buf_ptr in[MAX_BUF_CNT]; + struct buf_ptr out[MAX_BUF_CNT]; + + void (*callback)(int, void *); /* Kernel ASYNC request callabck */ + void *callback_arg; /* Kernel ASYNC request callabck arg */ +}; + +struct sglist_component { + union { + u64 len; + struct { + u16 len0; + u16 len1; + u16 len2; + u16 len3; + } s; + } u; + u64 ptr0; + u64 ptr1; + u64 ptr2; + u64 ptr3; +}; + +struct cpt_info_buffer { + struct cpt_vf *cptvf; + unsigned long time_in; + u8 extra_time; + + struct cpt_request_info *req; + dma_addr_t dptr_baddr; + u32 dlen; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + u8 *in_buffer; + u8 *out_buffer; + u8 *gather_components; + u8 *scatter_components; + + struct pending_entry *pentry; + volatile u64 *completion_addr; + volatile u64 *alternate_caddr; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union vq_cmd_word0 { + u64 u64; + struct { + u16 opcode; + u16 param1; + u16 param2; + u16 dlen; + } s; +}; + +union vq_cmd_word3 { + u64 u64; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 grp:3; + u64 cptr:61; +#else + u64 cptr:61; + u64 grp:3; +#endif + } s; +}; + +struct cpt_vq_command { + union vq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union vq_cmd_word3 cptr; +}; + +void vq_post_process(struct cpt_vf *cptvf, u32 qno); +int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req); +#endif /* __REQUEST_MANAGER_H */ |
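
For orientation, request_manager.h above is the complete contract between the algorithm glue (cptvf_algs.c) and the request manager (cptvf_reqmanager.c). The sketch below shows how a caller might drive it with a single flat buffer; it is illustrative only - the example_* names and the opcode value are hypothetical, it assumes <linux/completion.h>, and the real caller (cvm_enc_dec() above) builds the in/out lists from scatterlists and an fc_context instead.

/* Hypothetical usage sketch -- not part of the driver sources. */
static void example_done(int status, void *arg)
{
	/* status is the CPT completion code handed to pentry->callback */
	complete((struct completion *)arg);
}

static int example_submit(struct cpt_vf *cptvf, u8 *in, u8 *out, u16 len,
			  struct completion *done)
{
	struct cpt_request_info req = {};

	req.ctrl.s.grp = 0;			/* engine group */
	req.ctrl.s.dma_mode = DMA_GATHER_SCATTER;
	req.ctrl.s.se_req = 1;			/* route to an SE-type VF */

	req.req.opcode.s.major = 0x33;		/* hypothetical opcode */
	req.req.opcode.s.minor = 2;
	req.req.param1 = len;

	req.in[0].vptr = in;			/* gather list, <= MAX_SG_IN_CNT entries */
	req.in[0].size = len;
	req.incnt = 1;

	req.out[0].vptr = out;			/* scatter list, <= MAX_SG_OUT_CNT entries */
	req.out[0].size = len;
	req.outcnt = 1;

	req.callback = example_done;		/* invoked from vq_post_process() */
	req.callback_arg = done;
	req.may_sleep = true;			/* permit GFP_KERNEL allocations */

	/* 0 means the instruction was queued; completion arrives via the
	 * DONE interrupt -> tasklet -> process_pending_queue() -> callback.
	 */
	return cptvf_do_request(cptvf, &req);
}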