From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:27:49 +0200 Subject: Adding upstream version 6.6.15. Signed-off-by: Daniel Baumann --- drivers/dma/Kconfig | 787 ++++ drivers/dma/Makefile | 89 + drivers/dma/TODO | 12 + drivers/dma/acpi-dma.c | 481 ++ drivers/dma/altera-msgdma.c | 963 ++++ drivers/dma/amba-pl08x.c | 3073 +++++++++++++ drivers/dma/apple-admac.c | 958 ++++ drivers/dma/at_hdmac.c | 2271 ++++++++++ drivers/dma/at_xdmac.c | 2503 +++++++++++ drivers/dma/bcm-sba-raid.c | 1771 ++++++++ drivers/dma/bcm2835-dma.c | 1046 +++++ drivers/dma/bestcomm/Kconfig | 37 + drivers/dma/bestcomm/Makefile | 15 + drivers/dma/bestcomm/ata.c | 152 + drivers/dma/bestcomm/bcom_ata_task.c | 64 + drivers/dma/bestcomm/bcom_fec_rx_task.c | 75 + drivers/dma/bestcomm/bcom_fec_tx_task.c | 88 + drivers/dma/bestcomm/bcom_gen_bd_rx_task.c | 59 + drivers/dma/bestcomm/bcom_gen_bd_tx_task.c | 65 + drivers/dma/bestcomm/bestcomm.c | 525 +++ drivers/dma/bestcomm/fec.c | 265 ++ drivers/dma/bestcomm/gen_bd.c | 350 ++ drivers/dma/bestcomm/sram.c | 171 + drivers/dma/dma-axi-dmac.c | 1063 +++++ drivers/dma/dma-jz4780.c | 1148 +++++ drivers/dma/dmaengine.c | 1601 +++++++ drivers/dma/dmaengine.h | 201 + drivers/dma/dmatest.c | 1360 ++++++ drivers/dma/dw-axi-dmac/Makefile | 2 + drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c | 1602 +++++++ drivers/dma/dw-axi-dmac/dw-axi-dmac.h | 400 ++ drivers/dma/dw-edma/Kconfig | 22 + drivers/dma/dw-edma/Makefile | 9 + drivers/dma/dw-edma/dw-edma-core.c | 1016 +++++ drivers/dma/dw-edma/dw-edma-core.h | 209 + drivers/dma/dw-edma/dw-edma-pcie.c | 378 ++ drivers/dma/dw-edma/dw-edma-v0-core.c | 508 +++ drivers/dma/dw-edma/dw-edma-v0-core.h | 17 + drivers/dma/dw-edma/dw-edma-v0-debugfs.c | 292 ++ drivers/dma/dw-edma/dw-edma-v0-debugfs.h | 22 + drivers/dma/dw-edma/dw-edma-v0-regs.h | 233 + drivers/dma/dw-edma/dw-hdma-v0-core.c | 296 ++ drivers/dma/dw-edma/dw-hdma-v0-core.h | 17 + drivers/dma/dw-edma/dw-hdma-v0-debugfs.c | 170 + drivers/dma/dw-edma/dw-hdma-v0-debugfs.h | 22 + drivers/dma/dw-edma/dw-hdma-v0-regs.h | 129 + drivers/dma/dw/Kconfig | 36 + drivers/dma/dw/Makefile | 13 + drivers/dma/dw/acpi.c | 55 + drivers/dma/dw/core.c | 1319 ++++++ drivers/dma/dw/dw.c | 137 + drivers/dma/dw/idma32.c | 291 ++ drivers/dma/dw/internal.h | 93 + drivers/dma/dw/of.c | 122 + drivers/dma/dw/pci.c | 147 + drivers/dma/dw/platform.c | 220 + drivers/dma/dw/regs.h | 409 ++ drivers/dma/dw/rzn1-dmamux.c | 160 + drivers/dma/ep93xx_dma.c | 1431 ++++++ drivers/dma/fsl-dpaa2-qdma/Kconfig | 9 + drivers/dma/fsl-dpaa2-qdma/Makefile | 3 + drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 839 ++++ drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h | 153 + drivers/dma/fsl-dpaa2-qdma/dpdmai.c | 397 ++ drivers/dma/fsl-dpaa2-qdma/dpdmai.h | 179 + drivers/dma/fsl-edma-common.c | 881 ++++ drivers/dma/fsl-edma-common.h | 351 ++ drivers/dma/fsl-edma-main.c | 724 +++ drivers/dma/fsl-qdma.c | 1308 ++++++ drivers/dma/fsl_raid.c | 898 ++++ drivers/dma/fsl_raid.h | 306 ++ drivers/dma/fsldma.c | 1431 ++++++ drivers/dma/fsldma.h | 265 ++ drivers/dma/hisi_dma.c | 1053 +++++ drivers/dma/hsu/Kconfig | 10 + drivers/dma/hsu/Makefile | 6 + drivers/dma/hsu/hsu.c | 512 +++ drivers/dma/hsu/hsu.h | 127 + drivers/dma/hsu/pci.c | 135 + drivers/dma/idma64.c | 706 +++ drivers/dma/idma64.h | 229 + drivers/dma/idxd/Makefile | 12 + drivers/dma/idxd/bus.c | 91 + drivers/dma/idxd/cdev.c | 692 +++ drivers/dma/idxd/compat.c | 107 + drivers/dma/idxd/debugfs.c | 138 + drivers/dma/idxd/device.c | 1607 +++++++ drivers/dma/idxd/dma.c | 359 ++ drivers/dma/idxd/idxd.h | 752 ++++ drivers/dma/idxd/init.c | 913 ++++ drivers/dma/idxd/irq.c | 655 +++ drivers/dma/idxd/perfmon.c | 661 +++ drivers/dma/idxd/perfmon.h | 119 + drivers/dma/idxd/registers.h | 632 +++ drivers/dma/idxd/submit.c | 217 + drivers/dma/idxd/sysfs.c | 1934 ++++++++ drivers/dma/img-mdc-dma.c | 1087 +++++ drivers/dma/imx-dma.c | 1252 ++++++ drivers/dma/imx-sdma.c | 2402 ++++++++++ drivers/dma/ioat/Makefile | 3 + drivers/dma/ioat/dca.c | 331 ++ drivers/dma/ioat/dma.c | 1061 +++++ drivers/dma/ioat/dma.h | 407 ++ drivers/dma/ioat/hw.h | 271 ++ drivers/dma/ioat/init.c | 1450 ++++++ drivers/dma/ioat/prep.c | 737 ++++ drivers/dma/ioat/registers.h | 251 ++ drivers/dma/ioat/sysfs.c | 166 + drivers/dma/k3dma.c | 1044 +++++ drivers/dma/lgm/Kconfig | 10 + drivers/dma/lgm/Makefile | 2 + drivers/dma/lgm/lgm-dma.c | 1735 ++++++++ drivers/dma/lpc18xx-dmamux.c | 181 + drivers/dma/mcf-edma-main.c | 303 ++ drivers/dma/mediatek/Kconfig | 38 + drivers/dma/mediatek/Makefile | 4 + drivers/dma/mediatek/mtk-cqdma.c | 937 ++++ drivers/dma/mediatek/mtk-hsdma.c | 1053 +++++ drivers/dma/mediatek/mtk-uart-apdma.c | 654 +++ drivers/dma/milbeaut-hdmac.c | 578 +++ drivers/dma/milbeaut-xdmac.c | 416 ++ drivers/dma/mmp_pdma.c | 1151 +++++ drivers/dma/mmp_tdma.c | 765 ++++ drivers/dma/moxart-dma.c | 676 +++ drivers/dma/mpc512x_dma.c | 1125 +++++ drivers/dma/mv_xor.c | 1468 +++++++ drivers/dma/mv_xor.h | 193 + drivers/dma/mv_xor_v2.c | 898 ++++ drivers/dma/mxs-dma.c | 842 ++++ drivers/dma/nbpfaxi.c | 1527 +++++++ drivers/dma/of-dma.c | 371 ++ drivers/dma/owl-dma.c | 1279 ++++++ drivers/dma/pch_dma.c | 995 +++++ drivers/dma/pl330.c | 3277 ++++++++++++++ drivers/dma/plx_dma.c | 635 +++ drivers/dma/ppc4xx/Makefile | 2 + drivers/dma/ppc4xx/adma.c | 4627 +++++++++++++++++++ drivers/dma/ppc4xx/adma.h | 190 + drivers/dma/ppc4xx/dma.h | 220 + drivers/dma/ppc4xx/xor.h | 107 + drivers/dma/ptdma/Kconfig | 13 + drivers/dma/ptdma/Makefile | 10 + drivers/dma/ptdma/ptdma-debugfs.c | 106 + drivers/dma/ptdma/ptdma-dev.c | 309 ++ drivers/dma/ptdma/ptdma-dmaengine.c | 413 ++ drivers/dma/ptdma/ptdma-pci.c | 243 + drivers/dma/ptdma/ptdma.h | 337 ++ drivers/dma/pxa_dma.c | 1467 +++++++ drivers/dma/qcom/Kconfig | 55 + drivers/dma/qcom/Makefile | 8 + drivers/dma/qcom/bam_dma.c | 1490 +++++++ drivers/dma/qcom/gpi.c | 2319 ++++++++++ drivers/dma/qcom/hidma.c | 973 ++++ drivers/dma/qcom/hidma.h | 160 + drivers/dma/qcom/hidma_dbg.c | 165 + drivers/dma/qcom/hidma_ll.c | 855 ++++ drivers/dma/qcom/hidma_mgmt.c | 441 ++ drivers/dma/qcom/hidma_mgmt.h | 31 + drivers/dma/qcom/hidma_mgmt_sys.c | 285 ++ drivers/dma/qcom/qcom_adm.c | 953 ++++ drivers/dma/sa11x0-dma.c | 1102 +++++ drivers/dma/sf-pdma/Kconfig | 7 + drivers/dma/sf-pdma/Makefile | 1 + drivers/dma/sf-pdma/sf-pdma.c | 623 +++ drivers/dma/sf-pdma/sf-pdma.h | 119 + drivers/dma/sh/Kconfig | 58 + drivers/dma/sh/Makefile | 18 + drivers/dma/sh/rcar-dmac.c | 2052 +++++++++ drivers/dma/sh/rz-dmac.c | 1013 +++++ drivers/dma/sh/shdma-base.c | 1051 +++++ drivers/dma/sh/shdma.h | 61 + drivers/dma/sh/shdmac.c | 936 ++++ drivers/dma/sh/usb-dmac.c | 912 ++++ drivers/dma/sprd-dma.c | 1308 ++++++ drivers/dma/st_fdma.c | 877 ++++ drivers/dma/st_fdma.h | 245 ++ drivers/dma/ste_dma40.c | 3695 ++++++++++++++++ drivers/dma/ste_dma40.h | 110 + drivers/dma/ste_dma40_ll.c | 449 ++ drivers/dma/ste_dma40_ll.h | 470 ++ drivers/dma/stm32-dma.c | 1793 ++++++++ drivers/dma/stm32-dmamux.c | 402 ++ drivers/dma/stm32-mdma.c | 1830 ++++++++ drivers/dma/sun4i-dma.c | 1308 ++++++ drivers/dma/sun6i-dma.c | 1504 +++++++ drivers/dma/tegra186-gpc-dma.c | 1544 +++++++ drivers/dma/tegra20-apb-dma.c | 1687 +++++++ drivers/dma/tegra210-adma.c | 991 +++++ drivers/dma/ti/Kconfig | 63 + drivers/dma/ti/Makefile | 17 + drivers/dma/ti/cppi41.c | 1257 ++++++ drivers/dma/ti/dma-crossbar.c | 478 ++ drivers/dma/ti/edma.c | 2690 ++++++++++++ drivers/dma/ti/k3-psil-am62.c | 186 + drivers/dma/ti/k3-psil-am62a.c | 196 + drivers/dma/ti/k3-psil-am64.c | 158 + drivers/dma/ti/k3-psil-am654.c | 175 + drivers/dma/ti/k3-psil-j7200.c | 242 + drivers/dma/ti/k3-psil-j721e.c | 377 ++ drivers/dma/ti/k3-psil-j721s2.c | 186 + drivers/dma/ti/k3-psil-j784s4.c | 354 ++ drivers/dma/ti/k3-psil-priv.h | 49 + drivers/dma/ti/k3-psil.c | 107 + drivers/dma/ti/k3-udma-glue.c | 1445 ++++++ drivers/dma/ti/k3-udma-private.c | 183 + drivers/dma/ti/k3-udma.c | 5619 ++++++++++++++++++++++++ drivers/dma/ti/k3-udma.h | 164 + drivers/dma/ti/omap-dma.c | 1956 +++++++++ drivers/dma/timb_dma.c | 773 ++++ drivers/dma/txx9dmac.c | 1306 ++++++ drivers/dma/txx9dmac.h | 304 ++ drivers/dma/uniphier-mdmac.c | 501 +++ drivers/dma/uniphier-xdmac.c | 611 +++ drivers/dma/virt-dma.c | 142 + drivers/dma/virt-dma.h | 227 + drivers/dma/xgene-dma.c | 1834 ++++++++ drivers/dma/xilinx/Makefile | 5 + drivers/dma/xilinx/xdma-regs.h | 166 + drivers/dma/xilinx/xdma.c | 976 ++++ drivers/dma/xilinx/xilinx_dma.c | 3279 ++++++++++++++ drivers/dma/xilinx/xilinx_dpdma.c | 1777 ++++++++ drivers/dma/xilinx/zynqmp_dma.c | 1185 +++++ 222 files changed, 152889 insertions(+) create mode 100644 drivers/dma/Kconfig create mode 100644 drivers/dma/Makefile create mode 100644 drivers/dma/TODO create mode 100644 drivers/dma/acpi-dma.c create mode 100644 drivers/dma/altera-msgdma.c create mode 100644 drivers/dma/amba-pl08x.c create mode 100644 drivers/dma/apple-admac.c create mode 100644 drivers/dma/at_hdmac.c create mode 100644 drivers/dma/at_xdmac.c create mode 100644 drivers/dma/bcm-sba-raid.c create mode 100644 drivers/dma/bcm2835-dma.c create mode 100644 drivers/dma/bestcomm/Kconfig create mode 100644 drivers/dma/bestcomm/Makefile create mode 100644 drivers/dma/bestcomm/ata.c create mode 100644 drivers/dma/bestcomm/bcom_ata_task.c create mode 100644 drivers/dma/bestcomm/bcom_fec_rx_task.c create mode 100644 drivers/dma/bestcomm/bcom_fec_tx_task.c create mode 100644 drivers/dma/bestcomm/bcom_gen_bd_rx_task.c create mode 100644 drivers/dma/bestcomm/bcom_gen_bd_tx_task.c create mode 100644 drivers/dma/bestcomm/bestcomm.c create mode 100644 drivers/dma/bestcomm/fec.c create mode 100644 drivers/dma/bestcomm/gen_bd.c create mode 100644 drivers/dma/bestcomm/sram.c create mode 100644 drivers/dma/dma-axi-dmac.c create mode 100644 drivers/dma/dma-jz4780.c create mode 100644 drivers/dma/dmaengine.c create mode 100644 drivers/dma/dmaengine.h create mode 100644 drivers/dma/dmatest.c create mode 100644 drivers/dma/dw-axi-dmac/Makefile create mode 100644 drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c create mode 100644 drivers/dma/dw-axi-dmac/dw-axi-dmac.h create mode 100644 drivers/dma/dw-edma/Kconfig create mode 100644 drivers/dma/dw-edma/Makefile create mode 100644 drivers/dma/dw-edma/dw-edma-core.c create mode 100644 drivers/dma/dw-edma/dw-edma-core.h create mode 100644 drivers/dma/dw-edma/dw-edma-pcie.c create mode 100644 drivers/dma/dw-edma/dw-edma-v0-core.c create mode 100644 drivers/dma/dw-edma/dw-edma-v0-core.h create mode 100644 drivers/dma/dw-edma/dw-edma-v0-debugfs.c create mode 100644 drivers/dma/dw-edma/dw-edma-v0-debugfs.h create mode 100644 drivers/dma/dw-edma/dw-edma-v0-regs.h create mode 100644 drivers/dma/dw-edma/dw-hdma-v0-core.c create mode 100644 drivers/dma/dw-edma/dw-hdma-v0-core.h create mode 100644 drivers/dma/dw-edma/dw-hdma-v0-debugfs.c create mode 100644 drivers/dma/dw-edma/dw-hdma-v0-debugfs.h create mode 100644 drivers/dma/dw-edma/dw-hdma-v0-regs.h create mode 100644 drivers/dma/dw/Kconfig create mode 100644 drivers/dma/dw/Makefile create mode 100644 drivers/dma/dw/acpi.c create mode 100644 drivers/dma/dw/core.c create mode 100644 drivers/dma/dw/dw.c create mode 100644 drivers/dma/dw/idma32.c create mode 100644 drivers/dma/dw/internal.h create mode 100644 drivers/dma/dw/of.c create mode 100644 drivers/dma/dw/pci.c create mode 100644 drivers/dma/dw/platform.c create mode 100644 drivers/dma/dw/regs.h create mode 100644 drivers/dma/dw/rzn1-dmamux.c create mode 100644 drivers/dma/ep93xx_dma.c create mode 100644 drivers/dma/fsl-dpaa2-qdma/Kconfig create mode 100644 drivers/dma/fsl-dpaa2-qdma/Makefile create mode 100644 drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c create mode 100644 drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h create mode 100644 drivers/dma/fsl-dpaa2-qdma/dpdmai.c create mode 100644 drivers/dma/fsl-dpaa2-qdma/dpdmai.h create mode 100644 drivers/dma/fsl-edma-common.c create mode 100644 drivers/dma/fsl-edma-common.h create mode 100644 drivers/dma/fsl-edma-main.c create mode 100644 drivers/dma/fsl-qdma.c create mode 100644 drivers/dma/fsl_raid.c create mode 100644 drivers/dma/fsl_raid.h create mode 100644 drivers/dma/fsldma.c create mode 100644 drivers/dma/fsldma.h create mode 100644 drivers/dma/hisi_dma.c create mode 100644 drivers/dma/hsu/Kconfig create mode 100644 drivers/dma/hsu/Makefile create mode 100644 drivers/dma/hsu/hsu.c create mode 100644 drivers/dma/hsu/hsu.h create mode 100644 drivers/dma/hsu/pci.c create mode 100644 drivers/dma/idma64.c create mode 100644 drivers/dma/idma64.h create mode 100644 drivers/dma/idxd/Makefile create mode 100644 drivers/dma/idxd/bus.c create mode 100644 drivers/dma/idxd/cdev.c create mode 100644 drivers/dma/idxd/compat.c create mode 100644 drivers/dma/idxd/debugfs.c create mode 100644 drivers/dma/idxd/device.c create mode 100644 drivers/dma/idxd/dma.c create mode 100644 drivers/dma/idxd/idxd.h create mode 100644 drivers/dma/idxd/init.c create mode 100644 drivers/dma/idxd/irq.c create mode 100644 drivers/dma/idxd/perfmon.c create mode 100644 drivers/dma/idxd/perfmon.h create mode 100644 drivers/dma/idxd/registers.h create mode 100644 drivers/dma/idxd/submit.c create mode 100644 drivers/dma/idxd/sysfs.c create mode 100644 drivers/dma/img-mdc-dma.c create mode 100644 drivers/dma/imx-dma.c create mode 100644 drivers/dma/imx-sdma.c create mode 100644 drivers/dma/ioat/Makefile create mode 100644 drivers/dma/ioat/dca.c create mode 100644 drivers/dma/ioat/dma.c create mode 100644 drivers/dma/ioat/dma.h create mode 100644 drivers/dma/ioat/hw.h create mode 100644 drivers/dma/ioat/init.c create mode 100644 drivers/dma/ioat/prep.c create mode 100644 drivers/dma/ioat/registers.h create mode 100644 drivers/dma/ioat/sysfs.c create mode 100644 drivers/dma/k3dma.c create mode 100644 drivers/dma/lgm/Kconfig create mode 100644 drivers/dma/lgm/Makefile create mode 100644 drivers/dma/lgm/lgm-dma.c create mode 100644 drivers/dma/lpc18xx-dmamux.c create mode 100644 drivers/dma/mcf-edma-main.c create mode 100644 drivers/dma/mediatek/Kconfig create mode 100644 drivers/dma/mediatek/Makefile create mode 100644 drivers/dma/mediatek/mtk-cqdma.c create mode 100644 drivers/dma/mediatek/mtk-hsdma.c create mode 100644 drivers/dma/mediatek/mtk-uart-apdma.c create mode 100644 drivers/dma/milbeaut-hdmac.c create mode 100644 drivers/dma/milbeaut-xdmac.c create mode 100644 drivers/dma/mmp_pdma.c create mode 100644 drivers/dma/mmp_tdma.c create mode 100644 drivers/dma/moxart-dma.c create mode 100644 drivers/dma/mpc512x_dma.c create mode 100644 drivers/dma/mv_xor.c create mode 100644 drivers/dma/mv_xor.h create mode 100644 drivers/dma/mv_xor_v2.c create mode 100644 drivers/dma/mxs-dma.c create mode 100644 drivers/dma/nbpfaxi.c create mode 100644 drivers/dma/of-dma.c create mode 100644 drivers/dma/owl-dma.c create mode 100644 drivers/dma/pch_dma.c create mode 100644 drivers/dma/pl330.c create mode 100644 drivers/dma/plx_dma.c create mode 100644 drivers/dma/ppc4xx/Makefile create mode 100644 drivers/dma/ppc4xx/adma.c create mode 100644 drivers/dma/ppc4xx/adma.h create mode 100644 drivers/dma/ppc4xx/dma.h create mode 100644 drivers/dma/ppc4xx/xor.h create mode 100644 drivers/dma/ptdma/Kconfig create mode 100644 drivers/dma/ptdma/Makefile create mode 100644 drivers/dma/ptdma/ptdma-debugfs.c create mode 100644 drivers/dma/ptdma/ptdma-dev.c create mode 100644 drivers/dma/ptdma/ptdma-dmaengine.c create mode 100644 drivers/dma/ptdma/ptdma-pci.c create mode 100644 drivers/dma/ptdma/ptdma.h create mode 100644 drivers/dma/pxa_dma.c create mode 100644 drivers/dma/qcom/Kconfig create mode 100644 drivers/dma/qcom/Makefile create mode 100644 drivers/dma/qcom/bam_dma.c create mode 100644 drivers/dma/qcom/gpi.c create mode 100644 drivers/dma/qcom/hidma.c create mode 100644 drivers/dma/qcom/hidma.h create mode 100644 drivers/dma/qcom/hidma_dbg.c create mode 100644 drivers/dma/qcom/hidma_ll.c create mode 100644 drivers/dma/qcom/hidma_mgmt.c create mode 100644 drivers/dma/qcom/hidma_mgmt.h create mode 100644 drivers/dma/qcom/hidma_mgmt_sys.c create mode 100644 drivers/dma/qcom/qcom_adm.c create mode 100644 drivers/dma/sa11x0-dma.c create mode 100644 drivers/dma/sf-pdma/Kconfig create mode 100644 drivers/dma/sf-pdma/Makefile create mode 100644 drivers/dma/sf-pdma/sf-pdma.c create mode 100644 drivers/dma/sf-pdma/sf-pdma.h create mode 100644 drivers/dma/sh/Kconfig create mode 100644 drivers/dma/sh/Makefile create mode 100644 drivers/dma/sh/rcar-dmac.c create mode 100644 drivers/dma/sh/rz-dmac.c create mode 100644 drivers/dma/sh/shdma-base.c create mode 100644 drivers/dma/sh/shdma.h create mode 100644 drivers/dma/sh/shdmac.c create mode 100644 drivers/dma/sh/usb-dmac.c create mode 100644 drivers/dma/sprd-dma.c create mode 100644 drivers/dma/st_fdma.c create mode 100644 drivers/dma/st_fdma.h create mode 100644 drivers/dma/ste_dma40.c create mode 100644 drivers/dma/ste_dma40.h create mode 100644 drivers/dma/ste_dma40_ll.c create mode 100644 drivers/dma/ste_dma40_ll.h create mode 100644 drivers/dma/stm32-dma.c create mode 100644 drivers/dma/stm32-dmamux.c create mode 100644 drivers/dma/stm32-mdma.c create mode 100644 drivers/dma/sun4i-dma.c create mode 100644 drivers/dma/sun6i-dma.c create mode 100644 drivers/dma/tegra186-gpc-dma.c create mode 100644 drivers/dma/tegra20-apb-dma.c create mode 100644 drivers/dma/tegra210-adma.c create mode 100644 drivers/dma/ti/Kconfig create mode 100644 drivers/dma/ti/Makefile create mode 100644 drivers/dma/ti/cppi41.c create mode 100644 drivers/dma/ti/dma-crossbar.c create mode 100644 drivers/dma/ti/edma.c create mode 100644 drivers/dma/ti/k3-psil-am62.c create mode 100644 drivers/dma/ti/k3-psil-am62a.c create mode 100644 drivers/dma/ti/k3-psil-am64.c create mode 100644 drivers/dma/ti/k3-psil-am654.c create mode 100644 drivers/dma/ti/k3-psil-j7200.c create mode 100644 drivers/dma/ti/k3-psil-j721e.c create mode 100644 drivers/dma/ti/k3-psil-j721s2.c create mode 100644 drivers/dma/ti/k3-psil-j784s4.c create mode 100644 drivers/dma/ti/k3-psil-priv.h create mode 100644 drivers/dma/ti/k3-psil.c create mode 100644 drivers/dma/ti/k3-udma-glue.c create mode 100644 drivers/dma/ti/k3-udma-private.c create mode 100644 drivers/dma/ti/k3-udma.c create mode 100644 drivers/dma/ti/k3-udma.h create mode 100644 drivers/dma/ti/omap-dma.c create mode 100644 drivers/dma/timb_dma.c create mode 100644 drivers/dma/txx9dmac.c create mode 100644 drivers/dma/txx9dmac.h create mode 100644 drivers/dma/uniphier-mdmac.c create mode 100644 drivers/dma/uniphier-xdmac.c create mode 100644 drivers/dma/virt-dma.c create mode 100644 drivers/dma/virt-dma.h create mode 100644 drivers/dma/xgene-dma.c create mode 100644 drivers/dma/xilinx/Makefile create mode 100644 drivers/dma/xilinx/xdma-regs.h create mode 100644 drivers/dma/xilinx/xdma.c create mode 100644 drivers/dma/xilinx/xilinx_dma.c create mode 100644 drivers/dma/xilinx/xilinx_dpdma.c create mode 100644 drivers/dma/xilinx/zynqmp_dma.c (limited to 'drivers/dma') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig new file mode 100644 index 0000000000..4ccae1a3b8 --- /dev/null +++ b/drivers/dma/Kconfig @@ -0,0 +1,787 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# DMA engine configuration +# + +menuconfig DMADEVICES + bool "DMA Engine support" + depends on HAS_DMA + help + DMA engines can do asynchronous data transfers without + involving the host CPU. Currently, this framework can be + used to offload memory copies in the network stack and + RAID operations in the MD driver. This menu only presents + DMA Device drivers supported by the configured arch, it may + be empty in some cases. + +config DMADEVICES_DEBUG + bool "DMA Engine debugging" + depends on DMADEVICES != n + help + This is an option for use by developers; most people should + say N here. This enables DMA engine core and driver debugging. + +config DMADEVICES_VDEBUG + bool "DMA Engine verbose debugging" + depends on DMADEVICES_DEBUG != n + help + This is an option for use by developers; most people should + say N here. This enables deeper (more verbose) debugging of + the DMA engine core and drivers. + + +if DMADEVICES + +comment "DMA Devices" + +#core +config ASYNC_TX_ENABLE_CHANNEL_SWITCH + bool + +config ARCH_HAS_ASYNC_TX_FIND_CHANNEL + bool + +config DMA_ENGINE + bool + +config DMA_VIRTUAL_CHANNELS + tristate + +config DMA_ACPI + def_bool y + depends on ACPI + +config DMA_OF + def_bool y + depends on OF + select DMA_ENGINE + +#devices +config ALTERA_MSGDMA + tristate "Altera / Intel mSGDMA Engine" + depends on HAS_IOMEM + select DMA_ENGINE + help + Enable support for Altera / Intel mSGDMA controller. + +config AMBA_PL08X + bool "ARM PrimeCell PL080 or PL081 support" + depends on ARM_AMBA + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Say yes if your platform has a PL08x DMAC device which can + provide DMA engine support. This includes the original ARM + PL080 and PL081, Samsungs PL080 derivative and Faraday + Technology's FTDMAC020 PL080 derivative. + +config AMCC_PPC440SPE_ADMA + tristate "AMCC PPC440SPe ADMA support" + depends on 440SPe || 440SP + select DMA_ENGINE + select DMA_ENGINE_RAID + select ARCH_HAS_ASYNC_TX_FIND_CHANNEL + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + help + Enable support for the AMCC PPC440SPe RAID engines. + +config APPLE_ADMAC + tristate "Apple ADMAC support" + depends on ARCH_APPLE || COMPILE_TEST + select DMA_ENGINE + default ARCH_APPLE + help + Enable support for Audio DMA Controller found on Apple Silicon SoCs. + +config AT_HDMAC + tristate "Atmel AHB DMA support" + depends on ARCH_AT91 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the Atmel AHB DMA controller. + +config AT_XDMAC + tristate "Atmel XDMA support" + depends on ARCH_AT91 + select DMA_ENGINE + help + Support the Atmel XDMA controller. + +config AXI_DMAC + tristate "Analog Devices AXI-DMAC DMA support" + depends on MICROBLAZE || NIOS2 || ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select REGMAP_MMIO + help + Enable support for the Analog Devices AXI-DMAC peripheral. This DMA + controller is often used in Analog Devices' reference designs for FPGA + platforms. + +config BCM_SBA_RAID + tristate "Broadcom SBA RAID engine support" + depends on ARM64 || COMPILE_TEST + depends on MAILBOX && RAID6_PQ + select DMA_ENGINE + select DMA_ENGINE_RAID + select ASYNC_TX_DISABLE_XOR_VAL_DMA + select ASYNC_TX_DISABLE_PQ_VAL_DMA + default m if ARCH_BCM_IPROC + help + Enable support for Broadcom SBA RAID Engine. The SBA RAID + engine is available on most of the Broadcom iProc SoCs. It + has the capability to offload memcpy, xor and pq computation + for raid5/6. + +config DMA_BCM2835 + tristate "BCM2835 DMA engine support" + depends on ARCH_BCM2835 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + +config DMA_JZ4780 + tristate "JZ4780 DMA support" + depends on MIPS || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + This selects support for the DMA controller in Ingenic JZ4780 SoCs. + If you have a board based on such a SoC and wish to use DMA for + devices which can use the DMA controller, say Y or M here. + +config DMA_SA11X0 + tristate "SA-11x0 DMA support" + depends on ARCH_SA1100 || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the DMA engine found on Intel StrongARM SA-1100 and + SA-1110 SoCs. This DMA engine can only be used with on-chip + devices. + +config DMA_SUN4I + tristate "Allwinner A10 DMA SoCs support" + depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I + default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the DMA controller present in the sun4i, + sun5i and sun7i Allwinner ARM SoCs. + +config DMA_SUN6I + tristate "Allwinner A31 SoCs DMA support" + depends on ARCH_SUNXI || COMPILE_TEST + depends on RESET_CONTROLLER + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the DMA engine first found in Allwinner A31 SoCs. + +config DW_AXI_DMAC + tristate "Synopsys DesignWare AXI DMA support" + depends on OF + depends on HAS_IOMEM + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for Synopsys DesignWare AXI DMA controller. + NOTE: This driver wasn't tested on 64 bit platform because + of lack 64 bit platform with Synopsys DW AXI DMAC. + +config EP93XX_DMA + bool "Cirrus Logic EP93xx DMA support" + depends on ARCH_EP93XX || COMPILE_TEST + select DMA_ENGINE + help + Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + +config FSL_DMA + tristate "Freescale Elo series DMA support" + depends on FSL_SOC + select DMA_ENGINE + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + help + Enable support for the Freescale Elo series DMA controllers. + The Elo is the DMA controller on some mpc82xx and mpc83xx parts, the + EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on + some Txxx and Bxxx parts. + +config FSL_EDMA + tristate "Freescale eDMA engine support" + depends on OF + depends on HAS_IOMEM + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the Freescale eDMA engine with programmable channel + multiplexing capability for DMA request sources(slot). + This module can be found on Freescale Vybrid and LS-1 SoCs. + +config FSL_QDMA + tristate "NXP Layerscape qDMA engine support" + depends on ARM || ARM64 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + help + Support the NXP Layerscape qDMA engine with command queue and legacy mode. + Channel virtualization is supported through enqueuing of DMA jobs to, + or dequeuing DMA jobs from, different work queues. + This module can be found on NXP Layerscape SoCs. + The qdma driver only work on SoCs with a DPAA hardware block. + +config FSL_RAID + tristate "Freescale RAID engine Support" + depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH + select DMA_ENGINE + select DMA_ENGINE_RAID + help + Enable support for Freescale RAID Engine. RAID Engine is + available on some QorIQ SoCs (like P5020/P5040). It has + the capability to offload memcpy, xor and pq computation + for raid5/6. + +config HISI_DMA + tristate "HiSilicon DMA Engine support" + depends on ARCH_HISI || COMPILE_TEST + depends on PCI_MSI + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support HiSilicon Kunpeng DMA engine. + +config IMG_MDC_DMA + tristate "IMG MDC support" + depends on MIPS || COMPILE_TEST + depends on MFD_SYSCON + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the IMG multi-threaded DMA controller (MDC). + +config IMX_DMA + tristate "i.MX DMA support" + depends on ARCH_MXC + select DMA_ENGINE + help + Support the i.MX DMA engine. This engine is integrated into + Freescale i.MX1/21/27 chips. + +config IMX_SDMA + tristate "i.MX SDMA support" + depends on ARCH_MXC + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the i.MX SDMA engine. This engine is integrated into + Freescale i.MX25/31/35/51/53/6 chips. + +config INTEL_IDMA64 + tristate "Intel integrated DMA 64-bit support" + depends on HAS_IOMEM + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable DMA support for Intel Low Power Subsystem such as found on + Intel Skylake PCH. + +config INTEL_IDXD_BUS + tristate + default INTEL_IDXD + +config INTEL_IDXD + tristate "Intel Data Accelerators support" + depends on PCI && X86_64 && !UML + depends on PCI_MSI + depends on PCI_PASID + depends on SBITMAP + select DMA_ENGINE + help + Enable support for the Intel(R) data accelerators present + in Intel Xeon CPU. + + Say Y if you have such a platform. + + If unsure, say N. + +config INTEL_IDXD_COMPAT + bool "Legacy behavior for idxd driver" + depends on PCI && X86_64 + select INTEL_IDXD_BUS + help + Compatible driver to support old /sys/bus/dsa/drivers/dsa behavior. + The old behavior performed driver bind/unbind for device and wq + devices all under the dsa driver. The compat driver will emulate + the legacy behavior in order to allow existing support apps (i.e. + accel-config) to continue function. It is expected that accel-config + v3.2 and earlier will need the compat mode. A distro with later + accel-config version can disable this compat config. + + Say Y if you have old applications that require such behavior. + + If unsure, say N. + +# Config symbol that collects all the dependencies that's necessary to +# support shared virtual memory for the devices supported by idxd. +config INTEL_IDXD_SVM + bool "Accelerator Shared Virtual Memory Support" + depends on INTEL_IDXD + depends on INTEL_IOMMU_SVM + depends on PCI_PRI + depends on PCI_PASID + depends on PCI_IOV + +config INTEL_IDXD_PERFMON + bool "Intel Data Accelerators performance monitor support" + depends on INTEL_IDXD + help + Enable performance monitor (pmu) support for the Intel(R) + data accelerators present in Intel Xeon CPU. With this + enabled, perf can be used to monitor the DSA (Intel Data + Streaming Accelerator) events described in the Intel DSA + spec. + + If unsure, say N. + +config INTEL_IOATDMA + tristate "Intel I/OAT DMA support" + depends on PCI && X86_64 && !UML + select DMA_ENGINE + select DMA_ENGINE_RAID + select DCA + help + Enable support for the Intel(R) I/OAT DMA engine present + in recent Intel Xeon chipsets. + + Say Y here if you have such a chipset. + + If unsure, say N. + +config K3_DMA + tristate "Hisilicon K3 DMA support" + depends on ARCH_HI3xxx || ARCH_HISI || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the DMA engine for Hisilicon K3 platform + devices. + +config LPC18XX_DMAMUX + bool "NXP LPC18xx/43xx DMA MUX for PL080" + depends on ARCH_LPC18XX || COMPILE_TEST + depends on OF && AMBA_PL08X + select MFD_SYSCON + help + Enable support for DMA on NXP LPC18xx/43xx platforms + with PL080 and multiplexed DMA request lines. + +config MCF_EDMA + tristate "Freescale eDMA engine support, ColdFire mcf5441x SoCs" + depends on M5441x || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the Freescale ColdFire eDMA engine, 64-channel + implementation that performs complex data transfers with + minimal intervention from a host processor. + This module can be found on Freescale ColdFire mcf5441x SoCs. + +config MILBEAUT_HDMAC + tristate "Milbeaut AHB DMA support" + depends on ARCH_MILBEAUT || COMPILE_TEST + depends on OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Say yes here to support the Socionext Milbeaut + HDMAC device. + +config MILBEAUT_XDMAC + tristate "Milbeaut AXI DMA support" + depends on ARCH_MILBEAUT || COMPILE_TEST + depends on OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Say yes here to support the Socionext Milbeaut + XDMAC device. + +config MMP_PDMA + tristate "MMP PDMA support" + depends on ARCH_MMP || ARCH_PXA || COMPILE_TEST + select DMA_ENGINE + help + Support the MMP PDMA engine for PXA and MMP platform. + +config MMP_TDMA + tristate "MMP Two-Channel DMA support" + depends on ARCH_MMP || COMPILE_TEST + select DMA_ENGINE + select GENERIC_ALLOCATOR + help + Support the MMP Two-Channel DMA engine. + This engine used for MMP Audio DMA and pxa910 SQU. + +config MOXART_DMA + tristate "MOXART DMA support" + depends on ARCH_MOXART + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the MOXA ART SoC DMA controller. + + Say Y here if you enabled MMP ADMA, otherwise say N. + +config MPC512X_DMA + tristate "Freescale MPC512x built-in DMA engine support" + depends on PPC_MPC512x || PPC_MPC831x + select DMA_ENGINE + help + Enable support for the Freescale MPC512x built-in DMA engine. + +config MV_XOR + bool "Marvell XOR engine support" + depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST + select DMA_ENGINE + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + help + Enable support for the Marvell XOR engine. + +config MV_XOR_V2 + bool "Marvell XOR engine version 2 support " + depends on ARM64 + select DMA_ENGINE + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + select GENERIC_MSI_IRQ + help + Enable support for the Marvell version 2 XOR engine. + + This engine provides acceleration for copy, XOR and RAID6 + operations, and is available on Marvell Armada 7K and 8K + platforms. + +config MXS_DMA + bool "MXS DMA support" + depends on ARCH_MXS || ARCH_MXC || COMPILE_TEST + select STMP_DEVICE + select DMA_ENGINE + help + Support the MXS DMA engine. This engine including APBH-DMA + and APBX-DMA is integrated into some Freescale chips. + +config NBPFAXI_DMA + tristate "Renesas Type-AXI NBPF DMA support" + select DMA_ENGINE + depends on ARM || COMPILE_TEST + help + Support for "Type-AXI" NBPF DMA IPs from Renesas + +config OWL_DMA + tristate "Actions Semi Owl SoCs DMA support" + depends on ARCH_ACTIONS + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the Actions Semi Owl SoCs DMA controller. + +config PCH_DMA + tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA" + depends on PCI && (X86_32 || COMPILE_TEST) + select DMA_ENGINE + help + Enable support for Intel EG20T PCH DMA engine. + + This driver also can be used for LAPIS Semiconductor IOH(Input/ + Output Hub), ML7213, ML7223 and ML7831. + ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is + for MP(Media Phone) use and ML7831 IOH is for general purpose use. + ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. + ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. + +config PL330_DMA + tristate "DMA API Driver for PL330" + select DMA_ENGINE + depends on ARM_AMBA + help + Select if your platform has one or more PL330 DMACs. + You need to provide platform specific settings via + platform_data for a dma-pl330 device. + +config PXA_DMA + bool "PXA DMA support" + depends on (ARCH_MMP || ARCH_PXA) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the DMA engine for PXA. It is also compatible with MMP PDMA + platform. The internal DMA IP of all PXA variants is supported, with + 16 to 32 channels for peripheral to memory or memory to memory + transfers. + +config PLX_DMA + tristate "PLX ExpressLane PEX Switch DMA Engine Support" + depends on PCI + select DMA_ENGINE + help + Some PLX ExpressLane PCI Switches support additional DMA engines. + These are exposed via extra functions on the switch's + upstream port. Each function exposes one DMA channel. + +config STE_DMA40 + bool "ST-Ericsson DMA40 support" + depends on ARCH_U8500 + select DMA_ENGINE + select SRAM + help + Support for ST-Ericsson DMA40 controller + +config ST_FDMA + tristate "ST FDMA dmaengine support" + depends on ARCH_STI + depends on REMOTEPROC + select ST_SLIM_REMOTEPROC + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for ST FDMA controller. + It supports 16 independent DMA channels, accepts up to 32 DMA requests + + Say Y here if you have such a chipset. + If unsure, say N. + +config STM32_DMA + bool "STMicroelectronics STM32 DMA support" + depends on ARCH_STM32 || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the on-chip DMA controller on STMicroelectronics + STM32 MCUs. + If you have a board based on such a MCU and wish to use DMA say Y + here. + +config STM32_DMAMUX + bool "STMicroelectronics STM32 dma multiplexer support" + depends on STM32_DMA || COMPILE_TEST + help + Enable support for the on-chip DMA multiplexer on STMicroelectronics + STM32 MCUs. + If you have a board based on such a MCU and wish to use DMAMUX say Y + here. + +config STM32_MDMA + bool "STMicroelectronics STM32 master dma support" + depends on ARCH_STM32 || COMPILE_TEST + depends on OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the on-chip MDMA controller on STMicroelectronics + STM32 platforms. + If you have a board based on STM32 SoC and wish to use the master DMA + say Y here. + +config SPRD_DMA + tristate "Spreadtrum DMA support" + depends on ARCH_SPRD || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the on-chip DMA controller on Spreadtrum platform. + +config TXX9_DMAC + tristate "Toshiba TXx9 SoC DMA support" + depends on MACH_TX49XX + select DMA_ENGINE + help + Support the TXx9 SoC internal DMA controller. This can be + integrated in chips such as the Toshiba TX4927/38/39. + +config TEGRA186_GPC_DMA + tristate "NVIDIA Tegra GPC DMA support" + depends on (ARCH_TEGRA || COMPILE_TEST) && ARCH_DMA_ADDR_T_64BIT + depends on IOMMU_API + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the NVIDIA Tegra General Purpose Central DMA controller. + The DMA controller has multiple DMA channels which can be configured + for different peripherals like UART, SPI, etc which are on APB bus. + This DMA controller transfers data from memory to peripheral FIFO + or vice versa. It also supports memory to memory data transfer. + +config TEGRA20_APB_DMA + tristate "NVIDIA Tegra20 APB DMA support" + depends on ARCH_TEGRA || COMPILE_TEST + select DMA_ENGINE + help + Support for the NVIDIA Tegra20 APB DMA controller driver. The + DMA controller is having multiple DMA channel which can be + configured for different peripherals like audio, UART, SPI, + I2C etc which is in APB bus. + This DMA controller transfers data from memory to peripheral fifo + or vice versa. It does not support memory to memory data transfer. + +config TEGRA210_ADMA + tristate "NVIDIA Tegra210 ADMA support" + depends on (ARCH_TEGRA_210_SOC || COMPILE_TEST) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the NVIDIA Tegra210 ADMA controller driver. The + DMA controller has multiple DMA channels and is used to service + various audio clients in the Tegra210 audio processing engine + (APE). This DMA controller transfers data from memory to + peripheral and vice versa. It does not support memory to + memory data transfer. + +config TIMB_DMA + tristate "Timberdale FPGA DMA support" + depends on MFD_TIMBERDALE || COMPILE_TEST + select DMA_ENGINE + help + Enable support for the Timberdale FPGA DMA engine. + +config UNIPHIER_MDMAC + tristate "UniPhier MIO DMAC" + depends on ARCH_UNIPHIER || COMPILE_TEST + depends on OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the MIO DMAC (Media I/O DMA controller) on the + UniPhier platform. This DMA controller is used as the external + DMA engine of the SD/eMMC controllers of the LD4, Pro4, sLD8 SoCs. + +config UNIPHIER_XDMAC + tristate "UniPhier XDMAC support" + depends on ARCH_UNIPHIER || COMPILE_TEST + depends on OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the XDMAC (external DMA controller) on the + UniPhier platform. This DMA controller can transfer data from + memory to memory, memory to peripheral and peripheral to memory. + +config XGENE_DMA + tristate "APM X-Gene DMA support" + depends on ARCH_XGENE || COMPILE_TEST + select DMA_ENGINE + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + help + Enable support for the APM X-Gene SoC DMA engine. + +config XILINX_DMA + tristate "Xilinx AXI DMAS Engine" + depends on HAS_IOMEM + select DMA_ENGINE + help + Enable support for Xilinx AXI VDMA Soft IP. + + AXI VDMA engine provides high-bandwidth direct memory access + between memory and AXI4-Stream video type target + peripherals including peripherals which support AXI4- + Stream Video Protocol. It has two stream interfaces/ + channels, Memory Mapped to Stream (MM2S) and Stream to + Memory Mapped (S2MM) for the data transfers. + AXI CDMA engine provides high-bandwidth direct memory access + between a memory-mapped source address and a memory-mapped + destination address. + AXI DMA engine provides high-bandwidth one dimensional direct + memory access between memory and AXI4-Stream target peripherals. + AXI MCDMA engine provides high-bandwidth direct memory access + between memory and AXI4-Stream target peripherals. It provides + the scatter gather interface with multiple channels independent + configuration support. + +config XILINX_XDMA + tristate "Xilinx DMA/Bridge Subsystem DMA Engine" + depends on HAS_IOMEM + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select REGMAP_MMIO + help + Enable support for Xilinx DMA/Bridge Subsystem DMA engine. The DMA + provides high performance block data movement between Host memory + and the DMA subsystem. These direct memory transfers can be both in + the Host to Card (H2C) and Card to Host (C2H) transfers. + The core also provides up to 16 user interrupt wires that generate + interrupts to the host. + +config XILINX_ZYNQMP_DMA + tristate "Xilinx ZynqMP DMA Engine" + depends on ARCH_ZYNQ || MICROBLAZE || ARM64 || COMPILE_TEST + select DMA_ENGINE + help + Enable support for Xilinx ZynqMP DMA controller. + +config XILINX_ZYNQMP_DPDMA + tristate "Xilinx DPDMA Engine" + depends on HAS_IOMEM && OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for Xilinx ZynqMP DisplayPort DMA. Choose this option + if you have a Xilinx ZynqMP SoC with a DisplayPort subsystem. The + driver provides the dmaengine required by the DisplayPort subsystem + display driver. + +# driver files +source "drivers/dma/bestcomm/Kconfig" + +source "drivers/dma/mediatek/Kconfig" + +source "drivers/dma/ptdma/Kconfig" + +source "drivers/dma/qcom/Kconfig" + +source "drivers/dma/dw/Kconfig" + +source "drivers/dma/dw-edma/Kconfig" + +source "drivers/dma/hsu/Kconfig" + +source "drivers/dma/sf-pdma/Kconfig" + +source "drivers/dma/sh/Kconfig" + +source "drivers/dma/ti/Kconfig" + +source "drivers/dma/fsl-dpaa2-qdma/Kconfig" + +source "drivers/dma/lgm/Kconfig" + +# clients +comment "DMA Clients" + depends on DMA_ENGINE + +config ASYNC_TX_DMA + bool "Async_tx: Offload support for the async_tx api" + depends on DMA_ENGINE + help + This allows the async_tx api to take advantage of offload engines for + memcpy, memset, xor, and raid6 p+q operations. If your platform has + a dma engine that can perform raid operations and you have enabled + MD_RAID456 say Y. + + If unsure, say N. + +config DMATEST + tristate "DMA Test client" + depends on DMA_ENGINE + select DMA_ENGINE_RAID + help + Simple DMA test client. Say N unless you're debugging a + DMA Device driver. + +config DMA_ENGINE_RAID + bool + +endif diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile new file mode 100644 index 0000000000..83553a97a0 --- /dev/null +++ b/drivers/dma/Makefile @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0 +#dmaengine debug flags +subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG +subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG + +#core +obj-$(CONFIG_DMA_ENGINE) += dmaengine.o +obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o +obj-$(CONFIG_DMA_ACPI) += acpi-dma.o +obj-$(CONFIG_DMA_OF) += of-dma.o + +#dmatest +obj-$(CONFIG_DMATEST) += dmatest.o + +#devices +obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o +obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ +obj-$(CONFIG_AMD_PTDMA) += ptdma/ +obj-$(CONFIG_APPLE_ADMAC) += apple-admac.o +obj-$(CONFIG_AT_HDMAC) += at_hdmac.o +obj-$(CONFIG_AT_XDMAC) += at_xdmac.o +obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o +obj-$(CONFIG_BCM_SBA_RAID) += bcm-sba-raid.o +obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o +obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o +obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o +obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o +obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o +obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/ +obj-$(CONFIG_DW_DMAC_CORE) += dw/ +obj-$(CONFIG_DW_EDMA) += dw-edma/ +obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o +obj-$(CONFIG_FSL_DMA) += fsldma.o +fsl-edma-objs := fsl-edma-main.o fsl-edma-common.o +obj-$(CONFIG_FSL_EDMA) += fsl-edma.o +mcf-edma-objs := mcf-edma-main.o fsl-edma-common.o +obj-$(CONFIG_MCF_EDMA) += mcf-edma.o +obj-$(CONFIG_FSL_QDMA) += fsl-qdma.o +obj-$(CONFIG_FSL_RAID) += fsl_raid.o +obj-$(CONFIG_HISI_DMA) += hisi_dma.o +obj-$(CONFIG_HSU_DMA) += hsu/ +obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o +obj-$(CONFIG_IMX_DMA) += imx-dma.o +obj-$(CONFIG_IMX_SDMA) += imx-sdma.o +obj-$(CONFIG_INTEL_IDMA64) += idma64.o +obj-$(CONFIG_INTEL_IOATDMA) += ioat/ +obj-y += idxd/ +obj-$(CONFIG_K3_DMA) += k3dma.o +obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o +obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o +obj-$(CONFIG_MILBEAUT_XDMAC) += milbeaut-xdmac.o +obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o +obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o +obj-$(CONFIG_MOXART_DMA) += moxart-dma.o +obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o +obj-$(CONFIG_MV_XOR) += mv_xor.o +obj-$(CONFIG_MV_XOR_V2) += mv_xor_v2.o +obj-$(CONFIG_MXS_DMA) += mxs-dma.o +obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o +obj-$(CONFIG_OWL_DMA) += owl-dma.o +obj-$(CONFIG_PCH_DMA) += pch_dma.o +obj-$(CONFIG_PL330_DMA) += pl330.o +obj-$(CONFIG_PLX_DMA) += plx_dma.o +obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ +obj-$(CONFIG_PXA_DMA) += pxa_dma.o +obj-$(CONFIG_RENESAS_DMA) += sh/ +obj-$(CONFIG_SF_PDMA) += sf-pdma/ +obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o +obj-$(CONFIG_STM32_DMA) += stm32-dma.o +obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o +obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o +obj-$(CONFIG_SPRD_DMA) += sprd-dma.o +obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_TEGRA186_GPC_DMA) += tegra186-gpc-dma.o +obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o +obj-$(CONFIG_TEGRA210_ADMA) += tegra210-adma.o +obj-$(CONFIG_TIMB_DMA) += timb_dma.o +obj-$(CONFIG_UNIPHIER_MDMAC) += uniphier-mdmac.o +obj-$(CONFIG_UNIPHIER_XDMAC) += uniphier-xdmac.o +obj-$(CONFIG_XGENE_DMA) += xgene-dma.o +obj-$(CONFIG_ST_FDMA) += st_fdma.o +obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/ +obj-$(CONFIG_INTEL_LDMA) += lgm/ + +obj-y += mediatek/ +obj-y += qcom/ +obj-y += ti/ +obj-y += xilinx/ diff --git a/drivers/dma/TODO b/drivers/dma/TODO new file mode 100644 index 0000000000..b8045cd42e --- /dev/null +++ b/drivers/dma/TODO @@ -0,0 +1,12 @@ +TODO for slave dma + +1. Move remaining drivers to use new slave interface +2. Remove old slave pointer machansim +3. Make issue_pending to start the transaction in below drivers + - mpc512x_dma + - imx-dma + - imx-sdma + - mxs-dma.c + - intel_mid_dma +4. Check other subsystems for dma drivers and merge/move to dmaengine +5. Remove dma_slave_config's dma direction. diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c new file mode 100644 index 0000000000..5906eae26e --- /dev/null +++ b/drivers/dma/acpi-dma.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ACPI helpers for DMA request / controller + * + * Based on of-dma.c + * + * Copyright (C) 2013, Intel Corporation + * Authors: Andy Shevchenko + * Mika Westerberg + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(acpi_dma_list); +static DEFINE_MUTEX(acpi_dma_lock); + +/** + * acpi_dma_parse_resource_group - match device and parse resource group + * @grp: CSRT resource group + * @adev: ACPI device to match with + * @adma: struct acpi_dma of the given DMA controller + * + * In order to match a device from DSDT table to the corresponding CSRT device + * we use MMIO address and IRQ. + * + * Return: + * 1 on success, 0 when no information is available, or appropriate errno value + * on error. + */ +static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp, + struct acpi_device *adev, struct acpi_dma *adma) +{ + const struct acpi_csrt_shared_info *si; + struct list_head resource_list; + struct resource_entry *rentry; + resource_size_t mem = 0, irq = 0; + int ret; + + if (grp->shared_info_length != sizeof(struct acpi_csrt_shared_info)) + return -ENODEV; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); + if (ret <= 0) + return 0; + + list_for_each_entry(rentry, &resource_list, node) { + if (resource_type(rentry->res) == IORESOURCE_MEM) + mem = rentry->res->start; + else if (resource_type(rentry->res) == IORESOURCE_IRQ) + irq = rentry->res->start; + } + + acpi_dev_free_resource_list(&resource_list); + + /* Consider initial zero values as resource not found */ + if (mem == 0 && irq == 0) + return 0; + + si = (const struct acpi_csrt_shared_info *)&grp[1]; + + /* Match device by MMIO */ + if (si->mmio_base_low != lower_32_bits(mem) || + si->mmio_base_high != upper_32_bits(mem)) + return 0; + + /* + * acpi_gsi_to_irq() can't be used because some platforms do not save + * registered IRQs in the MP table. Instead we just try to register + * the GSI, which is the core part of the above mentioned function. + */ + ret = acpi_register_gsi(NULL, si->gsi_interrupt, si->interrupt_mode, si->interrupt_polarity); + if (ret < 0) + return 0; + + /* Match device by Linux vIRQ */ + if (ret != irq) + return 0; + + dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n", + (char *)&grp->vendor_id, grp->device_id, grp->revision); + + /* Check if the request line range is available */ + if (si->base_request_line == 0 && si->num_handshake_signals == 0) + return 0; + + /* Set up DMA mask based on value from CSRT */ + ret = dma_coerce_mask_and_coherent(&adev->dev, + DMA_BIT_MASK(si->dma_address_width)); + if (ret) + return 0; + + adma->base_request_line = si->base_request_line; + adma->end_request_line = si->base_request_line + + si->num_handshake_signals - 1; + + dev_dbg(&adev->dev, "request line base: 0x%04x end: 0x%04x\n", + adma->base_request_line, adma->end_request_line); + + return 1; +} + +/** + * acpi_dma_parse_csrt - parse CSRT to exctract additional DMA resources + * @adev: ACPI device to match with + * @adma: struct acpi_dma of the given DMA controller + * + * CSRT or Core System Resources Table is a proprietary ACPI table + * introduced by Microsoft. This table can contain devices that are not in + * the system DSDT table. In particular DMA controllers might be described + * here. + * + * We are using this table to get the request line range of the specific DMA + * controller to be used later. + */ +static void acpi_dma_parse_csrt(struct acpi_device *adev, struct acpi_dma *adma) +{ + struct acpi_csrt_group *grp, *end; + struct acpi_table_csrt *csrt; + acpi_status status; + int ret; + + status = acpi_get_table(ACPI_SIG_CSRT, 0, + (struct acpi_table_header **)&csrt); + if (ACPI_FAILURE(status)) { + if (status != AE_NOT_FOUND) + dev_warn(&adev->dev, "failed to get the CSRT table\n"); + return; + } + + grp = (struct acpi_csrt_group *)(csrt + 1); + end = (struct acpi_csrt_group *)((void *)csrt + csrt->header.length); + + while (grp < end) { + ret = acpi_dma_parse_resource_group(grp, adev, adma); + if (ret < 0) { + dev_warn(&adev->dev, + "error in parsing resource group\n"); + break; + } + + grp = (struct acpi_csrt_group *)((void *)grp + grp->length); + } + + acpi_put_table((struct acpi_table_header *)csrt); +} + +/** + * acpi_dma_controller_register - Register a DMA controller to ACPI DMA helpers + * @dev: struct device of DMA controller + * @acpi_dma_xlate: translation function which converts a dma specifier + * into a dma_chan structure + * @data: pointer to controller specific data to be used by + * translation function + * + * Allocated memory should be freed with appropriate acpi_dma_controller_free() + * call. + * + * Return: + * 0 on success or appropriate errno value on error. + */ +int acpi_dma_controller_register(struct device *dev, + struct dma_chan *(*acpi_dma_xlate) + (struct acpi_dma_spec *, struct acpi_dma *), + void *data) +{ + struct acpi_device *adev; + struct acpi_dma *adma; + + if (!dev || !acpi_dma_xlate) + return -EINVAL; + + /* Check if the device was enumerated by ACPI */ + adev = ACPI_COMPANION(dev); + if (!adev) + return -EINVAL; + + adma = kzalloc(sizeof(*adma), GFP_KERNEL); + if (!adma) + return -ENOMEM; + + adma->dev = dev; + adma->acpi_dma_xlate = acpi_dma_xlate; + adma->data = data; + + acpi_dma_parse_csrt(adev, adma); + + /* Now queue acpi_dma controller structure in list */ + mutex_lock(&acpi_dma_lock); + list_add_tail(&adma->dma_controllers, &acpi_dma_list); + mutex_unlock(&acpi_dma_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(acpi_dma_controller_register); + +/** + * acpi_dma_controller_free - Remove a DMA controller from ACPI DMA helpers list + * @dev: struct device of DMA controller + * + * Memory allocated by acpi_dma_controller_register() is freed here. + * + * Return: + * 0 on success or appropriate errno value on error. + */ +int acpi_dma_controller_free(struct device *dev) +{ + struct acpi_dma *adma; + + if (!dev) + return -EINVAL; + + mutex_lock(&acpi_dma_lock); + + list_for_each_entry(adma, &acpi_dma_list, dma_controllers) + if (adma->dev == dev) { + list_del(&adma->dma_controllers); + mutex_unlock(&acpi_dma_lock); + kfree(adma); + return 0; + } + + mutex_unlock(&acpi_dma_lock); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(acpi_dma_controller_free); + +static void devm_acpi_dma_release(struct device *dev, void *res) +{ + acpi_dma_controller_free(dev); +} + +/** + * devm_acpi_dma_controller_register - resource managed acpi_dma_controller_register() + * @dev: device that is registering this DMA controller + * @acpi_dma_xlate: translation function + * @data: pointer to controller specific data + * + * Managed acpi_dma_controller_register(). DMA controller registered by this + * function are automatically freed on driver detach. See + * acpi_dma_controller_register() for more information. + * + * Return: + * 0 on success or appropriate errno value on error. + */ +int devm_acpi_dma_controller_register(struct device *dev, + struct dma_chan *(*acpi_dma_xlate) + (struct acpi_dma_spec *, struct acpi_dma *), + void *data) +{ + void *res; + int ret; + + res = devres_alloc(devm_acpi_dma_release, 0, GFP_KERNEL); + if (!res) + return -ENOMEM; + + ret = acpi_dma_controller_register(dev, acpi_dma_xlate, data); + if (ret) { + devres_free(res); + return ret; + } + devres_add(dev, res); + return 0; +} +EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_register); + +/** + * devm_acpi_dma_controller_free - resource managed acpi_dma_controller_free() + * @dev: device that is unregistering as DMA controller + * + * Unregister a DMA controller registered with + * devm_acpi_dma_controller_register(). Normally this function will not need to + * be called and the resource management code will ensure that the resource is + * freed. + */ +void devm_acpi_dma_controller_free(struct device *dev) +{ + WARN_ON(devres_release(dev, devm_acpi_dma_release, NULL, NULL)); +} +EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_free); + +/** + * acpi_dma_update_dma_spec - prepare dma specifier to pass to translation function + * @adma: struct acpi_dma of DMA controller + * @dma_spec: dma specifier to update + * + * Accordingly to ACPI 5.0 Specification Table 6-170 "Fixed DMA Resource + * Descriptor": + * DMA Request Line bits is a platform-relative number uniquely + * identifying the request line assigned. Request line-to-Controller + * mapping is done in a controller-specific OS driver. + * That's why we can safely adjust slave_id when the appropriate controller is + * found. + * + * Return: + * 0, if no information is avaiable, -1 on mismatch, and 1 otherwise. + */ +static int acpi_dma_update_dma_spec(struct acpi_dma *adma, + struct acpi_dma_spec *dma_spec) +{ + /* Set link to the DMA controller device */ + dma_spec->dev = adma->dev; + + /* Check if the request line range is available */ + if (adma->base_request_line == 0 && adma->end_request_line == 0) + return 0; + + /* Check if slave_id falls to the range */ + if (dma_spec->slave_id < adma->base_request_line || + dma_spec->slave_id > adma->end_request_line) + return -1; + + /* + * Here we adjust slave_id. It should be a relative number to the base + * request line. + */ + dma_spec->slave_id -= adma->base_request_line; + + return 1; +} + +struct acpi_dma_parser_data { + struct acpi_dma_spec dma_spec; + size_t index; + size_t n; +}; + +/** + * acpi_dma_parse_fixed_dma - Parse FixedDMA ACPI resources to a DMA specifier + * @res: struct acpi_resource to get FixedDMA resources from + * @data: pointer to a helper struct acpi_dma_parser_data + */ +static int acpi_dma_parse_fixed_dma(struct acpi_resource *res, void *data) +{ + struct acpi_dma_parser_data *pdata = data; + + if (res->type == ACPI_RESOURCE_TYPE_FIXED_DMA) { + struct acpi_resource_fixed_dma *dma = &res->data.fixed_dma; + + if (pdata->n++ == pdata->index) { + pdata->dma_spec.chan_id = dma->channels; + pdata->dma_spec.slave_id = dma->request_lines; + } + } + + /* Tell the ACPI core to skip this resource */ + return 1; +} + +/** + * acpi_dma_request_slave_chan_by_index - Get the DMA slave channel + * @dev: struct device to get DMA request from + * @index: index of FixedDMA descriptor for @dev + * + * Return: + * Pointer to appropriate dma channel on success or an error pointer. + */ +struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, + size_t index) +{ + struct acpi_dma_parser_data pdata; + struct acpi_dma_spec *dma_spec = &pdata.dma_spec; + struct acpi_device *adev = ACPI_COMPANION(dev); + struct list_head resource_list; + struct acpi_dma *adma; + struct dma_chan *chan = NULL; + int found; + int ret; + + memset(&pdata, 0, sizeof(pdata)); + pdata.index = index; + + /* Initial values for the request line and channel */ + dma_spec->chan_id = -1; + dma_spec->slave_id = -1; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, + acpi_dma_parse_fixed_dma, &pdata); + acpi_dev_free_resource_list(&resource_list); + if (ret < 0) + return ERR_PTR(ret); + + if (dma_spec->slave_id < 0 || dma_spec->chan_id < 0) + return ERR_PTR(-ENODEV); + + mutex_lock(&acpi_dma_lock); + + list_for_each_entry(adma, &acpi_dma_list, dma_controllers) { + /* + * We are not going to call translation function if slave_id + * doesn't fall to the request range. + */ + found = acpi_dma_update_dma_spec(adma, dma_spec); + if (found < 0) + continue; + chan = adma->acpi_dma_xlate(dma_spec, adma); + /* + * Try to get a channel only from the DMA controller that + * matches the slave_id. See acpi_dma_update_dma_spec() + * description for the details. + */ + if (found > 0 || chan) + break; + } + + mutex_unlock(&acpi_dma_lock); + return chan ? chan : ERR_PTR(-EPROBE_DEFER); +} +EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_index); + +/** + * acpi_dma_request_slave_chan_by_name - Get the DMA slave channel + * @dev: struct device to get DMA request from + * @name: represents corresponding FixedDMA descriptor for @dev + * + * In order to support both Device Tree and ACPI in a single driver we + * translate the names "tx" and "rx" here based on the most common case where + * the first FixedDMA descriptor is TX and second is RX. + * + * If the device has "dma-names" property the FixedDMA descriptor indices + * are retrieved based on those. Otherwise the function falls back using + * hardcoded indices. + * + * Return: + * Pointer to appropriate dma channel on success or an error pointer. + */ +struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev, + const char *name) +{ + int index; + + index = device_property_match_string(dev, "dma-names", name); + if (index < 0) { + if (!strcmp(name, "tx")) + index = 0; + else if (!strcmp(name, "rx")) + index = 1; + else + return ERR_PTR(-ENODEV); + } + + dev_dbg(dev, "Looking for DMA channel \"%s\" at index %d...\n", name, index); + return acpi_dma_request_slave_chan_by_index(dev, index); +} +EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_name); + +/** + * acpi_dma_simple_xlate - Simple ACPI DMA engine translation helper + * @dma_spec: pointer to ACPI DMA specifier + * @adma: pointer to ACPI DMA controller data + * + * A simple translation function for ACPI based devices. Passes &struct + * dma_spec to the DMA controller driver provided filter function. + * + * Return: + * Pointer to the channel if found or %NULL otherwise. + */ +struct dma_chan *acpi_dma_simple_xlate(struct acpi_dma_spec *dma_spec, + struct acpi_dma *adma) +{ + struct acpi_dma_filter_info *info = adma->data; + + if (!info || !info->filter_fn) + return NULL; + + return dma_request_channel(info->dma_cap, info->filter_fn, dma_spec); +} +EXPORT_SYMBOL_GPL(acpi_dma_simple_xlate); diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c new file mode 100644 index 0000000000..4153c2edb0 --- /dev/null +++ b/drivers/dma/altera-msgdma.c @@ -0,0 +1,963 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DMA driver for Altera mSGDMA IP core + * + * Copyright (C) 2017 Stefan Roese + * + * Based on drivers/dma/xilinx/zynqmp_dma.c, which is: + * Copyright (C) 2016 Xilinx, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +#define MSGDMA_MAX_TRANS_LEN U32_MAX +#define MSGDMA_DESC_NUM 1024 + +/** + * struct msgdma_extended_desc - implements an extended descriptor + * @read_addr_lo: data buffer source address low bits + * @write_addr_lo: data buffer destination address low bits + * @len: the number of bytes to transfer per descriptor + * @burst_seq_num: bit 31:24 write burst + * bit 23:16 read burst + * bit 15:00 sequence number + * @stride: bit 31:16 write stride + * bit 15:00 read stride + * @read_addr_hi: data buffer source address high bits + * @write_addr_hi: data buffer destination address high bits + * @control: characteristics of the transfer + */ +struct msgdma_extended_desc { + u32 read_addr_lo; + u32 write_addr_lo; + u32 len; + u32 burst_seq_num; + u32 stride; + u32 read_addr_hi; + u32 write_addr_hi; + u32 control; +}; + +/* mSGDMA descriptor control field bit definitions */ +#define MSGDMA_DESC_CTL_SET_CH(x) ((x) & 0xff) +#define MSGDMA_DESC_CTL_GEN_SOP BIT(8) +#define MSGDMA_DESC_CTL_GEN_EOP BIT(9) +#define MSGDMA_DESC_CTL_PARK_READS BIT(10) +#define MSGDMA_DESC_CTL_PARK_WRITES BIT(11) +#define MSGDMA_DESC_CTL_END_ON_EOP BIT(12) +#define MSGDMA_DESC_CTL_END_ON_LEN BIT(13) +#define MSGDMA_DESC_CTL_TR_COMP_IRQ BIT(14) +#define MSGDMA_DESC_CTL_EARLY_IRQ BIT(15) +#define MSGDMA_DESC_CTL_TR_ERR_IRQ GENMASK(23, 16) +#define MSGDMA_DESC_CTL_EARLY_DONE BIT(24) + +/* + * Writing "1" the "go" bit commits the entire descriptor into the + * descriptor FIFO(s) + */ +#define MSGDMA_DESC_CTL_GO BIT(31) + +/* Tx buffer control flags */ +#define MSGDMA_DESC_CTL_TX_FIRST (MSGDMA_DESC_CTL_GEN_SOP | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_MIDDLE (MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_LAST (MSGDMA_DESC_CTL_GEN_EOP | \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_SINGLE (MSGDMA_DESC_CTL_GEN_SOP | \ + MSGDMA_DESC_CTL_GEN_EOP | \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_RX_SINGLE (MSGDMA_DESC_CTL_END_ON_EOP | \ + MSGDMA_DESC_CTL_END_ON_LEN | \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_EARLY_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +/* mSGDMA extended descriptor stride definitions */ +#define MSGDMA_DESC_STRIDE_RD 0x00000001 +#define MSGDMA_DESC_STRIDE_WR 0x00010000 +#define MSGDMA_DESC_STRIDE_RW 0x00010001 + +/* mSGDMA dispatcher control and status register map */ +#define MSGDMA_CSR_STATUS 0x00 /* Read / Clear */ +#define MSGDMA_CSR_CONTROL 0x04 /* Read / Write */ +#define MSGDMA_CSR_RW_FILL_LEVEL 0x08 /* 31:16 - write fill level */ + /* 15:00 - read fill level */ +#define MSGDMA_CSR_RESP_FILL_LEVEL 0x0c /* response FIFO fill level */ +#define MSGDMA_CSR_RW_SEQ_NUM 0x10 /* 31:16 - write seq number */ + /* 15:00 - read seq number */ + +/* mSGDMA CSR status register bit definitions */ +#define MSGDMA_CSR_STAT_BUSY BIT(0) +#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY BIT(1) +#define MSGDMA_CSR_STAT_DESC_BUF_FULL BIT(2) +#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY BIT(3) +#define MSGDMA_CSR_STAT_RESP_BUF_FULL BIT(4) +#define MSGDMA_CSR_STAT_STOPPED BIT(5) +#define MSGDMA_CSR_STAT_RESETTING BIT(6) +#define MSGDMA_CSR_STAT_STOPPED_ON_ERR BIT(7) +#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY BIT(8) +#define MSGDMA_CSR_STAT_IRQ BIT(9) +#define MSGDMA_CSR_STAT_MASK GENMASK(9, 0) +#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ GENMASK(8, 0) + +#define DESC_EMPTY (MSGDMA_CSR_STAT_DESC_BUF_EMPTY | \ + MSGDMA_CSR_STAT_RESP_BUF_EMPTY) + +/* mSGDMA CSR control register bit definitions */ +#define MSGDMA_CSR_CTL_STOP BIT(0) +#define MSGDMA_CSR_CTL_RESET BIT(1) +#define MSGDMA_CSR_CTL_STOP_ON_ERR BIT(2) +#define MSGDMA_CSR_CTL_STOP_ON_EARLY BIT(3) +#define MSGDMA_CSR_CTL_GLOBAL_INTR BIT(4) +#define MSGDMA_CSR_CTL_STOP_DESCS BIT(5) + +/* mSGDMA CSR fill level bits */ +#define MSGDMA_CSR_WR_FILL_LEVEL_GET(v) (((v) & 0xffff0000) >> 16) +#define MSGDMA_CSR_RD_FILL_LEVEL_GET(v) ((v) & 0x0000ffff) +#define MSGDMA_CSR_RESP_FILL_LEVEL_GET(v) ((v) & 0x0000ffff) + +#define MSGDMA_CSR_SEQ_NUM_GET(v) (((v) & 0xffff0000) >> 16) + +/* mSGDMA response register map */ +#define MSGDMA_RESP_BYTES_TRANSFERRED 0x00 +#define MSGDMA_RESP_STATUS 0x04 + +/* mSGDMA response register bit definitions */ +#define MSGDMA_RESP_EARLY_TERM BIT(8) +#define MSGDMA_RESP_ERR_MASK 0xff + +/** + * struct msgdma_sw_desc - implements a sw descriptor + * @async_tx: support for the async_tx api + * @hw_desc: assosiated HW descriptor + * @node: node to move from the free list to the tx list + * @tx_list: transmit list node + */ +struct msgdma_sw_desc { + struct dma_async_tx_descriptor async_tx; + struct msgdma_extended_desc hw_desc; + struct list_head node; + struct list_head tx_list; +}; + +/* + * struct msgdma_device - DMA device structure + */ +struct msgdma_device { + spinlock_t lock; + struct device *dev; + struct tasklet_struct irq_tasklet; + struct list_head pending_list; + struct list_head free_list; + struct list_head active_list; + struct list_head done_list; + u32 desc_free_cnt; + bool idle; + + struct dma_device dmadev; + struct dma_chan dmachan; + dma_addr_t hw_desq; + struct msgdma_sw_desc *sw_desq; + unsigned int npendings; + + struct dma_slave_config slave_cfg; + + int irq; + + /* mSGDMA controller */ + void __iomem *csr; + + /* mSGDMA descriptors */ + void __iomem *desc; + + /* mSGDMA response */ + void __iomem *resp; +}; + +#define to_mdev(chan) container_of(chan, struct msgdma_device, dmachan) +#define tx_to_desc(tx) container_of(tx, struct msgdma_sw_desc, async_tx) + +/** + * msgdma_get_descriptor - Get the sw descriptor from the pool + * @mdev: Pointer to the Altera mSGDMA device structure + * + * Return: The sw descriptor + */ +static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&mdev->lock, flags); + desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node); + list_del(&desc->node); + spin_unlock_irqrestore(&mdev->lock, flags); + + INIT_LIST_HEAD(&desc->tx_list); + + return desc; +} + +/** + * msgdma_free_descriptor - Issue pending transactions + * @mdev: Pointer to the Altera mSGDMA device structure + * @desc: Transaction descriptor pointer + */ +static void msgdma_free_descriptor(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + struct msgdma_sw_desc *child, *next; + + mdev->desc_free_cnt++; + list_add_tail(&desc->node, &mdev->free_list); + list_for_each_entry_safe(child, next, &desc->tx_list, node) { + mdev->desc_free_cnt++; + list_move_tail(&child->node, &mdev->free_list); + } +} + +/** + * msgdma_free_desc_list - Free descriptors list + * @mdev: Pointer to the Altera mSGDMA device structure + * @list: List to parse and delete the descriptor + */ +static void msgdma_free_desc_list(struct msgdma_device *mdev, + struct list_head *list) +{ + struct msgdma_sw_desc *desc, *next; + + list_for_each_entry_safe(desc, next, list, node) + msgdma_free_descriptor(mdev, desc); +} + +/** + * msgdma_desc_config - Configure the descriptor + * @desc: Hw descriptor pointer + * @dst: Destination buffer address + * @src: Source buffer address + * @len: Transfer length + * @stride: Read/write stride value to set + */ +static void msgdma_desc_config(struct msgdma_extended_desc *desc, + dma_addr_t dst, dma_addr_t src, size_t len, + u32 stride) +{ + /* Set lower 32bits of src & dst addresses in the descriptor */ + desc->read_addr_lo = lower_32_bits(src); + desc->write_addr_lo = lower_32_bits(dst); + + /* Set upper 32bits of src & dst addresses in the descriptor */ + desc->read_addr_hi = upper_32_bits(src); + desc->write_addr_hi = upper_32_bits(dst); + + desc->len = len; + desc->stride = stride; + desc->burst_seq_num = 0; /* 0 will result in max burst length */ + + /* + * Don't set interrupt on xfer end yet, this will be done later + * for the "last" descriptor + */ + desc->control = MSGDMA_DESC_CTL_TR_ERR_IRQ | MSGDMA_DESC_CTL_GO | + MSGDMA_DESC_CTL_END_ON_LEN; +} + +/** + * msgdma_desc_config_eod - Mark the descriptor as end descriptor + * @desc: Hw descriptor pointer + */ +static void msgdma_desc_config_eod(struct msgdma_extended_desc *desc) +{ + desc->control |= MSGDMA_DESC_CTL_TR_COMP_IRQ; +} + +/** + * msgdma_tx_submit - Submit DMA transaction + * @tx: Async transaction descriptor pointer + * + * Return: cookie value + */ +static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct msgdma_device *mdev = to_mdev(tx->chan); + struct msgdma_sw_desc *new; + dma_cookie_t cookie; + unsigned long flags; + + new = tx_to_desc(tx); + spin_lock_irqsave(&mdev->lock, flags); + cookie = dma_cookie_assign(tx); + + list_add_tail(&new->node, &mdev->pending_list); + spin_unlock_irqrestore(&mdev->lock, flags); + + return cookie; +} + +/** + * msgdma_prep_memcpy - prepare descriptors for memcpy transaction + * @dchan: DMA channel + * @dma_dst: Destination buffer address + * @dma_src: Source buffer address + * @len: Transfer length + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, + dma_addr_t dma_src, size_t len, ulong flags) +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct msgdma_sw_desc *new, *first = NULL; + struct msgdma_extended_desc *desc; + size_t copy; + u32 desc_cnt; + unsigned long irqflags; + + desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN); + + spin_lock_irqsave(&mdev->lock, irqflags); + if (desc_cnt > mdev->desc_free_cnt) { + spin_unlock_irqrestore(&mdev->lock, irqflags); + dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); + return NULL; + } + mdev->desc_free_cnt -= desc_cnt; + spin_unlock_irqrestore(&mdev->lock, irqflags); + + do { + /* Allocate and populate the descriptor */ + new = msgdma_get_descriptor(mdev); + + copy = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN); + desc = &new->hw_desc; + msgdma_desc_config(desc, dma_dst, dma_src, copy, + MSGDMA_DESC_STRIDE_RW); + len -= copy; + dma_src += copy; + dma_dst += copy; + if (!first) + first = new; + else + list_add_tail(&new->node, &first->tx_list); + } while (len); + + msgdma_desc_config_eod(desc); + async_tx_ack(&first->async_tx); + first->async_tx.flags = flags; + + return &first->async_tx; +} + +/** + * msgdma_prep_slave_sg - prepare descriptors for a slave sg transaction + * + * @dchan: DMA channel + * @sgl: Destination scatter list + * @sg_len: Number of entries in destination scatter list + * @dir: DMA transfer direction + * @flags: transfer ack flags + * @context: transfer context (unused) + */ +static struct dma_async_tx_descriptor * +msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) + +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct dma_slave_config *cfg = &mdev->slave_cfg; + struct msgdma_sw_desc *new, *first = NULL; + void *desc = NULL; + size_t len, avail; + dma_addr_t dma_dst, dma_src; + u32 desc_cnt = 0, i; + struct scatterlist *sg; + u32 stride; + unsigned long irqflags; + + for_each_sg(sgl, sg, sg_len, i) + desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN); + + spin_lock_irqsave(&mdev->lock, irqflags); + if (desc_cnt > mdev->desc_free_cnt) { + spin_unlock_irqrestore(&mdev->lock, irqflags); + dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); + return NULL; + } + mdev->desc_free_cnt -= desc_cnt; + spin_unlock_irqrestore(&mdev->lock, irqflags); + + avail = sg_dma_len(sgl); + + /* Run until we are out of scatterlist entries */ + while (true) { + /* Allocate and populate the descriptor */ + new = msgdma_get_descriptor(mdev); + + desc = &new->hw_desc; + len = min_t(size_t, avail, MSGDMA_MAX_TRANS_LEN); + + if (dir == DMA_MEM_TO_DEV) { + dma_src = sg_dma_address(sgl) + sg_dma_len(sgl) - avail; + dma_dst = cfg->dst_addr; + stride = MSGDMA_DESC_STRIDE_RD; + } else { + dma_src = cfg->src_addr; + dma_dst = sg_dma_address(sgl) + sg_dma_len(sgl) - avail; + stride = MSGDMA_DESC_STRIDE_WR; + } + msgdma_desc_config(desc, dma_dst, dma_src, len, stride); + avail -= len; + + if (!first) + first = new; + else + list_add_tail(&new->node, &first->tx_list); + + /* Fetch the next scatterlist entry */ + if (avail == 0) { + if (sg_len == 0) + break; + sgl = sg_next(sgl); + if (sgl == NULL) + break; + sg_len--; + avail = sg_dma_len(sgl); + } + } + + msgdma_desc_config_eod(desc); + first->async_tx.flags = flags; + + return &first->async_tx; +} + +static int msgdma_dma_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct msgdma_device *mdev = to_mdev(dchan); + + memcpy(&mdev->slave_cfg, config, sizeof(*config)); + + return 0; +} + +static void msgdma_reset(struct msgdma_device *mdev) +{ + u32 val; + int ret; + + /* Reset mSGDMA */ + iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS); + iowrite32(MSGDMA_CSR_CTL_RESET, mdev->csr + MSGDMA_CSR_CONTROL); + + ret = readl_poll_timeout(mdev->csr + MSGDMA_CSR_STATUS, val, + (val & MSGDMA_CSR_STAT_RESETTING) == 0, + 1, 10000); + if (ret) + dev_err(mdev->dev, "DMA channel did not reset\n"); + + /* Clear all status bits */ + iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS); + + /* Enable the DMA controller including interrupts */ + iowrite32(MSGDMA_CSR_CTL_STOP_ON_ERR | MSGDMA_CSR_CTL_STOP_ON_EARLY | + MSGDMA_CSR_CTL_GLOBAL_INTR, mdev->csr + MSGDMA_CSR_CONTROL); + + mdev->idle = true; +}; + +static void msgdma_copy_one(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + void __iomem *hw_desc = mdev->desc; + + /* + * Check if the DESC FIFO it not full. If its full, we need to wait + * for at least one entry to become free again + */ + while (ioread32(mdev->csr + MSGDMA_CSR_STATUS) & + MSGDMA_CSR_STAT_DESC_BUF_FULL) + mdelay(1); + + /* + * The descriptor needs to get copied into the descriptor FIFO + * of the DMA controller. The descriptor will get flushed to the + * FIFO, once the last word (control word) is written. Since we + * are not 100% sure that memcpy() writes all word in the "correct" + * oder (address from low to high) on all architectures, we make + * sure this control word is written last by single coding it and + * adding some write-barriers here. + */ + memcpy((void __force *)hw_desc, &desc->hw_desc, + sizeof(desc->hw_desc) - sizeof(u32)); + + /* Write control word last to flush this descriptor into the FIFO */ + mdev->idle = false; + wmb(); + iowrite32(desc->hw_desc.control, hw_desc + + offsetof(struct msgdma_extended_desc, control)); + wmb(); +} + +/** + * msgdma_copy_desc_to_fifo - copy descriptor(s) into controller FIFO + * @mdev: Pointer to the Altera mSGDMA device structure + * @desc: Transaction descriptor pointer + */ +static void msgdma_copy_desc_to_fifo(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + struct msgdma_sw_desc *sdesc, *next; + + msgdma_copy_one(mdev, desc); + + list_for_each_entry_safe(sdesc, next, &desc->tx_list, node) + msgdma_copy_one(mdev, sdesc); +} + +/** + * msgdma_start_transfer - Initiate the new transfer + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_start_transfer(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + + if (!mdev->idle) + return; + + desc = list_first_entry_or_null(&mdev->pending_list, + struct msgdma_sw_desc, node); + if (!desc) + return; + + list_splice_tail_init(&mdev->pending_list, &mdev->active_list); + msgdma_copy_desc_to_fifo(mdev, desc); +} + +/** + * msgdma_issue_pending - Issue pending transactions + * @chan: DMA channel pointer + */ +static void msgdma_issue_pending(struct dma_chan *chan) +{ + struct msgdma_device *mdev = to_mdev(chan); + unsigned long flags; + + spin_lock_irqsave(&mdev->lock, flags); + msgdma_start_transfer(mdev); + spin_unlock_irqrestore(&mdev->lock, flags); +} + +/** + * msgdma_chan_desc_cleanup - Cleanup the completed descriptors + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_chan_desc_cleanup(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc, *next; + + list_for_each_entry_safe(desc, next, &mdev->done_list, node) { + struct dmaengine_desc_callback cb; + + list_del(&desc->node); + + dmaengine_desc_get_callback(&desc->async_tx, &cb); + if (dmaengine_desc_callback_valid(&cb)) { + spin_unlock(&mdev->lock); + dmaengine_desc_callback_invoke(&cb, NULL); + spin_lock(&mdev->lock); + } + + /* Run any dependencies, then free the descriptor */ + msgdma_free_descriptor(mdev, desc); + } +} + +/** + * msgdma_complete_descriptor - Mark the active descriptor as complete + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_complete_descriptor(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + + desc = list_first_entry_or_null(&mdev->active_list, + struct msgdma_sw_desc, node); + if (!desc) + return; + list_del(&desc->node); + dma_cookie_complete(&desc->async_tx); + list_add_tail(&desc->node, &mdev->done_list); +} + +/** + * msgdma_free_descriptors - Free channel descriptors + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_free_descriptors(struct msgdma_device *mdev) +{ + msgdma_free_desc_list(mdev, &mdev->active_list); + msgdma_free_desc_list(mdev, &mdev->pending_list); + msgdma_free_desc_list(mdev, &mdev->done_list); +} + +/** + * msgdma_free_chan_resources - Free channel resources + * @dchan: DMA channel pointer + */ +static void msgdma_free_chan_resources(struct dma_chan *dchan) +{ + struct msgdma_device *mdev = to_mdev(dchan); + unsigned long flags; + + spin_lock_irqsave(&mdev->lock, flags); + msgdma_free_descriptors(mdev); + spin_unlock_irqrestore(&mdev->lock, flags); + kfree(mdev->sw_desq); +} + +/** + * msgdma_alloc_chan_resources - Allocate channel resources + * @dchan: DMA channel + * + * Return: Number of descriptors on success and failure value on error + */ +static int msgdma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct msgdma_sw_desc *desc; + int i; + + mdev->sw_desq = kcalloc(MSGDMA_DESC_NUM, sizeof(*desc), GFP_NOWAIT); + if (!mdev->sw_desq) + return -ENOMEM; + + mdev->idle = true; + mdev->desc_free_cnt = MSGDMA_DESC_NUM; + + INIT_LIST_HEAD(&mdev->free_list); + + for (i = 0; i < MSGDMA_DESC_NUM; i++) { + desc = mdev->sw_desq + i; + dma_async_tx_descriptor_init(&desc->async_tx, &mdev->dmachan); + desc->async_tx.tx_submit = msgdma_tx_submit; + list_add_tail(&desc->node, &mdev->free_list); + } + + return MSGDMA_DESC_NUM; +} + +/** + * msgdma_tasklet - Schedule completion tasklet + * @t: Pointer to the Altera sSGDMA channel structure + */ +static void msgdma_tasklet(struct tasklet_struct *t) +{ + struct msgdma_device *mdev = from_tasklet(mdev, t, irq_tasklet); + u32 count; + u32 __maybe_unused size; + u32 __maybe_unused status; + unsigned long flags; + + spin_lock_irqsave(&mdev->lock, flags); + + if (mdev->resp) { + /* Read number of responses that are available */ + count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL); + dev_dbg(mdev->dev, "%s (%d): response count=%d\n", + __func__, __LINE__, count); + } else { + count = 1; + } + + while (count--) { + /* + * Read both longwords to purge this response from the FIFO + * On Avalon-MM implementations, size and status do not + * have any real values, like transferred bytes or error + * bits. So we need to just drop these values. + */ + if (mdev->resp) { + size = ioread32(mdev->resp + + MSGDMA_RESP_BYTES_TRANSFERRED); + status = ioread32(mdev->resp + + MSGDMA_RESP_STATUS); + } + + msgdma_complete_descriptor(mdev); + msgdma_chan_desc_cleanup(mdev); + } + + spin_unlock_irqrestore(&mdev->lock, flags); +} + +/** + * msgdma_irq_handler - Altera mSGDMA Interrupt handler + * @irq: IRQ number + * @data: Pointer to the Altera mSGDMA device structure + * + * Return: IRQ_HANDLED/IRQ_NONE + */ +static irqreturn_t msgdma_irq_handler(int irq, void *data) +{ + struct msgdma_device *mdev = data; + u32 status; + + status = ioread32(mdev->csr + MSGDMA_CSR_STATUS); + if ((status & MSGDMA_CSR_STAT_BUSY) == 0) { + /* Start next transfer if the DMA controller is idle */ + spin_lock(&mdev->lock); + mdev->idle = true; + msgdma_start_transfer(mdev); + spin_unlock(&mdev->lock); + } + + tasklet_schedule(&mdev->irq_tasklet); + + /* Clear interrupt in mSGDMA controller */ + iowrite32(MSGDMA_CSR_STAT_IRQ, mdev->csr + MSGDMA_CSR_STATUS); + + return IRQ_HANDLED; +} + +/** + * msgdma_dev_remove() - Device remove function + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_dev_remove(struct msgdma_device *mdev) +{ + if (!mdev) + return; + + devm_free_irq(mdev->dev, mdev->irq, mdev); + tasklet_kill(&mdev->irq_tasklet); + list_del(&mdev->dmachan.device_node); +} + +static int request_and_map(struct platform_device *pdev, const char *name, + struct resource **res, void __iomem **ptr, + bool optional) +{ + struct resource *region; + struct device *device = &pdev->dev; + + *res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (*res == NULL) { + if (optional) { + *ptr = NULL; + dev_info(device, "optional resource %s not defined\n", + name); + return 0; + } + dev_err(device, "mandatory resource %s not defined\n", name); + return -ENODEV; + } + + region = devm_request_mem_region(device, (*res)->start, + resource_size(*res), dev_name(device)); + if (region == NULL) { + dev_err(device, "unable to request %s\n", name); + return -EBUSY; + } + + *ptr = devm_ioremap(device, region->start, + resource_size(region)); + if (*ptr == NULL) { + dev_err(device, "ioremap of %s failed!", name); + return -ENOMEM; + } + + return 0; +} + +/** + * msgdma_probe - Driver probe function + * @pdev: Pointer to the platform_device structure + * + * Return: '0' on success and failure value on error + */ +static int msgdma_probe(struct platform_device *pdev) +{ + struct msgdma_device *mdev; + struct dma_device *dma_dev; + struct resource *dma_res; + int ret; + + mdev = devm_kzalloc(&pdev->dev, sizeof(*mdev), GFP_NOWAIT); + if (!mdev) + return -ENOMEM; + + mdev->dev = &pdev->dev; + + /* Map CSR space */ + ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr, false); + if (ret) + return ret; + + /* Map (extended) descriptor space */ + ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc, false); + if (ret) + return ret; + + /* Map response space */ + ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp, true); + if (ret) + return ret; + + platform_set_drvdata(pdev, mdev); + + /* Get interrupt nr from platform data */ + mdev->irq = platform_get_irq(pdev, 0); + if (mdev->irq < 0) + return -ENXIO; + + ret = devm_request_irq(&pdev->dev, mdev->irq, msgdma_irq_handler, + 0, dev_name(&pdev->dev), mdev); + if (ret) + return ret; + + tasklet_setup(&mdev->irq_tasklet, msgdma_tasklet); + + dma_cookie_init(&mdev->dmachan); + + spin_lock_init(&mdev->lock); + + INIT_LIST_HEAD(&mdev->active_list); + INIT_LIST_HEAD(&mdev->pending_list); + INIT_LIST_HEAD(&mdev->done_list); + INIT_LIST_HEAD(&mdev->free_list); + + dma_dev = &mdev->dmadev; + + /* Set DMA capabilities */ + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + + dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_MEM); + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + /* Init DMA link list */ + INIT_LIST_HEAD(&dma_dev->channels); + + /* Set base routines */ + dma_dev->device_tx_status = dma_cookie_status; + dma_dev->device_issue_pending = msgdma_issue_pending; + dma_dev->dev = &pdev->dev; + + dma_dev->copy_align = DMAENGINE_ALIGN_4_BYTES; + dma_dev->device_prep_dma_memcpy = msgdma_prep_memcpy; + dma_dev->device_prep_slave_sg = msgdma_prep_slave_sg; + dma_dev->device_config = msgdma_dma_config; + + dma_dev->device_alloc_chan_resources = msgdma_alloc_chan_resources; + dma_dev->device_free_chan_resources = msgdma_free_chan_resources; + + mdev->dmachan.device = dma_dev; + list_add_tail(&mdev->dmachan.device_node, &dma_dev->channels); + + /* Set DMA mask to 64 bits */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_warn(&pdev->dev, "unable to set coherent mask to 64"); + goto fail; + } + + msgdma_reset(mdev); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto fail; + + ret = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, dma_dev); + if (ret == -EINVAL) + dev_warn(&pdev->dev, "device was not probed from DT"); + else if (ret && ret != -ENODEV) + goto fail; + + dev_notice(&pdev->dev, "Altera mSGDMA driver probe success\n"); + + return 0; + +fail: + msgdma_dev_remove(mdev); + + return ret; +} + +/** + * msgdma_remove() - Driver remove function + * @pdev: Pointer to the platform_device structure + * + * Return: Always '0' + */ +static int msgdma_remove(struct platform_device *pdev) +{ + struct msgdma_device *mdev = platform_get_drvdata(pdev); + + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&mdev->dmadev); + msgdma_dev_remove(mdev); + + dev_notice(&pdev->dev, "Altera mSGDMA driver removed\n"); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id msgdma_match[] = { + { .compatible = "altr,socfpga-msgdma", }, + { } +}; + +MODULE_DEVICE_TABLE(of, msgdma_match); +#endif + +static struct platform_driver msgdma_driver = { + .driver = { + .name = "altera-msgdma", + .of_match_table = of_match_ptr(msgdma_match), + }, + .probe = msgdma_probe, + .remove = msgdma_remove, +}; + +module_platform_driver(msgdma_driver); + +MODULE_ALIAS("platform:altera-msgdma"); +MODULE_DESCRIPTION("Altera mSGDMA driver"); +MODULE_AUTHOR("Stefan Roese "); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c new file mode 100644 index 0000000000..eea8bd33b4 --- /dev/null +++ b/drivers/dma/amba-pl08x.c @@ -0,0 +1,3073 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2006 ARM Ltd. + * Copyright (c) 2010 ST-Ericsson SA + * Copyirght (c) 2017 Linaro Ltd. + * + * Author: Peter Pearse + * Author: Linus Walleij + * + * Documentation: ARM DDI 0196G == PL080 + * Documentation: ARM DDI 0218E == PL081 + * Documentation: S3C6410 User's Manual == PL080S + * + * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any + * channel. + * + * The PL080 has 8 channels available for simultaneous use, and the PL081 + * has only two channels. So on these DMA controllers the number of channels + * and the number of incoming DMA signals are two totally different things. + * It is usually not possible to theoretically handle all physical signals, + * so a multiplexing scheme with possible denial of use is necessary. + * + * The PL080 has a dual bus master, PL081 has a single master. + * + * PL080S is a version modified by Samsung and used in S3C64xx SoCs. + * It differs in following aspects: + * - CH_CONFIG register at different offset, + * - separate CH_CONTROL2 register for transfer size, + * - bigger maximum transfer size, + * - 8-word aligned LLI, instead of 4-word, due to extra CCTL2 word, + * - no support for peripheral flow control. + * + * Memory to peripheral transfer may be visualized as + * Get data from memory to DMAC + * Until no data left + * On burst request from peripheral + * Destination burst from DMAC to peripheral + * Clear burst request + * Raise terminal count interrupt + * + * For peripherals with a FIFO: + * Source burst size == half the depth of the peripheral FIFO + * Destination burst size == the depth of the peripheral FIFO + * + * (Bursts are irrelevant for mem to mem transfers - there are no burst + * signals, the DMA controller will simply facilitate its AHB master.) + * + * ASSUMES default (little) endianness for DMA transfers + * + * The PL08x has two flow control settings: + * - DMAC flow control: the transfer size defines the number of transfers + * which occur for the current LLI entry, and the DMAC raises TC at the + * end of every LLI entry. Observed behaviour shows the DMAC listening + * to both the BREQ and SREQ signals (contrary to documented), + * transferring data if either is active. The LBREQ and LSREQ signals + * are ignored. + * + * - Peripheral flow control: the transfer size is ignored (and should be + * zero). The data is transferred from the current LLI entry, until + * after the final transfer signalled by LBREQ or LSREQ. The DMAC + * will then move to the next LLI entry. Unsupported by PL080S. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +#define DRIVER_NAME "pl08xdmac" + +#define PL80X_DMA_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) + +static struct amba_driver pl08x_amba_driver; +struct pl08x_driver_data; + +/** + * struct vendor_data - vendor-specific config parameters for PL08x derivatives + * @config_offset: offset to the configuration register + * @channels: the number of channels available in this variant + * @signals: the number of request signals available from the hardware + * @dualmaster: whether this version supports dual AHB masters or not. + * @nomadik: whether this variant is a ST Microelectronics Nomadik, where the + * channels have Nomadik security extension bits that need to be checked + * for permission before use and some registers are missing + * @pl080s: whether this variant is a Samsung PL080S, which has separate + * register and LLI word for transfer size. + * @ftdmac020: whether this variant is a Faraday Technology FTDMAC020 + * @max_transfer_size: the maximum single element transfer size for this + * PL08x variant. + */ +struct vendor_data { + u8 config_offset; + u8 channels; + u8 signals; + bool dualmaster; + bool nomadik; + bool pl080s; + bool ftdmac020; + u32 max_transfer_size; +}; + +/** + * struct pl08x_bus_data - information of source or destination + * busses for a transfer + * @addr: current address + * @maxwidth: the maximum width of a transfer on this bus + * @buswidth: the width of this bus in bytes: 1, 2 or 4 + */ +struct pl08x_bus_data { + dma_addr_t addr; + u8 maxwidth; + u8 buswidth; +}; + +#define IS_BUS_ALIGNED(bus) IS_ALIGNED((bus)->addr, (bus)->buswidth) + +/** + * struct pl08x_phy_chan - holder for the physical channels + * @id: physical index to this channel + * @base: memory base address for this physical channel + * @reg_config: configuration address for this physical channel + * @reg_control: control address for this physical channel + * @reg_src: transfer source address register + * @reg_dst: transfer destination address register + * @reg_lli: transfer LLI address register + * @reg_busy: if the variant has a special per-channel busy register, + * this contains a pointer to it + * @lock: a lock to use when altering an instance of this struct + * @serving: the virtual channel currently being served by this physical + * channel + * @locked: channel unavailable for the system, e.g. dedicated to secure + * world + * @ftdmac020: channel is on a FTDMAC020 + * @pl080s: channel is on a PL08s + */ +struct pl08x_phy_chan { + unsigned int id; + void __iomem *base; + void __iomem *reg_config; + void __iomem *reg_control; + void __iomem *reg_src; + void __iomem *reg_dst; + void __iomem *reg_lli; + void __iomem *reg_busy; + spinlock_t lock; + struct pl08x_dma_chan *serving; + bool locked; + bool ftdmac020; + bool pl080s; +}; + +/** + * struct pl08x_sg - structure containing data per sg + * @src_addr: src address of sg + * @dst_addr: dst address of sg + * @len: transfer len in bytes + * @node: node for txd's dsg_list + */ +struct pl08x_sg { + dma_addr_t src_addr; + dma_addr_t dst_addr; + size_t len; + struct list_head node; +}; + +/** + * struct pl08x_txd - wrapper for struct dma_async_tx_descriptor + * @vd: virtual DMA descriptor + * @dsg_list: list of children sg's + * @llis_bus: DMA memory address (physical) start for the LLIs + * @llis_va: virtual memory address start for the LLIs + * @cctl: control reg values for current txd + * @ccfg: config reg values for current txd + * @done: this marks completed descriptors, which should not have their + * mux released. + * @cyclic: indicate cyclic transfers + */ +struct pl08x_txd { + struct virt_dma_desc vd; + struct list_head dsg_list; + dma_addr_t llis_bus; + u32 *llis_va; + /* Default cctl value for LLIs */ + u32 cctl; + /* + * Settings to be put into the physical channel when we + * trigger this txd. Other registers are in llis_va[0]. + */ + u32 ccfg; + bool done; + bool cyclic; +}; + +/** + * enum pl08x_dma_chan_state - holds the PL08x specific virtual channel + * states + * @PL08X_CHAN_IDLE: the channel is idle + * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport + * channel and is running a transfer on it + * @PL08X_CHAN_PAUSED: the channel has allocated a physical transport + * channel, but the transfer is currently paused + * @PL08X_CHAN_WAITING: the channel is waiting for a physical transport + * channel to become available (only pertains to memcpy channels) + */ +enum pl08x_dma_chan_state { + PL08X_CHAN_IDLE, + PL08X_CHAN_RUNNING, + PL08X_CHAN_PAUSED, + PL08X_CHAN_WAITING, +}; + +/** + * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel + * @vc: wrapped virtual channel + * @phychan: the physical channel utilized by this channel, if there is one + * @name: name of channel + * @cd: channel platform data + * @cfg: slave configuration + * @at: active transaction on this channel + * @host: a pointer to the host (internal use) + * @state: whether the channel is idle, paused, running etc + * @slave: whether this channel is a device (slave) or for memcpy + * @signal: the physical DMA request signal which this channel is using + * @mux_use: count of descriptors using this DMA request signal setting + * @waiting_at: time in jiffies when this channel moved to waiting state + */ +struct pl08x_dma_chan { + struct virt_dma_chan vc; + struct pl08x_phy_chan *phychan; + const char *name; + struct pl08x_channel_data *cd; + struct dma_slave_config cfg; + struct pl08x_txd *at; + struct pl08x_driver_data *host; + enum pl08x_dma_chan_state state; + bool slave; + int signal; + unsigned mux_use; + unsigned long waiting_at; +}; + +/** + * struct pl08x_driver_data - the local state holder for the PL08x + * @slave: optional slave engine for this instance + * @memcpy: memcpy engine for this instance + * @has_slave: the PL08x has a slave engine (routed signals) + * @base: virtual memory base (remapped) for the PL08x + * @adev: the corresponding AMBA (PrimeCell) bus entry + * @vd: vendor data for this PL08x variant + * @pd: platform data passed in from the platform/machine + * @phy_chans: array of data for the physical channels + * @pool: a pool for the LLI descriptors + * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI + * fetches + * @mem_buses: set to indicate memory transfers on AHB2. + * @lli_words: how many words are used in each LLI item for this variant + */ +struct pl08x_driver_data { + struct dma_device slave; + struct dma_device memcpy; + bool has_slave; + void __iomem *base; + struct amba_device *adev; + const struct vendor_data *vd; + struct pl08x_platform_data *pd; + struct pl08x_phy_chan *phy_chans; + struct dma_pool *pool; + u8 lli_buses; + u8 mem_buses; + u8 lli_words; +}; + +/* + * PL08X specific defines + */ + +/* The order of words in an LLI. */ +#define PL080_LLI_SRC 0 +#define PL080_LLI_DST 1 +#define PL080_LLI_LLI 2 +#define PL080_LLI_CCTL 3 +#define PL080S_LLI_CCTL2 4 + +/* Total words in an LLI. */ +#define PL080_LLI_WORDS 4 +#define PL080S_LLI_WORDS 8 + +/* + * Number of LLIs in each LLI buffer allocated for one transfer + * (maximum times we call dma_pool_alloc on this pool without freeing) + */ +#define MAX_NUM_TSFR_LLIS 512 +#define PL08X_ALIGN 8 + +static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan) +{ + return container_of(chan, struct pl08x_dma_chan, vc.chan); +} + +static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct pl08x_txd, vd.tx); +} + +/* + * Mux handling. + * + * This gives us the DMA request input to the PL08x primecell which the + * peripheral described by the channel data will be routed to, possibly + * via a board/SoC specific external MUX. One important point to note + * here is that this does not depend on the physical channel. + */ +static int pl08x_request_mux(struct pl08x_dma_chan *plchan) +{ + const struct pl08x_platform_data *pd = plchan->host->pd; + int ret; + + if (plchan->mux_use++ == 0 && pd->get_xfer_signal) { + ret = pd->get_xfer_signal(plchan->cd); + if (ret < 0) { + plchan->mux_use = 0; + return ret; + } + + plchan->signal = ret; + } + return 0; +} + +static void pl08x_release_mux(struct pl08x_dma_chan *plchan) +{ + const struct pl08x_platform_data *pd = plchan->host->pd; + + if (plchan->signal >= 0) { + WARN_ON(plchan->mux_use == 0); + + if (--plchan->mux_use == 0 && pd->put_xfer_signal) { + pd->put_xfer_signal(plchan->cd, plchan->signal); + plchan->signal = -1; + } + } +} + +/* + * Physical channel handling + */ + +/* Whether a certain channel is busy or not */ +static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch) +{ + unsigned int val; + + /* If we have a special busy register, take a shortcut */ + if (ch->reg_busy) { + val = readl(ch->reg_busy); + return !!(val & BIT(ch->id)); + } + val = readl(ch->reg_config); + return val & PL080_CONFIG_ACTIVE; +} + +/* + * pl08x_write_lli() - Write an LLI into the DMA controller. + * + * The PL08x derivatives support linked lists, but the first item of the + * list containing the source, destination, control word and next LLI is + * ignored. Instead the driver has to write those values directly into the + * SRC, DST, LLI and control registers. On FTDMAC020 also the SIZE + * register need to be set up for the first transfer. + */ +static void pl08x_write_lli(struct pl08x_driver_data *pl08x, + struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg) +{ + if (pl08x->vd->pl080s) + dev_vdbg(&pl08x->adev->dev, + "WRITE channel %d: csrc=0x%08x, cdst=0x%08x, " + "clli=0x%08x, cctl=0x%08x, cctl2=0x%08x, ccfg=0x%08x\n", + phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST], + lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], + lli[PL080S_LLI_CCTL2], ccfg); + else + dev_vdbg(&pl08x->adev->dev, + "WRITE channel %d: csrc=0x%08x, cdst=0x%08x, " + "clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n", + phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST], + lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg); + + writel_relaxed(lli[PL080_LLI_SRC], phychan->reg_src); + writel_relaxed(lli[PL080_LLI_DST], phychan->reg_dst); + writel_relaxed(lli[PL080_LLI_LLI], phychan->reg_lli); + + /* + * The FTMAC020 has a different layout in the CCTL word of the LLI + * and the CCTL register which is split in CSR and SIZE registers. + * Convert the LLI item CCTL into the proper values to write into + * the CSR and SIZE registers. + */ + if (phychan->ftdmac020) { + u32 llictl = lli[PL080_LLI_CCTL]; + u32 val = 0; + + /* Write the transfer size (12 bits) to the size register */ + writel_relaxed(llictl & FTDMAC020_LLI_TRANSFER_SIZE_MASK, + phychan->base + FTDMAC020_CH_SIZE); + /* + * Then write the control bits 28..16 to the control register + * by shuffleing the bits around to where they are in the + * main register. The mapping is as follows: + * Bit 28: TC_MSK - mask on all except last LLI + * Bit 27..25: SRC_WIDTH + * Bit 24..22: DST_WIDTH + * Bit 21..20: SRCAD_CTRL + * Bit 19..17: DSTAD_CTRL + * Bit 17: SRC_SEL + * Bit 16: DST_SEL + */ + if (llictl & FTDMAC020_LLI_TC_MSK) + val |= FTDMAC020_CH_CSR_TC_MSK; + val |= ((llictl & FTDMAC020_LLI_SRC_WIDTH_MSK) >> + (FTDMAC020_LLI_SRC_WIDTH_SHIFT - + FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT)); + val |= ((llictl & FTDMAC020_LLI_DST_WIDTH_MSK) >> + (FTDMAC020_LLI_DST_WIDTH_SHIFT - + FTDMAC020_CH_CSR_DST_WIDTH_SHIFT)); + val |= ((llictl & FTDMAC020_LLI_SRCAD_CTL_MSK) >> + (FTDMAC020_LLI_SRCAD_CTL_SHIFT - + FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT)); + val |= ((llictl & FTDMAC020_LLI_DSTAD_CTL_MSK) >> + (FTDMAC020_LLI_DSTAD_CTL_SHIFT - + FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT)); + if (llictl & FTDMAC020_LLI_SRC_SEL) + val |= FTDMAC020_CH_CSR_SRC_SEL; + if (llictl & FTDMAC020_LLI_DST_SEL) + val |= FTDMAC020_CH_CSR_DST_SEL; + + /* + * Set up the bits that exist in the CSR but are not + * part the LLI, i.e. only gets written to the control + * register right here. + * + * FIXME: do not just handle memcpy, also handle slave DMA. + */ + switch (pl08x->pd->memcpy_burst_size) { + default: + case PL08X_BURST_SZ_1: + val |= PL080_BSIZE_1 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_4: + val |= PL080_BSIZE_4 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_8: + val |= PL080_BSIZE_8 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_16: + val |= PL080_BSIZE_16 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_32: + val |= PL080_BSIZE_32 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_64: + val |= PL080_BSIZE_64 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_128: + val |= PL080_BSIZE_128 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_256: + val |= PL080_BSIZE_256 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + } + + /* Protection flags */ + if (pl08x->pd->memcpy_prot_buff) + val |= FTDMAC020_CH_CSR_PROT2; + if (pl08x->pd->memcpy_prot_cache) + val |= FTDMAC020_CH_CSR_PROT3; + /* We are the kernel, so we are in privileged mode */ + val |= FTDMAC020_CH_CSR_PROT1; + + writel_relaxed(val, phychan->reg_control); + } else { + /* Bits are just identical */ + writel_relaxed(lli[PL080_LLI_CCTL], phychan->reg_control); + } + + /* Second control word on the PL080s */ + if (pl08x->vd->pl080s) + writel_relaxed(lli[PL080S_LLI_CCTL2], + phychan->base + PL080S_CH_CONTROL2); + + writel(ccfg, phychan->reg_config); +} + +/* + * Set the initial DMA register values i.e. those for the first LLI + * The next LLI pointer and the configuration interrupt bit have + * been set when the LLIs were constructed. Poke them into the hardware + * and start the transfer. + */ +static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan) +{ + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_phy_chan *phychan = plchan->phychan; + struct virt_dma_desc *vd = vchan_next_desc(&plchan->vc); + struct pl08x_txd *txd = to_pl08x_txd(&vd->tx); + u32 val; + + list_del(&txd->vd.node); + + plchan->at = txd; + + /* Wait for channel inactive */ + while (pl08x_phy_channel_busy(phychan)) + cpu_relax(); + + pl08x_write_lli(pl08x, phychan, &txd->llis_va[0], txd->ccfg); + + /* Enable the DMA channel */ + /* Do not access config register until channel shows as disabled */ + while (readl(pl08x->base + PL080_EN_CHAN) & BIT(phychan->id)) + cpu_relax(); + + /* Do not access config register until channel shows as inactive */ + if (phychan->ftdmac020) { + val = readl(phychan->reg_config); + while (val & FTDMAC020_CH_CFG_BUSY) + val = readl(phychan->reg_config); + + val = readl(phychan->reg_control); + while (val & FTDMAC020_CH_CSR_EN) + val = readl(phychan->reg_control); + + writel(val | FTDMAC020_CH_CSR_EN, + phychan->reg_control); + } else { + val = readl(phychan->reg_config); + while ((val & PL080_CONFIG_ACTIVE) || + (val & PL080_CONFIG_ENABLE)) + val = readl(phychan->reg_config); + + writel(val | PL080_CONFIG_ENABLE, phychan->reg_config); + } +} + +/* + * Pause the channel by setting the HALT bit. + * + * For M->P transfers, pause the DMAC first and then stop the peripheral - + * the FIFO can only drain if the peripheral is still requesting data. + * (note: this can still timeout if the DMAC FIFO never drains of data.) + * + * For P->M transfers, disable the peripheral first to stop it filling + * the DMAC FIFO, and then pause the DMAC. + */ +static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch) +{ + u32 val; + int timeout; + + if (ch->ftdmac020) { + /* Use the enable bit on the FTDMAC020 */ + val = readl(ch->reg_control); + val &= ~FTDMAC020_CH_CSR_EN; + writel(val, ch->reg_control); + return; + } + + /* Set the HALT bit and wait for the FIFO to drain */ + val = readl(ch->reg_config); + val |= PL080_CONFIG_HALT; + writel(val, ch->reg_config); + + /* Wait for channel inactive */ + for (timeout = 1000; timeout; timeout--) { + if (!pl08x_phy_channel_busy(ch)) + break; + udelay(1); + } + if (pl08x_phy_channel_busy(ch)) + pr_err("pl08x: channel%u timeout waiting for pause\n", ch->id); +} + +static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch) +{ + u32 val; + + /* Use the enable bit on the FTDMAC020 */ + if (ch->ftdmac020) { + val = readl(ch->reg_control); + val |= FTDMAC020_CH_CSR_EN; + writel(val, ch->reg_control); + return; + } + + /* Clear the HALT bit */ + val = readl(ch->reg_config); + val &= ~PL080_CONFIG_HALT; + writel(val, ch->reg_config); +} + +/* + * pl08x_terminate_phy_chan() stops the channel, clears the FIFO and + * clears any pending interrupt status. This should not be used for + * an on-going transfer, but as a method of shutting down a channel + * (eg, when it's no longer used) or terminating a transfer. + */ +static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x, + struct pl08x_phy_chan *ch) +{ + u32 val; + + /* The layout for the FTDMAC020 is different */ + if (ch->ftdmac020) { + /* Disable all interrupts */ + val = readl(ch->reg_config); + val |= (FTDMAC020_CH_CFG_INT_ABT_MASK | + FTDMAC020_CH_CFG_INT_ERR_MASK | + FTDMAC020_CH_CFG_INT_TC_MASK); + writel(val, ch->reg_config); + + /* Abort and disable channel */ + val = readl(ch->reg_control); + val &= ~FTDMAC020_CH_CSR_EN; + val |= FTDMAC020_CH_CSR_ABT; + writel(val, ch->reg_control); + + /* Clear ABT and ERR interrupt flags */ + writel(BIT(ch->id) | BIT(ch->id + 16), + pl08x->base + PL080_ERR_CLEAR); + writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR); + + return; + } + + val = readl(ch->reg_config); + val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK | + PL080_CONFIG_TC_IRQ_MASK); + writel(val, ch->reg_config); + + writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR); + writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR); +} + +static u32 get_bytes_in_phy_channel(struct pl08x_phy_chan *ch) +{ + u32 val; + u32 bytes; + + if (ch->ftdmac020) { + bytes = readl(ch->base + FTDMAC020_CH_SIZE); + + val = readl(ch->reg_control); + val &= FTDMAC020_CH_CSR_SRC_WIDTH_MSK; + val >>= FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT; + } else if (ch->pl080s) { + val = readl(ch->base + PL080S_CH_CONTROL2); + bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK; + + val = readl(ch->reg_control); + val &= PL080_CONTROL_SWIDTH_MASK; + val >>= PL080_CONTROL_SWIDTH_SHIFT; + } else { + /* Plain PL08x */ + val = readl(ch->reg_control); + bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK; + + val &= PL080_CONTROL_SWIDTH_MASK; + val >>= PL080_CONTROL_SWIDTH_SHIFT; + } + + switch (val) { + case PL080_WIDTH_8BIT: + break; + case PL080_WIDTH_16BIT: + bytes *= 2; + break; + case PL080_WIDTH_32BIT: + bytes *= 4; + break; + } + return bytes; +} + +static u32 get_bytes_in_lli(struct pl08x_phy_chan *ch, const u32 *llis_va) +{ + u32 val; + u32 bytes; + + if (ch->ftdmac020) { + val = llis_va[PL080_LLI_CCTL]; + bytes = val & FTDMAC020_LLI_TRANSFER_SIZE_MASK; + + val = llis_va[PL080_LLI_CCTL]; + val &= FTDMAC020_LLI_SRC_WIDTH_MSK; + val >>= FTDMAC020_LLI_SRC_WIDTH_SHIFT; + } else if (ch->pl080s) { + val = llis_va[PL080S_LLI_CCTL2]; + bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK; + + val = llis_va[PL080_LLI_CCTL]; + val &= PL080_CONTROL_SWIDTH_MASK; + val >>= PL080_CONTROL_SWIDTH_SHIFT; + } else { + /* Plain PL08x */ + val = llis_va[PL080_LLI_CCTL]; + bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK; + + val &= PL080_CONTROL_SWIDTH_MASK; + val >>= PL080_CONTROL_SWIDTH_SHIFT; + } + + switch (val) { + case PL080_WIDTH_8BIT: + break; + case PL080_WIDTH_16BIT: + bytes *= 2; + break; + case PL080_WIDTH_32BIT: + bytes *= 4; + break; + } + return bytes; +} + +/* The channel should be paused when calling this */ +static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan) +{ + struct pl08x_driver_data *pl08x = plchan->host; + const u32 *llis_va, *llis_va_limit; + struct pl08x_phy_chan *ch; + dma_addr_t llis_bus; + struct pl08x_txd *txd; + u32 llis_max_words; + size_t bytes; + u32 clli; + + ch = plchan->phychan; + txd = plchan->at; + + if (!ch || !txd) + return 0; + + /* + * Follow the LLIs to get the number of remaining + * bytes in the currently active transaction. + */ + clli = readl(ch->reg_lli) & ~PL080_LLI_LM_AHB2; + + /* First get the remaining bytes in the active transfer */ + bytes = get_bytes_in_phy_channel(ch); + + if (!clli) + return bytes; + + llis_va = txd->llis_va; + llis_bus = txd->llis_bus; + + llis_max_words = pl08x->lli_words * MAX_NUM_TSFR_LLIS; + BUG_ON(clli < llis_bus || clli >= llis_bus + + sizeof(u32) * llis_max_words); + + /* + * Locate the next LLI - as this is an array, + * it's simple maths to find. + */ + llis_va += (clli - llis_bus) / sizeof(u32); + + llis_va_limit = llis_va + llis_max_words; + + for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) { + bytes += get_bytes_in_lli(ch, llis_va); + + /* + * A LLI pointer going backward terminates the LLI list + */ + if (llis_va[PL080_LLI_LLI] <= clli) + break; + } + + return bytes; +} + +/* + * Allocate a physical channel for a virtual channel + * + * Try to locate a physical channel to be used for this transfer. If all + * are taken return NULL and the requester will have to cope by using + * some fallback PIO mode or retrying later. + */ +static struct pl08x_phy_chan * +pl08x_get_phy_channel(struct pl08x_driver_data *pl08x, + struct pl08x_dma_chan *virt_chan) +{ + struct pl08x_phy_chan *ch = NULL; + unsigned long flags; + int i; + + for (i = 0; i < pl08x->vd->channels; i++) { + ch = &pl08x->phy_chans[i]; + + spin_lock_irqsave(&ch->lock, flags); + + if (!ch->locked && !ch->serving) { + ch->serving = virt_chan; + spin_unlock_irqrestore(&ch->lock, flags); + break; + } + + spin_unlock_irqrestore(&ch->lock, flags); + } + + if (i == pl08x->vd->channels) { + /* No physical channel available, cope with it */ + return NULL; + } + + return ch; +} + +/* Mark the physical channel as free. Note, this write is atomic. */ +static inline void pl08x_put_phy_channel(struct pl08x_driver_data *pl08x, + struct pl08x_phy_chan *ch) +{ + ch->serving = NULL; +} + +/* + * Try to allocate a physical channel. When successful, assign it to + * this virtual channel, and initiate the next descriptor. The + * virtual channel lock must be held at this point. + */ +static void pl08x_phy_alloc_and_start(struct pl08x_dma_chan *plchan) +{ + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_phy_chan *ch; + + ch = pl08x_get_phy_channel(pl08x, plchan); + if (!ch) { + dev_dbg(&pl08x->adev->dev, "no physical channel available for xfer on %s\n", plchan->name); + plchan->state = PL08X_CHAN_WAITING; + plchan->waiting_at = jiffies; + return; + } + + dev_dbg(&pl08x->adev->dev, "allocated physical channel %d for xfer on %s\n", + ch->id, plchan->name); + + plchan->phychan = ch; + plchan->state = PL08X_CHAN_RUNNING; + pl08x_start_next_txd(plchan); +} + +static void pl08x_phy_reassign_start(struct pl08x_phy_chan *ch, + struct pl08x_dma_chan *plchan) +{ + struct pl08x_driver_data *pl08x = plchan->host; + + dev_dbg(&pl08x->adev->dev, "reassigned physical channel %d for xfer on %s\n", + ch->id, plchan->name); + + /* + * We do this without taking the lock; we're really only concerned + * about whether this pointer is NULL or not, and we're guaranteed + * that this will only be called when it _already_ is non-NULL. + */ + ch->serving = plchan; + plchan->phychan = ch; + plchan->state = PL08X_CHAN_RUNNING; + pl08x_start_next_txd(plchan); +} + +/* + * Free a physical DMA channel, potentially reallocating it to another + * virtual channel if we have any pending. + */ +static void pl08x_phy_free(struct pl08x_dma_chan *plchan) +{ + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_dma_chan *p, *next; + unsigned long waiting_at; + retry: + next = NULL; + waiting_at = jiffies; + + /* + * Find a waiting virtual channel for the next transfer. + * To be fair, time when each channel reached waiting state is compared + * to select channel that is waiting for the longest time. + */ + list_for_each_entry(p, &pl08x->memcpy.channels, vc.chan.device_node) + if (p->state == PL08X_CHAN_WAITING && + p->waiting_at <= waiting_at) { + next = p; + waiting_at = p->waiting_at; + } + + if (!next && pl08x->has_slave) { + list_for_each_entry(p, &pl08x->slave.channels, vc.chan.device_node) + if (p->state == PL08X_CHAN_WAITING && + p->waiting_at <= waiting_at) { + next = p; + waiting_at = p->waiting_at; + } + } + + /* Ensure that the physical channel is stopped */ + pl08x_terminate_phy_chan(pl08x, plchan->phychan); + + if (next) { + bool success; + + /* + * Eww. We know this isn't going to deadlock + * but lockdep probably doesn't. + */ + spin_lock(&next->vc.lock); + /* Re-check the state now that we have the lock */ + success = next->state == PL08X_CHAN_WAITING; + if (success) + pl08x_phy_reassign_start(plchan->phychan, next); + spin_unlock(&next->vc.lock); + + /* If the state changed, try to find another channel */ + if (!success) + goto retry; + } else { + /* No more jobs, so free up the physical channel */ + pl08x_put_phy_channel(pl08x, plchan->phychan); + } + + plchan->phychan = NULL; + plchan->state = PL08X_CHAN_IDLE; +} + +/* + * LLI handling + */ + +static inline unsigned int +pl08x_get_bytes_for_lli(struct pl08x_driver_data *pl08x, + u32 cctl, + bool source) +{ + u32 val; + + if (pl08x->vd->ftdmac020) { + if (source) + val = (cctl & FTDMAC020_LLI_SRC_WIDTH_MSK) >> + FTDMAC020_LLI_SRC_WIDTH_SHIFT; + else + val = (cctl & FTDMAC020_LLI_DST_WIDTH_MSK) >> + FTDMAC020_LLI_DST_WIDTH_SHIFT; + } else { + if (source) + val = (cctl & PL080_CONTROL_SWIDTH_MASK) >> + PL080_CONTROL_SWIDTH_SHIFT; + else + val = (cctl & PL080_CONTROL_DWIDTH_MASK) >> + PL080_CONTROL_DWIDTH_SHIFT; + } + + switch (val) { + case PL080_WIDTH_8BIT: + return 1; + case PL080_WIDTH_16BIT: + return 2; + case PL080_WIDTH_32BIT: + return 4; + default: + break; + } + BUG(); + return 0; +} + +static inline u32 pl08x_lli_control_bits(struct pl08x_driver_data *pl08x, + u32 cctl, + u8 srcwidth, u8 dstwidth, + size_t tsize) +{ + u32 retbits = cctl; + + /* + * Remove all src, dst and transfer size bits, then set the + * width and size according to the parameters. The bit offsets + * are different in the FTDMAC020 so we need to accound for this. + */ + if (pl08x->vd->ftdmac020) { + retbits &= ~FTDMAC020_LLI_DST_WIDTH_MSK; + retbits &= ~FTDMAC020_LLI_SRC_WIDTH_MSK; + retbits &= ~FTDMAC020_LLI_TRANSFER_SIZE_MASK; + + switch (srcwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << + FTDMAC020_LLI_SRC_WIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << + FTDMAC020_LLI_SRC_WIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << + FTDMAC020_LLI_SRC_WIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + switch (dstwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << + FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << + FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << + FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + tsize &= FTDMAC020_LLI_TRANSFER_SIZE_MASK; + retbits |= tsize << FTDMAC020_LLI_TRANSFER_SIZE_SHIFT; + } else { + retbits &= ~PL080_CONTROL_DWIDTH_MASK; + retbits &= ~PL080_CONTROL_SWIDTH_MASK; + retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK; + + switch (srcwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << + PL080_CONTROL_SWIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << + PL080_CONTROL_SWIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << + PL080_CONTROL_SWIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + switch (dstwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << + PL080_CONTROL_DWIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << + PL080_CONTROL_DWIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << + PL080_CONTROL_DWIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK; + retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT; + } + + return retbits; +} + +struct pl08x_lli_build_data { + struct pl08x_txd *txd; + struct pl08x_bus_data srcbus; + struct pl08x_bus_data dstbus; + size_t remainder; + u32 lli_bus; +}; + +/* + * Autoselect a master bus to use for the transfer. Slave will be the chosen as + * victim in case src & dest are not similarly aligned. i.e. If after aligning + * masters address with width requirements of transfer (by sending few byte by + * byte data), slave is still not aligned, then its width will be reduced to + * BYTE. + * - prefers the destination bus if both available + * - prefers bus with fixed address (i.e. peripheral) + */ +static void pl08x_choose_master_bus(struct pl08x_driver_data *pl08x, + struct pl08x_lli_build_data *bd, + struct pl08x_bus_data **mbus, + struct pl08x_bus_data **sbus, + u32 cctl) +{ + bool dst_incr; + bool src_incr; + + /* + * The FTDMAC020 only supports memory-to-memory transfer, so + * source and destination always increase. + */ + if (pl08x->vd->ftdmac020) { + dst_incr = true; + src_incr = true; + } else { + dst_incr = !!(cctl & PL080_CONTROL_DST_INCR); + src_incr = !!(cctl & PL080_CONTROL_SRC_INCR); + } + + /* + * If either bus is not advancing, i.e. it is a peripheral, that + * one becomes master + */ + if (!dst_incr) { + *mbus = &bd->dstbus; + *sbus = &bd->srcbus; + } else if (!src_incr) { + *mbus = &bd->srcbus; + *sbus = &bd->dstbus; + } else { + if (bd->dstbus.buswidth >= bd->srcbus.buswidth) { + *mbus = &bd->dstbus; + *sbus = &bd->srcbus; + } else { + *mbus = &bd->srcbus; + *sbus = &bd->dstbus; + } + } +} + +/* + * Fills in one LLI for a certain transfer descriptor and advance the counter + */ +static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x, + struct pl08x_lli_build_data *bd, + int num_llis, int len, u32 cctl, u32 cctl2) +{ + u32 offset = num_llis * pl08x->lli_words; + u32 *llis_va = bd->txd->llis_va + offset; + dma_addr_t llis_bus = bd->txd->llis_bus; + + BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS); + + /* Advance the offset to next LLI. */ + offset += pl08x->lli_words; + + llis_va[PL080_LLI_SRC] = bd->srcbus.addr; + llis_va[PL080_LLI_DST] = bd->dstbus.addr; + llis_va[PL080_LLI_LLI] = (llis_bus + sizeof(u32) * offset); + llis_va[PL080_LLI_LLI] |= bd->lli_bus; + llis_va[PL080_LLI_CCTL] = cctl; + if (pl08x->vd->pl080s) + llis_va[PL080S_LLI_CCTL2] = cctl2; + + if (pl08x->vd->ftdmac020) { + /* FIXME: only memcpy so far so both increase */ + bd->srcbus.addr += len; + bd->dstbus.addr += len; + } else { + if (cctl & PL080_CONTROL_SRC_INCR) + bd->srcbus.addr += len; + if (cctl & PL080_CONTROL_DST_INCR) + bd->dstbus.addr += len; + } + + BUG_ON(bd->remainder < len); + + bd->remainder -= len; +} + +static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x, + struct pl08x_lli_build_data *bd, u32 *cctl, u32 len, + int num_llis, size_t *total_bytes) +{ + *cctl = pl08x_lli_control_bits(pl08x, *cctl, 1, 1, len); + pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len); + (*total_bytes) += len; +} + +#if 1 +static void pl08x_dump_lli(struct pl08x_driver_data *pl08x, + const u32 *llis_va, int num_llis) +{ + int i; + + if (pl08x->vd->pl080s) { + dev_vdbg(&pl08x->adev->dev, + "%-3s %-9s %-10s %-10s %-10s %-10s %s\n", + "lli", "", "csrc", "cdst", "clli", "cctl", "cctl2"); + for (i = 0; i < num_llis; i++) { + dev_vdbg(&pl08x->adev->dev, + "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, llis_va, llis_va[PL080_LLI_SRC], + llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI], + llis_va[PL080_LLI_CCTL], + llis_va[PL080S_LLI_CCTL2]); + llis_va += pl08x->lli_words; + } + } else { + dev_vdbg(&pl08x->adev->dev, + "%-3s %-9s %-10s %-10s %-10s %s\n", + "lli", "", "csrc", "cdst", "clli", "cctl"); + for (i = 0; i < num_llis; i++) { + dev_vdbg(&pl08x->adev->dev, + "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, llis_va, llis_va[PL080_LLI_SRC], + llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI], + llis_va[PL080_LLI_CCTL]); + llis_va += pl08x->lli_words; + } + } +} +#else +static inline void pl08x_dump_lli(struct pl08x_driver_data *pl08x, + const u32 *llis_va, int num_llis) {} +#endif + +/* + * This fills in the table of LLIs for the transfer descriptor + * Note that we assume we never have to change the burst sizes + * Return 0 for error + */ +static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, + struct pl08x_txd *txd) +{ + struct pl08x_bus_data *mbus, *sbus; + struct pl08x_lli_build_data bd; + int num_llis = 0; + u32 cctl, early_bytes = 0; + size_t max_bytes_per_lli, total_bytes; + u32 *llis_va, *last_lli; + struct pl08x_sg *dsg; + + txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus); + if (!txd->llis_va) { + dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__); + return 0; + } + + bd.txd = txd; + bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0; + cctl = txd->cctl; + + /* Find maximum width of the source bus */ + bd.srcbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, true); + + /* Find maximum width of the destination bus */ + bd.dstbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, false); + + list_for_each_entry(dsg, &txd->dsg_list, node) { + total_bytes = 0; + cctl = txd->cctl; + + bd.srcbus.addr = dsg->src_addr; + bd.dstbus.addr = dsg->dst_addr; + bd.remainder = dsg->len; + bd.srcbus.buswidth = bd.srcbus.maxwidth; + bd.dstbus.buswidth = bd.dstbus.maxwidth; + + pl08x_choose_master_bus(pl08x, &bd, &mbus, &sbus, cctl); + + dev_vdbg(&pl08x->adev->dev, + "src=0x%08llx%s/%u dst=0x%08llx%s/%u len=%zu\n", + (u64)bd.srcbus.addr, + cctl & PL080_CONTROL_SRC_INCR ? "+" : "", + bd.srcbus.buswidth, + (u64)bd.dstbus.addr, + cctl & PL080_CONTROL_DST_INCR ? "+" : "", + bd.dstbus.buswidth, + bd.remainder); + dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n", + mbus == &bd.srcbus ? "src" : "dst", + sbus == &bd.srcbus ? "src" : "dst"); + + /* + * Zero length is only allowed if all these requirements are + * met: + * - flow controller is peripheral. + * - src.addr is aligned to src.width + * - dst.addr is aligned to dst.width + * + * sg_len == 1 should be true, as there can be two cases here: + * + * - Memory addresses are contiguous and are not scattered. + * Here, Only one sg will be passed by user driver, with + * memory address and zero length. We pass this to controller + * and after the transfer it will receive the last burst + * request from peripheral and so transfer finishes. + * + * - Memory addresses are scattered and are not contiguous. + * Here, Obviously as DMA controller doesn't know when a lli's + * transfer gets over, it can't load next lli. So in this + * case, there has to be an assumption that only one lli is + * supported. Thus, we can't have scattered addresses. + */ + if (!bd.remainder) { + u32 fc; + + /* FTDMAC020 only does memory-to-memory */ + if (pl08x->vd->ftdmac020) + fc = PL080_FLOW_MEM2MEM; + else + fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >> + PL080_CONFIG_FLOW_CONTROL_SHIFT; + if (!((fc >= PL080_FLOW_SRC2DST_DST) && + (fc <= PL080_FLOW_SRC2DST_SRC))) { + dev_err(&pl08x->adev->dev, "%s sg len can't be zero", + __func__); + return 0; + } + + if (!IS_BUS_ALIGNED(&bd.srcbus) || + !IS_BUS_ALIGNED(&bd.dstbus)) { + dev_err(&pl08x->adev->dev, + "%s src & dst address must be aligned to src" + " & dst width if peripheral is flow controller", + __func__); + return 0; + } + + cctl = pl08x_lli_control_bits(pl08x, cctl, + bd.srcbus.buswidth, bd.dstbus.buswidth, + 0); + pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++, + 0, cctl, 0); + break; + } + + /* + * Send byte by byte for following cases + * - Less than a bus width available + * - until master bus is aligned + */ + if (bd.remainder < mbus->buswidth) + early_bytes = bd.remainder; + else if (!IS_BUS_ALIGNED(mbus)) { + early_bytes = mbus->buswidth - + (mbus->addr & (mbus->buswidth - 1)); + if ((bd.remainder - early_bytes) < mbus->buswidth) + early_bytes = bd.remainder; + } + + if (early_bytes) { + dev_vdbg(&pl08x->adev->dev, + "%s byte width LLIs (remain 0x%08zx)\n", + __func__, bd.remainder); + prep_byte_width_lli(pl08x, &bd, &cctl, early_bytes, + num_llis++, &total_bytes); + } + + if (bd.remainder) { + /* + * Master now aligned + * - if slave is not then we must set its width down + */ + if (!IS_BUS_ALIGNED(sbus)) { + dev_dbg(&pl08x->adev->dev, + "%s set down bus width to one byte\n", + __func__); + + sbus->buswidth = 1; + } + + /* + * Bytes transferred = tsize * src width, not + * MIN(buswidths) + */ + max_bytes_per_lli = bd.srcbus.buswidth * + pl08x->vd->max_transfer_size; + dev_vdbg(&pl08x->adev->dev, + "%s max bytes per lli = %zu\n", + __func__, max_bytes_per_lli); + + /* + * Make largest possible LLIs until less than one bus + * width left + */ + while (bd.remainder > (mbus->buswidth - 1)) { + size_t lli_len, tsize, width; + + /* + * If enough left try to send max possible, + * otherwise try to send the remainder + */ + lli_len = min(bd.remainder, max_bytes_per_lli); + + /* + * Check against maximum bus alignment: + * Calculate actual transfer size in relation to + * bus width an get a maximum remainder of the + * highest bus width - 1 + */ + width = max(mbus->buswidth, sbus->buswidth); + lli_len = (lli_len / width) * width; + tsize = lli_len / bd.srcbus.buswidth; + + dev_vdbg(&pl08x->adev->dev, + "%s fill lli with single lli chunk of " + "size 0x%08zx (remainder 0x%08zx)\n", + __func__, lli_len, bd.remainder); + + cctl = pl08x_lli_control_bits(pl08x, cctl, + bd.srcbus.buswidth, bd.dstbus.buswidth, + tsize); + pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++, + lli_len, cctl, tsize); + total_bytes += lli_len; + } + + /* + * Send any odd bytes + */ + if (bd.remainder) { + dev_vdbg(&pl08x->adev->dev, + "%s align with boundary, send odd bytes (remain %zu)\n", + __func__, bd.remainder); + prep_byte_width_lli(pl08x, &bd, &cctl, + bd.remainder, num_llis++, &total_bytes); + } + } + + if (total_bytes != dsg->len) { + dev_err(&pl08x->adev->dev, + "%s size of encoded lli:s don't match total txd, transferred 0x%08zx from size 0x%08zx\n", + __func__, total_bytes, dsg->len); + return 0; + } + + if (num_llis >= MAX_NUM_TSFR_LLIS) { + dev_err(&pl08x->adev->dev, + "%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n", + __func__, MAX_NUM_TSFR_LLIS); + return 0; + } + } + + llis_va = txd->llis_va; + last_lli = llis_va + (num_llis - 1) * pl08x->lli_words; + + if (txd->cyclic) { + /* Link back to the first LLI. */ + last_lli[PL080_LLI_LLI] = txd->llis_bus | bd.lli_bus; + } else { + /* The final LLI terminates the LLI. */ + last_lli[PL080_LLI_LLI] = 0; + /* The final LLI element shall also fire an interrupt. */ + if (pl08x->vd->ftdmac020) + last_lli[PL080_LLI_CCTL] &= ~FTDMAC020_LLI_TC_MSK; + else + last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN; + } + + pl08x_dump_lli(pl08x, llis_va, num_llis); + + return num_llis; +} + +static void pl08x_free_txd(struct pl08x_driver_data *pl08x, + struct pl08x_txd *txd) +{ + struct pl08x_sg *dsg, *_dsg; + + if (txd->llis_va) + dma_pool_free(pl08x->pool, txd->llis_va, txd->llis_bus); + + list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) { + list_del(&dsg->node); + kfree(dsg); + } + + kfree(txd); +} + +static void pl08x_desc_free(struct virt_dma_desc *vd) +{ + struct pl08x_txd *txd = to_pl08x_txd(&vd->tx); + struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan); + + dma_descriptor_unmap(&vd->tx); + if (!txd->done) + pl08x_release_mux(plchan); + + pl08x_free_txd(plchan->host, txd); +} + +static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x, + struct pl08x_dma_chan *plchan) +{ + LIST_HEAD(head); + + vchan_get_all_descriptors(&plchan->vc, &head); + vchan_dma_desc_free_list(&plchan->vc, &head); +} + +/* + * The DMA ENGINE API + */ +static void pl08x_free_chan_resources(struct dma_chan *chan) +{ + /* Ensure all queued descriptors are freed */ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +/* + * Code accessing dma_async_is_complete() in a tight loop may give problems. + * If slaves are relying on interrupts to signal completion this function + * must not be called with interrupts disabled. + */ +static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + /* + * There's no point calculating the residue if there's + * no txstate to store the value. + */ + if (!txstate) { + if (plchan->state == PL08X_CHAN_PAUSED) + ret = DMA_PAUSED; + return ret; + } + + spin_lock_irqsave(&plchan->vc.lock, flags); + ret = dma_cookie_status(chan, cookie, txstate); + if (ret != DMA_COMPLETE) { + vd = vchan_find_desc(&plchan->vc, cookie); + if (vd) { + /* On the issued list, so hasn't been processed yet */ + struct pl08x_txd *txd = to_pl08x_txd(&vd->tx); + struct pl08x_sg *dsg; + + list_for_each_entry(dsg, &txd->dsg_list, node) + bytes += dsg->len; + } else { + bytes = pl08x_getbytes_chan(plchan); + } + } + spin_unlock_irqrestore(&plchan->vc.lock, flags); + + /* + * This cookie not complete yet + * Get number of bytes left in the active transactions and queue + */ + dma_set_residue(txstate, bytes); + + if (plchan->state == PL08X_CHAN_PAUSED && ret == DMA_IN_PROGRESS) + ret = DMA_PAUSED; + + /* Whether waiting or running, we're in progress */ + return ret; +} + +/* PrimeCell DMA extension */ +struct burst_table { + u32 burstwords; + u32 reg; +}; + +static const struct burst_table burst_sizes[] = { + { + .burstwords = 256, + .reg = PL080_BSIZE_256, + }, + { + .burstwords = 128, + .reg = PL080_BSIZE_128, + }, + { + .burstwords = 64, + .reg = PL080_BSIZE_64, + }, + { + .burstwords = 32, + .reg = PL080_BSIZE_32, + }, + { + .burstwords = 16, + .reg = PL080_BSIZE_16, + }, + { + .burstwords = 8, + .reg = PL080_BSIZE_8, + }, + { + .burstwords = 4, + .reg = PL080_BSIZE_4, + }, + { + .burstwords = 0, + .reg = PL080_BSIZE_1, + }, +}; + +/* + * Given the source and destination available bus masks, select which + * will be routed to each port. We try to have source and destination + * on separate ports, but always respect the allowable settings. + */ +static u32 pl08x_select_bus(bool ftdmac020, u8 src, u8 dst) +{ + u32 cctl = 0; + u32 dst_ahb2; + u32 src_ahb2; + + /* The FTDMAC020 use different bits to indicate src/dst bus */ + if (ftdmac020) { + dst_ahb2 = FTDMAC020_LLI_DST_SEL; + src_ahb2 = FTDMAC020_LLI_SRC_SEL; + } else { + dst_ahb2 = PL080_CONTROL_DST_AHB2; + src_ahb2 = PL080_CONTROL_SRC_AHB2; + } + + if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) + cctl |= dst_ahb2; + if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) + cctl |= src_ahb2; + + return cctl; +} + +static u32 pl08x_cctl(u32 cctl) +{ + cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | + PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | + PL080_CONTROL_PROT_MASK); + + /* Access the cell in privileged mode, non-bufferable, non-cacheable */ + return cctl | PL080_CONTROL_PROT_SYS; +} + +static u32 pl08x_width(enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return PL080_WIDTH_8BIT; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return PL080_WIDTH_16BIT; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return PL080_WIDTH_32BIT; + default: + return ~0; + } +} + +static u32 pl08x_burst(u32 maxburst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) + if (burst_sizes[i].burstwords <= maxburst) + break; + + return burst_sizes[i].reg; +} + +static u32 pl08x_get_cctl(struct pl08x_dma_chan *plchan, + enum dma_slave_buswidth addr_width, u32 maxburst) +{ + u32 width, burst, cctl = 0; + + width = pl08x_width(addr_width); + if (width == ~0) + return ~0; + + cctl |= width << PL080_CONTROL_SWIDTH_SHIFT; + cctl |= width << PL080_CONTROL_DWIDTH_SHIFT; + + /* + * If this channel will only request single transfers, set this + * down to ONE element. Also select one element if no maxburst + * is specified. + */ + if (plchan->cd->single) + maxburst = 1; + + burst = pl08x_burst(maxburst); + cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT; + cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT; + + return pl08x_cctl(cctl); +} + +/* + * Slave transactions callback to the slave device to allow + * synchronization of slave DMA signals with the DMAC enable + */ +static void pl08x_issue_pending(struct dma_chan *chan) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&plchan->vc.lock, flags); + if (vchan_issue_pending(&plchan->vc)) { + if (!plchan->phychan && plchan->state != PL08X_CHAN_WAITING) + pl08x_phy_alloc_and_start(plchan); + } + spin_unlock_irqrestore(&plchan->vc.lock, flags); +} + +static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan) +{ + struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + + if (txd) + INIT_LIST_HEAD(&txd->dsg_list); + return txd; +} + +static u32 pl08x_memcpy_cctl(struct pl08x_driver_data *pl08x) +{ + u32 cctl = 0; + + /* Conjure cctl */ + switch (pl08x->pd->memcpy_burst_size) { + default: + dev_err(&pl08x->adev->dev, + "illegal burst size for memcpy, set to 1\n"); + fallthrough; + case PL08X_BURST_SZ_1: + cctl |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_4: + cctl |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_8: + cctl |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_16: + cctl |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_32: + cctl |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_64: + cctl |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_128: + cctl |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_256: + cctl |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + } + + switch (pl08x->pd->memcpy_bus_width) { + default: + dev_err(&pl08x->adev->dev, + "illegal bus width for memcpy, set to 8 bits\n"); + fallthrough; + case PL08X_BUS_WIDTH_8_BITS: + cctl |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case PL08X_BUS_WIDTH_16_BITS: + cctl |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case PL08X_BUS_WIDTH_32_BITS: + cctl |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + } + + /* Protection flags */ + if (pl08x->pd->memcpy_prot_buff) + cctl |= PL080_CONTROL_PROT_BUFF; + if (pl08x->pd->memcpy_prot_cache) + cctl |= PL080_CONTROL_PROT_CACHE; + + /* We are the kernel, so we are in privileged mode */ + cctl |= PL080_CONTROL_PROT_SYS; + + /* Both to be incremented or the code will break */ + cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; + + if (pl08x->vd->dualmaster) + cctl |= pl08x_select_bus(false, + pl08x->mem_buses, + pl08x->mem_buses); + + return cctl; +} + +static u32 pl08x_ftdmac020_memcpy_cctl(struct pl08x_driver_data *pl08x) +{ + u32 cctl = 0; + + /* Conjure cctl */ + switch (pl08x->pd->memcpy_bus_width) { + default: + dev_err(&pl08x->adev->dev, + "illegal bus width for memcpy, set to 8 bits\n"); + fallthrough; + case PL08X_BUS_WIDTH_8_BITS: + cctl |= PL080_WIDTH_8BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT | + PL080_WIDTH_8BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + case PL08X_BUS_WIDTH_16_BITS: + cctl |= PL080_WIDTH_16BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT | + PL080_WIDTH_16BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + case PL08X_BUS_WIDTH_32_BITS: + cctl |= PL080_WIDTH_32BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT | + PL080_WIDTH_32BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + } + + /* + * By default mask the TC IRQ on all LLIs, it will be unmasked on + * the last LLI item by other code. + */ + cctl |= FTDMAC020_LLI_TC_MSK; + + /* + * Both to be incremented so leave bits FTDMAC020_LLI_SRCAD_CTL + * and FTDMAC020_LLI_DSTAD_CTL as zero + */ + if (pl08x->vd->dualmaster) + cctl |= pl08x_select_bus(true, + pl08x->mem_buses, + pl08x->mem_buses); + + return cctl; +} + +/* + * Initialize a descriptor to be used by memcpy submit + */ +static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + struct pl08x_sg *dsg; + int ret; + + txd = pl08x_get_txd(plchan); + if (!txd) { + dev_err(&pl08x->adev->dev, + "%s no memory for descriptor\n", __func__); + return NULL; + } + + dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT); + if (!dsg) { + pl08x_free_txd(pl08x, txd); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->src_addr = src; + dsg->dst_addr = dest; + dsg->len = len; + if (pl08x->vd->ftdmac020) { + /* Writing CCFG zero ENABLES all interrupts */ + txd->ccfg = 0; + txd->cctl = pl08x_ftdmac020_memcpy_cctl(pl08x); + } else { + txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK | + PL080_CONFIG_TC_IRQ_MASK | + PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; + txd->cctl = pl08x_memcpy_cctl(pl08x); + } + + ret = pl08x_fill_llis_for_desc(plchan->host, txd); + if (!ret) { + pl08x_free_txd(pl08x, txd); + return NULL; + } + + return vchan_tx_prep(&plchan->vc, &txd->vd, flags); +} + +static struct pl08x_txd *pl08x_init_txd( + struct dma_chan *chan, + enum dma_transfer_direction direction, + dma_addr_t *slave_addr) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + enum dma_slave_buswidth addr_width; + int ret, tmp; + u8 src_buses, dst_buses; + u32 maxburst, cctl; + + txd = pl08x_get_txd(plchan); + if (!txd) { + dev_err(&pl08x->adev->dev, "%s no txd\n", __func__); + return NULL; + } + + /* + * Set up addresses, the PrimeCell configured address + * will take precedence since this may configure the + * channel target address dynamically at runtime. + */ + if (direction == DMA_MEM_TO_DEV) { + cctl = PL080_CONTROL_SRC_INCR; + *slave_addr = plchan->cfg.dst_addr; + addr_width = plchan->cfg.dst_addr_width; + maxburst = plchan->cfg.dst_maxburst; + src_buses = pl08x->mem_buses; + dst_buses = plchan->cd->periph_buses; + } else if (direction == DMA_DEV_TO_MEM) { + cctl = PL080_CONTROL_DST_INCR; + *slave_addr = plchan->cfg.src_addr; + addr_width = plchan->cfg.src_addr_width; + maxburst = plchan->cfg.src_maxburst; + src_buses = plchan->cd->periph_buses; + dst_buses = pl08x->mem_buses; + } else { + pl08x_free_txd(pl08x, txd); + dev_err(&pl08x->adev->dev, + "%s direction unsupported\n", __func__); + return NULL; + } + + cctl |= pl08x_get_cctl(plchan, addr_width, maxburst); + if (cctl == ~0) { + pl08x_free_txd(pl08x, txd); + dev_err(&pl08x->adev->dev, + "DMA slave configuration botched?\n"); + return NULL; + } + + txd->cctl = cctl | pl08x_select_bus(false, src_buses, dst_buses); + + if (plchan->cfg.device_fc) + tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER : + PL080_FLOW_PER2MEM_PER; + else + tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER : + PL080_FLOW_PER2MEM; + + txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK | + PL080_CONFIG_TC_IRQ_MASK | + tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT; + + ret = pl08x_request_mux(plchan); + if (ret < 0) { + pl08x_free_txd(pl08x, txd); + dev_dbg(&pl08x->adev->dev, + "unable to mux for transfer on %s due to platform restrictions\n", + plchan->name); + return NULL; + } + + dev_dbg(&pl08x->adev->dev, "allocated DMA request signal %d for xfer on %s\n", + plchan->signal, plchan->name); + + /* Assign the flow control signal to this channel */ + if (direction == DMA_MEM_TO_DEV) + txd->ccfg |= plchan->signal << PL080_CONFIG_DST_SEL_SHIFT; + else + txd->ccfg |= plchan->signal << PL080_CONFIG_SRC_SEL_SHIFT; + + return txd; +} + +static int pl08x_tx_add_sg(struct pl08x_txd *txd, + enum dma_transfer_direction direction, + dma_addr_t slave_addr, + dma_addr_t buf_addr, + unsigned int len) +{ + struct pl08x_sg *dsg; + + dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT); + if (!dsg) + return -ENOMEM; + + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->len = len; + if (direction == DMA_MEM_TO_DEV) { + dsg->src_addr = buf_addr; + dsg->dst_addr = slave_addr; + } else { + dsg->src_addr = slave_addr; + dsg->dst_addr = buf_addr; + } + + return 0; +} + +static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + struct scatterlist *sg; + int ret, tmp; + dma_addr_t slave_addr; + + dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n", + __func__, sg_dma_len(sgl), plchan->name); + + txd = pl08x_init_txd(chan, direction, &slave_addr); + if (!txd) + return NULL; + + for_each_sg(sgl, sg, sg_len, tmp) { + ret = pl08x_tx_add_sg(txd, direction, slave_addr, + sg_dma_address(sg), + sg_dma_len(sg)); + if (ret) { + pl08x_release_mux(plchan); + pl08x_free_txd(pl08x, txd); + dev_err(&pl08x->adev->dev, "%s no mem for pl080 sg\n", + __func__); + return NULL; + } + } + + ret = pl08x_fill_llis_for_desc(plchan->host, txd); + if (!ret) { + pl08x_release_mux(plchan); + pl08x_free_txd(pl08x, txd); + return NULL; + } + + return vchan_tx_prep(&plchan->vc, &txd->vd, flags); +} + +static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + int ret, tmp; + dma_addr_t slave_addr; + + dev_dbg(&pl08x->adev->dev, + "%s prepare cyclic transaction of %zd/%zd bytes %s %s\n", + __func__, period_len, buf_len, + direction == DMA_MEM_TO_DEV ? "to" : "from", + plchan->name); + + txd = pl08x_init_txd(chan, direction, &slave_addr); + if (!txd) + return NULL; + + txd->cyclic = true; + txd->cctl |= PL080_CONTROL_TC_IRQ_EN; + for (tmp = 0; tmp < buf_len; tmp += period_len) { + ret = pl08x_tx_add_sg(txd, direction, slave_addr, + buf_addr + tmp, period_len); + if (ret) { + pl08x_release_mux(plchan); + pl08x_free_txd(pl08x, txd); + return NULL; + } + } + + ret = pl08x_fill_llis_for_desc(plchan->host, txd); + if (!ret) { + pl08x_release_mux(plchan); + pl08x_free_txd(pl08x, txd); + return NULL; + } + + return vchan_tx_prep(&plchan->vc, &txd->vd, flags); +} + +static int pl08x_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + + if (!plchan->slave) + return -EINVAL; + + /* Reject definitely invalid configurations */ + if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES || + config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) + return -EINVAL; + + if (config->device_fc && pl08x->vd->pl080s) { + dev_err(&pl08x->adev->dev, + "%s: PL080S does not support peripheral flow control\n", + __func__); + return -EINVAL; + } + + plchan->cfg = *config; + + return 0; +} + +static int pl08x_terminate_all(struct dma_chan *chan) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + unsigned long flags; + + spin_lock_irqsave(&plchan->vc.lock, flags); + if (!plchan->phychan && !plchan->at) { + spin_unlock_irqrestore(&plchan->vc.lock, flags); + return 0; + } + + plchan->state = PL08X_CHAN_IDLE; + + if (plchan->phychan) { + /* + * Mark physical channel as free and free any slave + * signal + */ + pl08x_phy_free(plchan); + } + /* Dequeue jobs and free LLIs */ + if (plchan->at) { + vchan_terminate_vdesc(&plchan->at->vd); + plchan->at = NULL; + } + /* Dequeue jobs not yet fired as well */ + pl08x_free_txd_list(pl08x, plchan); + + spin_unlock_irqrestore(&plchan->vc.lock, flags); + + return 0; +} + +static void pl08x_synchronize(struct dma_chan *chan) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + + vchan_synchronize(&plchan->vc); +} + +static int pl08x_pause(struct dma_chan *chan) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + unsigned long flags; + + /* + * Anything succeeds on channels with no physical allocation and + * no queued transfers. + */ + spin_lock_irqsave(&plchan->vc.lock, flags); + if (!plchan->phychan && !plchan->at) { + spin_unlock_irqrestore(&plchan->vc.lock, flags); + return 0; + } + + pl08x_pause_phy_chan(plchan->phychan); + plchan->state = PL08X_CHAN_PAUSED; + + spin_unlock_irqrestore(&plchan->vc.lock, flags); + + return 0; +} + +static int pl08x_resume(struct dma_chan *chan) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + unsigned long flags; + + /* + * Anything succeeds on channels with no physical allocation and + * no queued transfers. + */ + spin_lock_irqsave(&plchan->vc.lock, flags); + if (!plchan->phychan && !plchan->at) { + spin_unlock_irqrestore(&plchan->vc.lock, flags); + return 0; + } + + pl08x_resume_phy_chan(plchan->phychan); + plchan->state = PL08X_CHAN_RUNNING; + + spin_unlock_irqrestore(&plchan->vc.lock, flags); + + return 0; +} + +bool pl08x_filter_id(struct dma_chan *chan, void *chan_id) +{ + struct pl08x_dma_chan *plchan; + char *name = chan_id; + + /* Reject channels for devices not bound to this driver */ + if (chan->device->dev->driver != &pl08x_amba_driver.drv) + return false; + + plchan = to_pl08x_chan(chan); + + /* Check that the channel is not taken! */ + if (!strcmp(plchan->name, name)) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(pl08x_filter_id); + +static bool pl08x_filter_fn(struct dma_chan *chan, void *chan_id) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + + return plchan->cd == chan_id; +} + +/* + * Just check that the device is there and active + * TODO: turn this bit on/off depending on the number of physical channels + * actually used, if it is zero... well shut it off. That will save some + * power. Cut the clock at the same time. + */ +static void pl08x_ensure_on(struct pl08x_driver_data *pl08x) +{ + /* The Nomadik variant does not have the config register */ + if (pl08x->vd->nomadik) + return; + /* The FTDMAC020 variant does this in another register */ + if (pl08x->vd->ftdmac020) { + writel(PL080_CONFIG_ENABLE, pl08x->base + FTDMAC020_CSR); + return; + } + writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG); +} + +static irqreturn_t pl08x_irq(int irq, void *dev) +{ + struct pl08x_driver_data *pl08x = dev; + u32 mask = 0, err, tc, i; + + /* check & clear - ERR & TC interrupts */ + err = readl(pl08x->base + PL080_ERR_STATUS); + if (err) { + dev_err(&pl08x->adev->dev, "%s error interrupt, register value 0x%08x\n", + __func__, err); + writel(err, pl08x->base + PL080_ERR_CLEAR); + } + tc = readl(pl08x->base + PL080_TC_STATUS); + if (tc) + writel(tc, pl08x->base + PL080_TC_CLEAR); + + if (!err && !tc) + return IRQ_NONE; + + for (i = 0; i < pl08x->vd->channels; i++) { + if ((BIT(i) & err) || (BIT(i) & tc)) { + /* Locate physical channel */ + struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i]; + struct pl08x_dma_chan *plchan = phychan->serving; + struct pl08x_txd *tx; + + if (!plchan) { + dev_err(&pl08x->adev->dev, + "%s Error TC interrupt on unused channel: 0x%08x\n", + __func__, i); + continue; + } + + spin_lock(&plchan->vc.lock); + tx = plchan->at; + if (tx && tx->cyclic) { + vchan_cyclic_callback(&tx->vd); + } else if (tx) { + plchan->at = NULL; + /* + * This descriptor is done, release its mux + * reservation. + */ + pl08x_release_mux(plchan); + tx->done = true; + vchan_cookie_complete(&tx->vd); + + /* + * And start the next descriptor (if any), + * otherwise free this channel. + */ + if (vchan_next_desc(&plchan->vc)) + pl08x_start_next_txd(plchan); + else + pl08x_phy_free(plchan); + } + spin_unlock(&plchan->vc.lock); + + mask |= BIT(i); + } + } + + return mask ? IRQ_HANDLED : IRQ_NONE; +} + +static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan) +{ + chan->slave = true; + chan->name = chan->cd->bus_id; + chan->cfg.src_addr = chan->cd->addr; + chan->cfg.dst_addr = chan->cd->addr; +} + +/* + * Initialise the DMAC memcpy/slave channels. + * Make a local wrapper to hold required data + */ +static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, + struct dma_device *dmadev, unsigned int channels, bool slave) +{ + struct pl08x_dma_chan *chan; + int i; + + INIT_LIST_HEAD(&dmadev->channels); + + /* + * Register as many memcpy as we have physical channels, + * we won't always be able to use all but the code will have + * to cope with that situation. + */ + for (i = 0; i < channels; i++) { + chan = kzalloc(sizeof(*chan), GFP_KERNEL); + if (!chan) + return -ENOMEM; + + chan->host = pl08x; + chan->state = PL08X_CHAN_IDLE; + chan->signal = -1; + + if (slave) { + chan->cd = &pl08x->pd->slave_channels[i]; + /* + * Some implementations have muxed signals, whereas some + * use a mux in front of the signals and need dynamic + * assignment of signals. + */ + chan->signal = i; + pl08x_dma_slave_init(chan); + } else { + chan->cd = kzalloc(sizeof(*chan->cd), GFP_KERNEL); + if (!chan->cd) { + kfree(chan); + return -ENOMEM; + } + chan->cd->bus_id = "memcpy"; + chan->cd->periph_buses = pl08x->pd->mem_buses; + chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); + if (!chan->name) { + kfree(chan->cd); + kfree(chan); + return -ENOMEM; + } + } + dev_dbg(&pl08x->adev->dev, + "initialize virtual channel \"%s\"\n", + chan->name); + + chan->vc.desc_free = pl08x_desc_free; + vchan_init(&chan->vc, dmadev); + } + dev_info(&pl08x->adev->dev, "initialized %d virtual %s channels\n", + i, slave ? "slave" : "memcpy"); + return i; +} + +static void pl08x_free_virtual_channels(struct dma_device *dmadev) +{ + struct pl08x_dma_chan *chan = NULL; + struct pl08x_dma_chan *next; + + list_for_each_entry_safe(chan, + next, &dmadev->channels, vc.chan.device_node) { + list_del(&chan->vc.chan.device_node); + kfree(chan); + } +} + +#ifdef CONFIG_DEBUG_FS +static const char *pl08x_state_str(enum pl08x_dma_chan_state state) +{ + switch (state) { + case PL08X_CHAN_IDLE: + return "idle"; + case PL08X_CHAN_RUNNING: + return "running"; + case PL08X_CHAN_PAUSED: + return "paused"; + case PL08X_CHAN_WAITING: + return "waiting"; + default: + break; + } + return "UNKNOWN STATE"; +} + +static int pl08x_debugfs_show(struct seq_file *s, void *data) +{ + struct pl08x_driver_data *pl08x = s->private; + struct pl08x_dma_chan *chan; + struct pl08x_phy_chan *ch; + unsigned long flags; + int i; + + seq_printf(s, "PL08x physical channels:\n"); + seq_printf(s, "CHANNEL:\tUSER:\n"); + seq_printf(s, "--------\t-----\n"); + for (i = 0; i < pl08x->vd->channels; i++) { + struct pl08x_dma_chan *virt_chan; + + ch = &pl08x->phy_chans[i]; + + spin_lock_irqsave(&ch->lock, flags); + virt_chan = ch->serving; + + seq_printf(s, "%d\t\t%s%s\n", + ch->id, + virt_chan ? virt_chan->name : "(none)", + ch->locked ? " LOCKED" : ""); + + spin_unlock_irqrestore(&ch->lock, flags); + } + + seq_printf(s, "\nPL08x virtual memcpy channels:\n"); + seq_printf(s, "CHANNEL:\tSTATE:\n"); + seq_printf(s, "--------\t------\n"); + list_for_each_entry(chan, &pl08x->memcpy.channels, vc.chan.device_node) { + seq_printf(s, "%s\t\t%s\n", chan->name, + pl08x_state_str(chan->state)); + } + + if (pl08x->has_slave) { + seq_printf(s, "\nPL08x virtual slave channels:\n"); + seq_printf(s, "CHANNEL:\tSTATE:\n"); + seq_printf(s, "--------\t------\n"); + list_for_each_entry(chan, &pl08x->slave.channels, + vc.chan.device_node) { + seq_printf(s, "%s\t\t%s\n", chan->name, + pl08x_state_str(chan->state)); + } + } + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(pl08x_debugfs); + +static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) +{ + /* Expose a simple debugfs interface to view all clocks */ + debugfs_create_file(dev_name(&pl08x->adev->dev), S_IFREG | S_IRUGO, + NULL, pl08x, &pl08x_debugfs_fops); +} + +#else +static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) +{ +} +#endif + +#ifdef CONFIG_OF +static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x, + u32 id) +{ + struct pl08x_dma_chan *chan; + + /* Trying to get a slave channel from something with no slave support */ + if (!pl08x->has_slave) + return NULL; + + list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) { + if (chan->signal == id) + return &chan->vc.chan; + } + + return NULL; +} + +static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pl08x_driver_data *pl08x = ofdma->of_dma_data; + struct dma_chan *dma_chan; + struct pl08x_dma_chan *plchan; + + if (!pl08x) + return NULL; + + if (dma_spec->args_count != 2) { + dev_err(&pl08x->adev->dev, + "DMA channel translation requires two cells\n"); + return NULL; + } + + dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); + if (!dma_chan) { + dev_err(&pl08x->adev->dev, + "DMA slave channel not found\n"); + return NULL; + } + + plchan = to_pl08x_chan(dma_chan); + dev_dbg(&pl08x->adev->dev, + "translated channel for signal %d\n", + dma_spec->args[0]); + + /* Augment channel data for applicable AHB buses */ + plchan->cd->periph_buses = dma_spec->args[1]; + return dma_get_slave_channel(dma_chan); +} + +static int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + struct pl08x_platform_data *pd; + struct pl08x_channel_data *chanp = NULL; + u32 val; + int ret; + int i; + + pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + /* Eligible bus masters for fetching LLIs */ + if (of_property_read_bool(np, "lli-bus-interface-ahb1")) + pd->lli_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "lli-bus-interface-ahb2")) + pd->lli_buses |= PL08X_AHB2; + if (!pd->lli_buses) { + dev_info(&adev->dev, "no bus masters for LLIs stated, assume all\n"); + pd->lli_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Eligible bus masters for memory access */ + if (of_property_read_bool(np, "mem-bus-interface-ahb1")) + pd->mem_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "mem-bus-interface-ahb2")) + pd->mem_buses |= PL08X_AHB2; + if (!pd->mem_buses) { + dev_info(&adev->dev, "no bus masters for memory stated, assume all\n"); + pd->mem_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Parse the memcpy channel properties */ + ret = of_property_read_u32(np, "memcpy-burst-size", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy burst size specified, using 1 byte\n"); + val = 1; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n"); + fallthrough; + case 1: + pd->memcpy_burst_size = PL08X_BURST_SZ_1; + break; + case 4: + pd->memcpy_burst_size = PL08X_BURST_SZ_4; + break; + case 8: + pd->memcpy_burst_size = PL08X_BURST_SZ_8; + break; + case 16: + pd->memcpy_burst_size = PL08X_BURST_SZ_16; + break; + case 32: + pd->memcpy_burst_size = PL08X_BURST_SZ_32; + break; + case 64: + pd->memcpy_burst_size = PL08X_BURST_SZ_64; + break; + case 128: + pd->memcpy_burst_size = PL08X_BURST_SZ_128; + break; + case 256: + pd->memcpy_burst_size = PL08X_BURST_SZ_256; + break; + } + + ret = of_property_read_u32(np, "memcpy-bus-width", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy bus width specified, using 8 bits\n"); + val = 8; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n"); + fallthrough; + case 8: + pd->memcpy_bus_width = PL08X_BUS_WIDTH_8_BITS; + break; + case 16: + pd->memcpy_bus_width = PL08X_BUS_WIDTH_16_BITS; + break; + case 32: + pd->memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS; + break; + } + + /* + * Allocate channel data for all possible slave channels (one + * for each possible signal), channels will then be allocated + * for a device and have it's AHB interfaces set up at + * translation time. + */ + if (pl08x->vd->signals) { + chanp = devm_kcalloc(&adev->dev, + pl08x->vd->signals, + sizeof(struct pl08x_channel_data), + GFP_KERNEL); + if (!chanp) + return -ENOMEM; + + pd->slave_channels = chanp; + for (i = 0; i < pl08x->vd->signals; i++) { + /* + * chanp->periph_buses will be assigned at translation + */ + chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i); + chanp++; + } + pd->num_slave_channels = pl08x->vd->signals; + } + + pl08x->pd = pd; + + return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, + pl08x); +} +#else +static inline int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + return -EINVAL; +} +#endif + +static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) +{ + struct pl08x_driver_data *pl08x; + struct vendor_data *vd = id->data; + struct device_node *np = adev->dev.of_node; + u32 tsfr_size; + int ret = 0; + int i; + + ret = amba_request_regions(adev, NULL); + if (ret) + return ret; + + /* Ensure that we can do DMA */ + ret = dma_set_mask_and_coherent(&adev->dev, DMA_BIT_MASK(32)); + if (ret) + goto out_no_pl08x; + + /* Create the driver state holder */ + pl08x = kzalloc(sizeof(*pl08x), GFP_KERNEL); + if (!pl08x) { + ret = -ENOMEM; + goto out_no_pl08x; + } + + /* Assign useful pointers to the driver state */ + pl08x->adev = adev; + pl08x->vd = vd; + + pl08x->base = ioremap(adev->res.start, resource_size(&adev->res)); + if (!pl08x->base) { + ret = -ENOMEM; + goto out_no_ioremap; + } + + if (vd->ftdmac020) { + u32 val; + + val = readl(pl08x->base + FTDMAC020_REVISION); + dev_info(&pl08x->adev->dev, "FTDMAC020 %d.%d rel %d\n", + (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff); + val = readl(pl08x->base + FTDMAC020_FEATURE); + dev_info(&pl08x->adev->dev, "FTDMAC020 %d channels, " + "%s built-in bridge, %s, %s linked lists\n", + (val >> 12) & 0x0f, + (val & BIT(10)) ? "no" : "has", + (val & BIT(9)) ? "AHB0 and AHB1" : "AHB0", + (val & BIT(8)) ? "supports" : "does not support"); + + /* Vendor data from feature register */ + if (!(val & BIT(8))) + dev_warn(&pl08x->adev->dev, + "linked lists not supported, required\n"); + vd->channels = (val >> 12) & 0x0f; + vd->dualmaster = !!(val & BIT(9)); + } + + /* Initialize memcpy engine */ + dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask); + pl08x->memcpy.dev = &adev->dev; + pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources; + pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy; + pl08x->memcpy.device_tx_status = pl08x_dma_tx_status; + pl08x->memcpy.device_issue_pending = pl08x_issue_pending; + pl08x->memcpy.device_config = pl08x_config; + pl08x->memcpy.device_pause = pl08x_pause; + pl08x->memcpy.device_resume = pl08x_resume; + pl08x->memcpy.device_terminate_all = pl08x_terminate_all; + pl08x->memcpy.device_synchronize = pl08x_synchronize; + pl08x->memcpy.src_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM); + pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + if (vd->ftdmac020) + pl08x->memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES; + + + /* + * Initialize slave engine, if the block has no signals, that means + * we have no slave support. + */ + if (vd->signals) { + pl08x->has_slave = true; + dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask); + pl08x->slave.dev = &adev->dev; + pl08x->slave.device_free_chan_resources = + pl08x_free_chan_resources; + pl08x->slave.device_tx_status = pl08x_dma_tx_status; + pl08x->slave.device_issue_pending = pl08x_issue_pending; + pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg; + pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic; + pl08x->slave.device_config = pl08x_config; + pl08x->slave.device_pause = pl08x_pause; + pl08x->slave.device_resume = pl08x_resume; + pl08x->slave.device_terminate_all = pl08x_terminate_all; + pl08x->slave.device_synchronize = pl08x_synchronize; + pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->slave.directions = + BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + pl08x->slave.residue_granularity = + DMA_RESIDUE_GRANULARITY_SEGMENT; + } + + /* Get the platform data */ + pl08x->pd = dev_get_platdata(&adev->dev); + if (!pl08x->pd) { + if (np) { + ret = pl08x_of_probe(adev, pl08x, np); + if (ret) + goto out_no_platdata; + } else { + dev_err(&adev->dev, "no platform data supplied\n"); + ret = -EINVAL; + goto out_no_platdata; + } + } else { + pl08x->slave.filter.map = pl08x->pd->slave_map; + pl08x->slave.filter.mapcnt = pl08x->pd->slave_map_len; + pl08x->slave.filter.fn = pl08x_filter_fn; + } + + /* By default, AHB1 only. If dualmaster, from platform */ + pl08x->lli_buses = PL08X_AHB1; + pl08x->mem_buses = PL08X_AHB1; + if (pl08x->vd->dualmaster) { + pl08x->lli_buses = pl08x->pd->lli_buses; + pl08x->mem_buses = pl08x->pd->mem_buses; + } + + if (vd->pl080s) + pl08x->lli_words = PL080S_LLI_WORDS; + else + pl08x->lli_words = PL080_LLI_WORDS; + tsfr_size = MAX_NUM_TSFR_LLIS * pl08x->lli_words * sizeof(u32); + + /* A DMA memory pool for LLIs, align on 1-byte boundary */ + pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev, + tsfr_size, PL08X_ALIGN, 0); + if (!pl08x->pool) { + ret = -ENOMEM; + goto out_no_lli_pool; + } + + /* Turn on the PL08x */ + pl08x_ensure_on(pl08x); + + /* Clear any pending interrupts */ + if (vd->ftdmac020) + /* This variant has error IRQs in bits 16-19 */ + writel(0x0000FFFF, pl08x->base + PL080_ERR_CLEAR); + else + writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR); + writel(0x000000FF, pl08x->base + PL080_TC_CLEAR); + + /* Attach the interrupt handler */ + ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x); + if (ret) { + dev_err(&adev->dev, "%s failed to request interrupt %d\n", + __func__, adev->irq[0]); + goto out_no_irq; + } + + /* Initialize physical channels */ + pl08x->phy_chans = kzalloc((vd->channels * sizeof(*pl08x->phy_chans)), + GFP_KERNEL); + if (!pl08x->phy_chans) { + ret = -ENOMEM; + goto out_no_phychans; + } + + for (i = 0; i < vd->channels; i++) { + struct pl08x_phy_chan *ch = &pl08x->phy_chans[i]; + + ch->id = i; + ch->base = pl08x->base + PL080_Cx_BASE(i); + if (vd->ftdmac020) { + /* FTDMA020 has a special channel busy register */ + ch->reg_busy = ch->base + FTDMAC020_CH_BUSY; + ch->reg_config = ch->base + FTDMAC020_CH_CFG; + ch->reg_control = ch->base + FTDMAC020_CH_CSR; + ch->reg_src = ch->base + FTDMAC020_CH_SRC_ADDR; + ch->reg_dst = ch->base + FTDMAC020_CH_DST_ADDR; + ch->reg_lli = ch->base + FTDMAC020_CH_LLP; + ch->ftdmac020 = true; + } else { + ch->reg_config = ch->base + vd->config_offset; + ch->reg_control = ch->base + PL080_CH_CONTROL; + ch->reg_src = ch->base + PL080_CH_SRC_ADDR; + ch->reg_dst = ch->base + PL080_CH_DST_ADDR; + ch->reg_lli = ch->base + PL080_CH_LLI; + } + if (vd->pl080s) + ch->pl080s = true; + + spin_lock_init(&ch->lock); + + /* + * Nomadik variants can have channels that are locked + * down for the secure world only. Lock up these channels + * by perpetually serving a dummy virtual channel. + */ + if (vd->nomadik) { + u32 val; + + val = readl(ch->reg_config); + if (val & (PL080N_CONFIG_ITPROT | PL080N_CONFIG_SECPROT)) { + dev_info(&adev->dev, "physical channel %d reserved for secure access only\n", i); + ch->locked = true; + } + } + + dev_dbg(&adev->dev, "physical channel %d is %s\n", + i, pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE"); + } + + /* Register as many memcpy channels as there are physical channels */ + ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->memcpy, + pl08x->vd->channels, false); + if (ret <= 0) { + dev_warn(&pl08x->adev->dev, + "%s failed to enumerate memcpy channels - %d\n", + __func__, ret); + goto out_no_memcpy; + } + + /* Register slave channels */ + if (pl08x->has_slave) { + ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave, + pl08x->pd->num_slave_channels, true); + if (ret < 0) { + dev_warn(&pl08x->adev->dev, + "%s failed to enumerate slave channels - %d\n", + __func__, ret); + goto out_no_slave; + } + } + + ret = dma_async_device_register(&pl08x->memcpy); + if (ret) { + dev_warn(&pl08x->adev->dev, + "%s failed to register memcpy as an async device - %d\n", + __func__, ret); + goto out_no_memcpy_reg; + } + + if (pl08x->has_slave) { + ret = dma_async_device_register(&pl08x->slave); + if (ret) { + dev_warn(&pl08x->adev->dev, + "%s failed to register slave as an async device - %d\n", + __func__, ret); + goto out_no_slave_reg; + } + } + + amba_set_drvdata(adev, pl08x); + init_pl08x_debugfs(pl08x); + dev_info(&pl08x->adev->dev, "DMA: PL%03x%s rev%u at 0x%08llx irq %d\n", + amba_part(adev), pl08x->vd->pl080s ? "s" : "", amba_rev(adev), + (unsigned long long)adev->res.start, adev->irq[0]); + + return 0; + +out_no_slave_reg: + dma_async_device_unregister(&pl08x->memcpy); +out_no_memcpy_reg: + if (pl08x->has_slave) + pl08x_free_virtual_channels(&pl08x->slave); +out_no_slave: + pl08x_free_virtual_channels(&pl08x->memcpy); +out_no_memcpy: + kfree(pl08x->phy_chans); +out_no_phychans: + free_irq(adev->irq[0], pl08x); +out_no_irq: + dma_pool_destroy(pl08x->pool); +out_no_lli_pool: +out_no_platdata: + iounmap(pl08x->base); +out_no_ioremap: + kfree(pl08x); +out_no_pl08x: + amba_release_regions(adev); + return ret; +} + +/* PL080 has 8 channels and the PL080 have just 2 */ +static struct vendor_data vendor_pl080 = { + .config_offset = PL080_CH_CONFIG, + .channels = 8, + .signals = 16, + .dualmaster = true, + .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, +}; + +static struct vendor_data vendor_nomadik = { + .config_offset = PL080_CH_CONFIG, + .channels = 8, + .signals = 32, + .dualmaster = true, + .nomadik = true, + .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, +}; + +static struct vendor_data vendor_pl080s = { + .config_offset = PL080S_CH_CONFIG, + .channels = 8, + .signals = 32, + .pl080s = true, + .max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK, +}; + +static struct vendor_data vendor_pl081 = { + .config_offset = PL080_CH_CONFIG, + .channels = 2, + .signals = 16, + .dualmaster = false, + .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, +}; + +static struct vendor_data vendor_ftdmac020 = { + .config_offset = PL080_CH_CONFIG, + .ftdmac020 = true, + .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, +}; + +static const struct amba_id pl08x_ids[] = { + /* Samsung PL080S variant */ + { + .id = 0x0a141080, + .mask = 0xffffffff, + .data = &vendor_pl080s, + }, + /* PL080 */ + { + .id = 0x00041080, + .mask = 0x000fffff, + .data = &vendor_pl080, + }, + /* PL081 */ + { + .id = 0x00041081, + .mask = 0x000fffff, + .data = &vendor_pl081, + }, + /* Nomadik 8815 PL080 variant */ + { + .id = 0x00280080, + .mask = 0x00ffffff, + .data = &vendor_nomadik, + }, + /* Faraday Technology FTDMAC020 */ + { + .id = 0x0003b080, + .mask = 0x000fffff, + .data = &vendor_ftdmac020, + }, + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl08x_ids); + +static struct amba_driver pl08x_amba_driver = { + .drv.name = DRIVER_NAME, + .id_table = pl08x_ids, + .probe = pl08x_probe, +}; + +static int __init pl08x_init(void) +{ + int retval; + retval = amba_driver_register(&pl08x_amba_driver); + if (retval) + printk(KERN_WARNING DRIVER_NAME + "failed to register as an AMBA device (%d)\n", + retval); + return retval; +} +subsys_initcall(pl08x_init); diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c new file mode 100644 index 0000000000..3af795635c --- /dev/null +++ b/drivers/dma/apple-admac.c @@ -0,0 +1,958 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for Audio DMA Controller (ADMAC) on t8103 (M1) and other Apple chips + * + * Copyright (C) The Asahi Linux Contributors + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +#define NCHANNELS_MAX 64 +#define IRQ_NOUTPUTS 4 + +/* + * For allocation purposes we split the cache + * memory into blocks of fixed size (given in bytes). + */ +#define SRAM_BLOCK 2048 + +#define RING_WRITE_SLOT GENMASK(1, 0) +#define RING_READ_SLOT GENMASK(5, 4) +#define RING_FULL BIT(9) +#define RING_EMPTY BIT(8) +#define RING_ERR BIT(10) + +#define STATUS_DESC_DONE BIT(0) +#define STATUS_ERR BIT(6) + +#define FLAG_DESC_NOTIFY BIT(16) + +#define REG_TX_START 0x0000 +#define REG_TX_STOP 0x0004 +#define REG_RX_START 0x0008 +#define REG_RX_STOP 0x000c +#define REG_IMPRINT 0x0090 +#define REG_TX_SRAM_SIZE 0x0094 +#define REG_RX_SRAM_SIZE 0x0098 + +#define REG_CHAN_CTL(ch) (0x8000 + (ch) * 0x200) +#define REG_CHAN_CTL_RST_RINGS BIT(0) + +#define REG_DESC_RING(ch) (0x8070 + (ch) * 0x200) +#define REG_REPORT_RING(ch) (0x8074 + (ch) * 0x200) + +#define REG_RESIDUE(ch) (0x8064 + (ch) * 0x200) + +#define REG_BUS_WIDTH(ch) (0x8040 + (ch) * 0x200) + +#define BUS_WIDTH_8BIT 0x00 +#define BUS_WIDTH_16BIT 0x01 +#define BUS_WIDTH_32BIT 0x02 +#define BUS_WIDTH_FRAME_2_WORDS 0x10 +#define BUS_WIDTH_FRAME_4_WORDS 0x20 + +#define REG_CHAN_SRAM_CARVEOUT(ch) (0x8050 + (ch) * 0x200) +#define CHAN_SRAM_CARVEOUT_SIZE GENMASK(31, 16) +#define CHAN_SRAM_CARVEOUT_BASE GENMASK(15, 0) + +#define REG_CHAN_FIFOCTL(ch) (0x8054 + (ch) * 0x200) +#define CHAN_FIFOCTL_LIMIT GENMASK(31, 16) +#define CHAN_FIFOCTL_THRESHOLD GENMASK(15, 0) + +#define REG_DESC_WRITE(ch) (0x10000 + ((ch) / 2) * 0x4 + ((ch) & 1) * 0x4000) +#define REG_REPORT_READ(ch) (0x10100 + ((ch) / 2) * 0x4 + ((ch) & 1) * 0x4000) + +#define REG_TX_INTSTATE(idx) (0x0030 + (idx) * 4) +#define REG_RX_INTSTATE(idx) (0x0040 + (idx) * 4) +#define REG_GLOBAL_INTSTATE(idx) (0x0050 + (idx) * 4) +#define REG_CHAN_INTSTATUS(ch, idx) (0x8010 + (ch) * 0x200 + (idx) * 4) +#define REG_CHAN_INTMASK(ch, idx) (0x8020 + (ch) * 0x200 + (idx) * 4) + +struct admac_data; +struct admac_tx; + +struct admac_chan { + unsigned int no; + struct admac_data *host; + struct dma_chan chan; + struct tasklet_struct tasklet; + + u32 carveout; + + spinlock_t lock; + struct admac_tx *current_tx; + int nperiod_acks; + + /* + * We maintain a 'submitted' and 'issued' list mainly for interface + * correctness. Typical use of the driver (per channel) will be + * prepping, submitting and issuing a single cyclic transaction which + * will stay current until terminate_all is called. + */ + struct list_head submitted; + struct list_head issued; + + struct list_head to_free; +}; + +struct admac_sram { + u32 size; + /* + * SRAM_CARVEOUT has 16-bit fields, so the SRAM cannot be larger than + * 64K and a 32-bit bitfield over 2K blocks covers it. + */ + u32 allocated; +}; + +struct admac_data { + struct dma_device dma; + struct device *dev; + __iomem void *base; + struct reset_control *rstc; + + struct mutex cache_alloc_lock; + struct admac_sram txcache, rxcache; + + int irq; + int irq_index; + int nchannels; + struct admac_chan channels[]; +}; + +struct admac_tx { + struct dma_async_tx_descriptor tx; + bool cyclic; + dma_addr_t buf_addr; + dma_addr_t buf_end; + size_t buf_len; + size_t period_len; + + size_t submitted_pos; + size_t reclaimed_pos; + + struct list_head node; +}; + +static int admac_alloc_sram_carveout(struct admac_data *ad, + enum dma_transfer_direction dir, + u32 *out) +{ + struct admac_sram *sram; + int i, ret = 0, nblocks; + + if (dir == DMA_MEM_TO_DEV) + sram = &ad->txcache; + else + sram = &ad->rxcache; + + mutex_lock(&ad->cache_alloc_lock); + + nblocks = sram->size / SRAM_BLOCK; + for (i = 0; i < nblocks; i++) + if (!(sram->allocated & BIT(i))) + break; + + if (i < nblocks) { + *out = FIELD_PREP(CHAN_SRAM_CARVEOUT_BASE, i * SRAM_BLOCK) | + FIELD_PREP(CHAN_SRAM_CARVEOUT_SIZE, SRAM_BLOCK); + sram->allocated |= BIT(i); + } else { + ret = -EBUSY; + } + + mutex_unlock(&ad->cache_alloc_lock); + + return ret; +} + +static void admac_free_sram_carveout(struct admac_data *ad, + enum dma_transfer_direction dir, + u32 carveout) +{ + struct admac_sram *sram; + u32 base = FIELD_GET(CHAN_SRAM_CARVEOUT_BASE, carveout); + int i; + + if (dir == DMA_MEM_TO_DEV) + sram = &ad->txcache; + else + sram = &ad->rxcache; + + if (WARN_ON(base >= sram->size)) + return; + + mutex_lock(&ad->cache_alloc_lock); + i = base / SRAM_BLOCK; + sram->allocated &= ~BIT(i); + mutex_unlock(&ad->cache_alloc_lock); +} + +static void admac_modify(struct admac_data *ad, int reg, u32 mask, u32 val) +{ + void __iomem *addr = ad->base + reg; + u32 curr = readl_relaxed(addr); + + writel_relaxed((curr & ~mask) | (val & mask), addr); +} + +static struct admac_chan *to_admac_chan(struct dma_chan *chan) +{ + return container_of(chan, struct admac_chan, chan); +} + +static struct admac_tx *to_admac_tx(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct admac_tx, tx); +} + +static enum dma_transfer_direction admac_chan_direction(int channo) +{ + /* Channel directions are hardwired */ + return (channo & 1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; +} + +static dma_cookie_t admac_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct admac_tx *adtx = to_admac_tx(tx); + struct admac_chan *adchan = to_admac_chan(tx->chan); + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&adchan->lock, flags); + cookie = dma_cookie_assign(tx); + list_add_tail(&adtx->node, &adchan->submitted); + spin_unlock_irqrestore(&adchan->lock, flags); + + return cookie; +} + +static int admac_desc_free(struct dma_async_tx_descriptor *tx) +{ + kfree(to_admac_tx(tx)); + + return 0; +} + +static struct dma_async_tx_descriptor *admac_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct admac_chan *adchan = container_of(chan, struct admac_chan, chan); + struct admac_tx *adtx; + + if (direction != admac_chan_direction(adchan->no)) + return NULL; + + adtx = kzalloc(sizeof(*adtx), GFP_NOWAIT); + if (!adtx) + return NULL; + + adtx->cyclic = true; + + adtx->buf_addr = buf_addr; + adtx->buf_len = buf_len; + adtx->buf_end = buf_addr + buf_len; + adtx->period_len = period_len; + + adtx->submitted_pos = 0; + adtx->reclaimed_pos = 0; + + dma_async_tx_descriptor_init(&adtx->tx, chan); + adtx->tx.tx_submit = admac_tx_submit; + adtx->tx.desc_free = admac_desc_free; + + return &adtx->tx; +} + +/* + * Write one hardware descriptor for a dmaengine cyclic transaction. + */ +static void admac_cyclic_write_one_desc(struct admac_data *ad, int channo, + struct admac_tx *tx) +{ + dma_addr_t addr; + + addr = tx->buf_addr + (tx->submitted_pos % tx->buf_len); + + /* If happens means we have buggy code */ + WARN_ON_ONCE(addr + tx->period_len > tx->buf_end); + + dev_dbg(ad->dev, "ch%d descriptor: addr=0x%pad len=0x%zx flags=0x%lx\n", + channo, &addr, tx->period_len, FLAG_DESC_NOTIFY); + + writel_relaxed(lower_32_bits(addr), ad->base + REG_DESC_WRITE(channo)); + writel_relaxed(upper_32_bits(addr), ad->base + REG_DESC_WRITE(channo)); + writel_relaxed(tx->period_len, ad->base + REG_DESC_WRITE(channo)); + writel_relaxed(FLAG_DESC_NOTIFY, ad->base + REG_DESC_WRITE(channo)); + + tx->submitted_pos += tx->period_len; + tx->submitted_pos %= 2 * tx->buf_len; +} + +/* + * Write all the hardware descriptors for a dmaengine cyclic + * transaction there is space for. + */ +static void admac_cyclic_write_desc(struct admac_data *ad, int channo, + struct admac_tx *tx) +{ + int i; + + for (i = 0; i < 4; i++) { + if (readl_relaxed(ad->base + REG_DESC_RING(channo)) & RING_FULL) + break; + admac_cyclic_write_one_desc(ad, channo, tx); + } +} + +static int admac_ring_noccupied_slots(int ringval) +{ + int wrslot = FIELD_GET(RING_WRITE_SLOT, ringval); + int rdslot = FIELD_GET(RING_READ_SLOT, ringval); + + if (wrslot != rdslot) { + return (wrslot + 4 - rdslot) % 4; + } else { + WARN_ON((ringval & (RING_FULL | RING_EMPTY)) == 0); + + if (ringval & RING_FULL) + return 4; + else + return 0; + } +} + +/* + * Read from hardware the residue of a cyclic dmaengine transaction. + */ +static u32 admac_cyclic_read_residue(struct admac_data *ad, int channo, + struct admac_tx *adtx) +{ + u32 ring1, ring2; + u32 residue1, residue2; + int nreports; + size_t pos; + + ring1 = readl_relaxed(ad->base + REG_REPORT_RING(channo)); + residue1 = readl_relaxed(ad->base + REG_RESIDUE(channo)); + ring2 = readl_relaxed(ad->base + REG_REPORT_RING(channo)); + residue2 = readl_relaxed(ad->base + REG_RESIDUE(channo)); + + if (residue2 > residue1) { + /* + * Controller must have loaded next descriptor between + * the two residue reads + */ + nreports = admac_ring_noccupied_slots(ring1) + 1; + } else { + /* No descriptor load between the two reads, ring2 is safe to use */ + nreports = admac_ring_noccupied_slots(ring2); + } + + pos = adtx->reclaimed_pos + adtx->period_len * (nreports + 1) - residue2; + + return adtx->buf_len - pos % adtx->buf_len; +} + +static enum dma_status admac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct admac_chan *adchan = to_admac_chan(chan); + struct admac_data *ad = adchan->host; + struct admac_tx *adtx; + + enum dma_status ret; + size_t residue; + unsigned long flags; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&adchan->lock, flags); + adtx = adchan->current_tx; + + if (adtx && adtx->tx.cookie == cookie) { + ret = DMA_IN_PROGRESS; + residue = admac_cyclic_read_residue(ad, adchan->no, adtx); + } else { + ret = DMA_IN_PROGRESS; + residue = 0; + list_for_each_entry(adtx, &adchan->issued, node) { + if (adtx->tx.cookie == cookie) { + residue = adtx->buf_len; + break; + } + } + } + spin_unlock_irqrestore(&adchan->lock, flags); + + dma_set_residue(txstate, residue); + return ret; +} + +static void admac_start_chan(struct admac_chan *adchan) +{ + struct admac_data *ad = adchan->host; + u32 startbit = 1 << (adchan->no / 2); + + writel_relaxed(STATUS_DESC_DONE | STATUS_ERR, + ad->base + REG_CHAN_INTSTATUS(adchan->no, ad->irq_index)); + writel_relaxed(STATUS_DESC_DONE | STATUS_ERR, + ad->base + REG_CHAN_INTMASK(adchan->no, ad->irq_index)); + + switch (admac_chan_direction(adchan->no)) { + case DMA_MEM_TO_DEV: + writel_relaxed(startbit, ad->base + REG_TX_START); + break; + case DMA_DEV_TO_MEM: + writel_relaxed(startbit, ad->base + REG_RX_START); + break; + default: + break; + } + dev_dbg(adchan->host->dev, "ch%d start\n", adchan->no); +} + +static void admac_stop_chan(struct admac_chan *adchan) +{ + struct admac_data *ad = adchan->host; + u32 stopbit = 1 << (adchan->no / 2); + + switch (admac_chan_direction(adchan->no)) { + case DMA_MEM_TO_DEV: + writel_relaxed(stopbit, ad->base + REG_TX_STOP); + break; + case DMA_DEV_TO_MEM: + writel_relaxed(stopbit, ad->base + REG_RX_STOP); + break; + default: + break; + } + dev_dbg(adchan->host->dev, "ch%d stop\n", adchan->no); +} + +static void admac_reset_rings(struct admac_chan *adchan) +{ + struct admac_data *ad = adchan->host; + + writel_relaxed(REG_CHAN_CTL_RST_RINGS, + ad->base + REG_CHAN_CTL(adchan->no)); + writel_relaxed(0, ad->base + REG_CHAN_CTL(adchan->no)); +} + +static void admac_start_current_tx(struct admac_chan *adchan) +{ + struct admac_data *ad = adchan->host; + int ch = adchan->no; + + admac_reset_rings(adchan); + writel_relaxed(0, ad->base + REG_CHAN_CTL(ch)); + + admac_cyclic_write_one_desc(ad, ch, adchan->current_tx); + admac_start_chan(adchan); + admac_cyclic_write_desc(ad, ch, adchan->current_tx); +} + +static void admac_issue_pending(struct dma_chan *chan) +{ + struct admac_chan *adchan = to_admac_chan(chan); + struct admac_tx *tx; + unsigned long flags; + + spin_lock_irqsave(&adchan->lock, flags); + list_splice_tail_init(&adchan->submitted, &adchan->issued); + if (!list_empty(&adchan->issued) && !adchan->current_tx) { + tx = list_first_entry(&adchan->issued, struct admac_tx, node); + list_del(&tx->node); + + adchan->current_tx = tx; + adchan->nperiod_acks = 0; + admac_start_current_tx(adchan); + } + spin_unlock_irqrestore(&adchan->lock, flags); +} + +static int admac_pause(struct dma_chan *chan) +{ + struct admac_chan *adchan = to_admac_chan(chan); + + admac_stop_chan(adchan); + + return 0; +} + +static int admac_resume(struct dma_chan *chan) +{ + struct admac_chan *adchan = to_admac_chan(chan); + + admac_start_chan(adchan); + + return 0; +} + +static int admac_terminate_all(struct dma_chan *chan) +{ + struct admac_chan *adchan = to_admac_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&adchan->lock, flags); + admac_stop_chan(adchan); + admac_reset_rings(adchan); + + if (adchan->current_tx) { + list_add_tail(&adchan->current_tx->node, &adchan->to_free); + adchan->current_tx = NULL; + } + /* + * Descriptors can only be freed after the tasklet + * has been killed (in admac_synchronize). + */ + list_splice_tail_init(&adchan->submitted, &adchan->to_free); + list_splice_tail_init(&adchan->issued, &adchan->to_free); + spin_unlock_irqrestore(&adchan->lock, flags); + + return 0; +} + +static void admac_synchronize(struct dma_chan *chan) +{ + struct admac_chan *adchan = to_admac_chan(chan); + struct admac_tx *adtx, *_adtx; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&adchan->lock, flags); + list_splice_tail_init(&adchan->to_free, &head); + spin_unlock_irqrestore(&adchan->lock, flags); + + tasklet_kill(&adchan->tasklet); + + list_for_each_entry_safe(adtx, _adtx, &head, node) { + list_del(&adtx->node); + admac_desc_free(&adtx->tx); + } +} + +static int admac_alloc_chan_resources(struct dma_chan *chan) +{ + struct admac_chan *adchan = to_admac_chan(chan); + struct admac_data *ad = adchan->host; + int ret; + + dma_cookie_init(&adchan->chan); + ret = admac_alloc_sram_carveout(ad, admac_chan_direction(adchan->no), + &adchan->carveout); + if (ret < 0) + return ret; + + writel_relaxed(adchan->carveout, + ad->base + REG_CHAN_SRAM_CARVEOUT(adchan->no)); + return 0; +} + +static void admac_free_chan_resources(struct dma_chan *chan) +{ + struct admac_chan *adchan = to_admac_chan(chan); + + admac_terminate_all(chan); + admac_synchronize(chan); + admac_free_sram_carveout(adchan->host, admac_chan_direction(adchan->no), + adchan->carveout); +} + +static struct dma_chan *admac_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct admac_data *ad = (struct admac_data *) ofdma->of_dma_data; + unsigned int index; + + if (dma_spec->args_count != 1) + return NULL; + + index = dma_spec->args[0]; + + if (index >= ad->nchannels) { + dev_err(ad->dev, "channel index %u out of bounds\n", index); + return NULL; + } + + return dma_get_slave_channel(&ad->channels[index].chan); +} + +static int admac_drain_reports(struct admac_data *ad, int channo) +{ + int count; + + for (count = 0; count < 4; count++) { + u32 countval_hi, countval_lo, unk1, flags; + + if (readl_relaxed(ad->base + REG_REPORT_RING(channo)) & RING_EMPTY) + break; + + countval_lo = readl_relaxed(ad->base + REG_REPORT_READ(channo)); + countval_hi = readl_relaxed(ad->base + REG_REPORT_READ(channo)); + unk1 = readl_relaxed(ad->base + REG_REPORT_READ(channo)); + flags = readl_relaxed(ad->base + REG_REPORT_READ(channo)); + + dev_dbg(ad->dev, "ch%d report: countval=0x%llx unk1=0x%x flags=0x%x\n", + channo, ((u64) countval_hi) << 32 | countval_lo, unk1, flags); + } + + return count; +} + +static void admac_handle_status_err(struct admac_data *ad, int channo) +{ + bool handled = false; + + if (readl_relaxed(ad->base + REG_DESC_RING(channo)) & RING_ERR) { + writel_relaxed(RING_ERR, ad->base + REG_DESC_RING(channo)); + dev_err_ratelimited(ad->dev, "ch%d descriptor ring error\n", channo); + handled = true; + } + + if (readl_relaxed(ad->base + REG_REPORT_RING(channo)) & RING_ERR) { + writel_relaxed(RING_ERR, ad->base + REG_REPORT_RING(channo)); + dev_err_ratelimited(ad->dev, "ch%d report ring error\n", channo); + handled = true; + } + + if (unlikely(!handled)) { + dev_err(ad->dev, "ch%d unknown error, masking errors as cause of IRQs\n", channo); + admac_modify(ad, REG_CHAN_INTMASK(channo, ad->irq_index), + STATUS_ERR, 0); + } +} + +static void admac_handle_status_desc_done(struct admac_data *ad, int channo) +{ + struct admac_chan *adchan = &ad->channels[channo]; + unsigned long flags; + int nreports; + + writel_relaxed(STATUS_DESC_DONE, + ad->base + REG_CHAN_INTSTATUS(channo, ad->irq_index)); + + spin_lock_irqsave(&adchan->lock, flags); + nreports = admac_drain_reports(ad, channo); + + if (adchan->current_tx) { + struct admac_tx *tx = adchan->current_tx; + + adchan->nperiod_acks += nreports; + tx->reclaimed_pos += nreports * tx->period_len; + tx->reclaimed_pos %= 2 * tx->buf_len; + + admac_cyclic_write_desc(ad, channo, tx); + tasklet_schedule(&adchan->tasklet); + } + spin_unlock_irqrestore(&adchan->lock, flags); +} + +static void admac_handle_chan_int(struct admac_data *ad, int no) +{ + u32 cause = readl_relaxed(ad->base + REG_CHAN_INTSTATUS(no, ad->irq_index)); + + if (cause & STATUS_ERR) + admac_handle_status_err(ad, no); + + if (cause & STATUS_DESC_DONE) + admac_handle_status_desc_done(ad, no); +} + +static irqreturn_t admac_interrupt(int irq, void *devid) +{ + struct admac_data *ad = devid; + u32 rx_intstate, tx_intstate, global_intstate; + int i; + + rx_intstate = readl_relaxed(ad->base + REG_RX_INTSTATE(ad->irq_index)); + tx_intstate = readl_relaxed(ad->base + REG_TX_INTSTATE(ad->irq_index)); + global_intstate = readl_relaxed(ad->base + REG_GLOBAL_INTSTATE(ad->irq_index)); + + if (!tx_intstate && !rx_intstate && !global_intstate) + return IRQ_NONE; + + for (i = 0; i < ad->nchannels; i += 2) { + if (tx_intstate & 1) + admac_handle_chan_int(ad, i); + tx_intstate >>= 1; + } + + for (i = 1; i < ad->nchannels; i += 2) { + if (rx_intstate & 1) + admac_handle_chan_int(ad, i); + rx_intstate >>= 1; + } + + if (global_intstate) { + dev_warn(ad->dev, "clearing unknown global interrupt flag: %x\n", + global_intstate); + writel_relaxed(~(u32) 0, ad->base + REG_GLOBAL_INTSTATE(ad->irq_index)); + } + + return IRQ_HANDLED; +} + +static void admac_chan_tasklet(struct tasklet_struct *t) +{ + struct admac_chan *adchan = from_tasklet(adchan, t, tasklet); + struct admac_tx *adtx; + struct dmaengine_desc_callback cb; + struct dmaengine_result tx_result; + int nacks; + + spin_lock_irq(&adchan->lock); + adtx = adchan->current_tx; + nacks = adchan->nperiod_acks; + adchan->nperiod_acks = 0; + spin_unlock_irq(&adchan->lock); + + if (!adtx || !nacks) + return; + + tx_result.result = DMA_TRANS_NOERROR; + tx_result.residue = 0; + + dmaengine_desc_get_callback(&adtx->tx, &cb); + while (nacks--) + dmaengine_desc_callback_invoke(&cb, &tx_result); +} + +static int admac_device_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct admac_chan *adchan = to_admac_chan(chan); + struct admac_data *ad = adchan->host; + bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV; + int wordsize = 0; + u32 bus_width = 0; + + switch (is_tx ? config->dst_addr_width : config->src_addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + wordsize = 1; + bus_width |= BUS_WIDTH_8BIT; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + wordsize = 2; + bus_width |= BUS_WIDTH_16BIT; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + wordsize = 4; + bus_width |= BUS_WIDTH_32BIT; + break; + default: + return -EINVAL; + } + + /* + * We take port_window_size to be the number of words in a frame. + * + * The controller has some means of out-of-band signalling, to the peripheral, + * of words position in a frame. That's where the importance of this control + * comes from. + */ + switch (is_tx ? config->dst_port_window_size : config->src_port_window_size) { + case 0 ... 1: + break; + case 2: + bus_width |= BUS_WIDTH_FRAME_2_WORDS; + break; + case 4: + bus_width |= BUS_WIDTH_FRAME_4_WORDS; + break; + default: + return -EINVAL; + } + + writel_relaxed(bus_width, ad->base + REG_BUS_WIDTH(adchan->no)); + + /* + * By FIFOCTL_LIMIT we seem to set the maximal number of bytes allowed to be + * held in controller's per-channel FIFO. Transfers seem to be triggered + * around the time FIFO occupancy touches FIFOCTL_THRESHOLD. + * + * The numbers we set are more or less arbitrary. + */ + writel_relaxed(FIELD_PREP(CHAN_FIFOCTL_LIMIT, 0x30 * wordsize) + | FIELD_PREP(CHAN_FIFOCTL_THRESHOLD, 0x18 * wordsize), + ad->base + REG_CHAN_FIFOCTL(adchan->no)); + + return 0; +} + +static int admac_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct admac_data *ad; + struct dma_device *dma; + int nchannels; + int err, irq, i; + + err = of_property_read_u32(np, "dma-channels", &nchannels); + if (err || nchannels > NCHANNELS_MAX) { + dev_err(&pdev->dev, "missing or invalid dma-channels property\n"); + return -EINVAL; + } + + ad = devm_kzalloc(&pdev->dev, struct_size(ad, channels, nchannels), GFP_KERNEL); + if (!ad) + return -ENOMEM; + + platform_set_drvdata(pdev, ad); + ad->dev = &pdev->dev; + ad->nchannels = nchannels; + mutex_init(&ad->cache_alloc_lock); + + /* + * The controller has 4 IRQ outputs. Try them all until + * we find one we can use. + */ + for (i = 0; i < IRQ_NOUTPUTS; i++) { + irq = platform_get_irq_optional(pdev, i); + if (irq >= 0) { + ad->irq_index = i; + break; + } + } + + if (irq < 0) + return dev_err_probe(&pdev->dev, irq, "no usable interrupt\n"); + ad->irq = irq; + + ad->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ad->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(ad->base), + "unable to obtain MMIO resource\n"); + + ad->rstc = devm_reset_control_get_optional_shared(&pdev->dev, NULL); + if (IS_ERR(ad->rstc)) + return PTR_ERR(ad->rstc); + + dma = &ad->dma; + + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + dma_cap_set(DMA_CYCLIC, dma->cap_mask); + + dma->dev = &pdev->dev; + dma->device_alloc_chan_resources = admac_alloc_chan_resources; + dma->device_free_chan_resources = admac_free_chan_resources; + dma->device_tx_status = admac_tx_status; + dma->device_issue_pending = admac_issue_pending; + dma->device_terminate_all = admac_terminate_all; + dma->device_synchronize = admac_synchronize; + dma->device_prep_dma_cyclic = admac_prep_dma_cyclic; + dma->device_config = admac_device_config; + dma->device_pause = admac_pause; + dma->device_resume = admac_resume; + + dma->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + dma->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + dma->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + + INIT_LIST_HEAD(&dma->channels); + for (i = 0; i < nchannels; i++) { + struct admac_chan *adchan = &ad->channels[i]; + + adchan->host = ad; + adchan->no = i; + adchan->chan.device = &ad->dma; + spin_lock_init(&adchan->lock); + INIT_LIST_HEAD(&adchan->submitted); + INIT_LIST_HEAD(&adchan->issued); + INIT_LIST_HEAD(&adchan->to_free); + list_add_tail(&adchan->chan.device_node, &dma->channels); + tasklet_setup(&adchan->tasklet, admac_chan_tasklet); + } + + err = reset_control_reset(ad->rstc); + if (err) + return dev_err_probe(&pdev->dev, err, + "unable to trigger reset\n"); + + err = request_irq(irq, admac_interrupt, 0, dev_name(&pdev->dev), ad); + if (err) { + dev_err_probe(&pdev->dev, err, + "unable to register interrupt\n"); + goto free_reset; + } + + err = dma_async_device_register(&ad->dma); + if (err) { + dev_err_probe(&pdev->dev, err, "failed to register DMA device\n"); + goto free_irq; + } + + err = of_dma_controller_register(pdev->dev.of_node, admac_dma_of_xlate, ad); + if (err) { + dma_async_device_unregister(&ad->dma); + dev_err_probe(&pdev->dev, err, "failed to register with OF\n"); + goto free_irq; + } + + ad->txcache.size = readl_relaxed(ad->base + REG_TX_SRAM_SIZE); + ad->rxcache.size = readl_relaxed(ad->base + REG_RX_SRAM_SIZE); + + dev_info(&pdev->dev, "Audio DMA Controller\n"); + dev_info(&pdev->dev, "imprint %x TX cache %u RX cache %u\n", + readl_relaxed(ad->base + REG_IMPRINT), ad->txcache.size, ad->rxcache.size); + + return 0; + +free_irq: + free_irq(ad->irq, ad); +free_reset: + reset_control_rearm(ad->rstc); + return err; +} + +static int admac_remove(struct platform_device *pdev) +{ + struct admac_data *ad = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&ad->dma); + free_irq(ad->irq, ad); + reset_control_rearm(ad->rstc); + + return 0; +} + +static const struct of_device_id admac_of_match[] = { + { .compatible = "apple,admac", }, + { } +}; +MODULE_DEVICE_TABLE(of, admac_of_match); + +static struct platform_driver apple_admac_driver = { + .driver = { + .name = "apple-admac", + .of_match_table = admac_of_match, + }, + .probe = admac_probe, + .remove = admac_remove, +}; +module_platform_driver(apple_admac_driver); + +MODULE_AUTHOR("Martin PoviÅ¡er "); +MODULE_DESCRIPTION("Driver for Audio DMA Controller (ADMAC) on Apple SoCs"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c new file mode 100644 index 0000000000..b2876f6747 --- /dev/null +++ b/drivers/dma/at_hdmac.c @@ -0,0 +1,2271 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems) + * + * Copyright (C) 2008 Atmel Corporation + * Copyright (C) 2022 Microchip Technology, Inc. and its subsidiaries + * + * This supports the Atmel AHB DMA Controller found in several Atmel SoCs. + * The only Atmel DMA Controller that is not covered by this driver is the one + * found on AT91SAM9263. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +/* + * Glossary + * -------- + * + * at_hdmac : Name of the ATmel AHB DMA Controller + * at_dma_ / atdma : ATmel DMA controller entity related + * atc_ / atchan : ATmel DMA Channel entity related + */ + +#define AT_DMA_MAX_NR_CHANNELS 8 + +/* Global Configuration Register */ +#define AT_DMA_GCFG 0x00 +#define AT_DMA_IF_BIGEND(i) BIT((i)) /* AHB-Lite Interface i in Big-endian mode */ +#define AT_DMA_ARB_CFG BIT(4) /* Arbiter mode. */ + +/* Controller Enable Register */ +#define AT_DMA_EN 0x04 +#define AT_DMA_ENABLE BIT(0) + +/* Software Single Request Register */ +#define AT_DMA_SREQ 0x08 +#define AT_DMA_SSREQ(x) BIT((x) << 1) /* Request a source single transfer on channel x */ +#define AT_DMA_DSREQ(x) BIT(1 + ((x) << 1)) /* Request a destination single transfer on channel x */ + +/* Software Chunk Transfer Request Register */ +#define AT_DMA_CREQ 0x0c +#define AT_DMA_SCREQ(x) BIT((x) << 1) /* Request a source chunk transfer on channel x */ +#define AT_DMA_DCREQ(x) BIT(1 + ((x) << 1)) /* Request a destination chunk transfer on channel x */ + +/* Software Last Transfer Flag Register */ +#define AT_DMA_LAST 0x10 +#define AT_DMA_SLAST(x) BIT((x) << 1) /* This src rq is last tx of buffer on channel x */ +#define AT_DMA_DLAST(x) BIT(1 + ((x) << 1)) /* This dst rq is last tx of buffer on channel x */ + +/* Request Synchronization Register */ +#define AT_DMA_SYNC 0x14 +#define AT_DMA_SYR(h) BIT((h)) /* Synchronize handshake line h */ + +/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ +#define AT_DMA_EBCIER 0x18 /* Enable register */ +#define AT_DMA_EBCIDR 0x1c /* Disable register */ +#define AT_DMA_EBCIMR 0x20 /* Mask Register */ +#define AT_DMA_EBCISR 0x24 /* Status Register */ +#define AT_DMA_CBTC_OFFSET 8 +#define AT_DMA_ERR_OFFSET 16 +#define AT_DMA_BTC(x) BIT((x)) +#define AT_DMA_CBTC(x) BIT(AT_DMA_CBTC_OFFSET + (x)) +#define AT_DMA_ERR(x) BIT(AT_DMA_ERR_OFFSET + (x)) + +/* Channel Handler Enable Register */ +#define AT_DMA_CHER 0x28 +#define AT_DMA_ENA(x) BIT((x)) +#define AT_DMA_SUSP(x) BIT(8 + (x)) +#define AT_DMA_KEEP(x) BIT(24 + (x)) + +/* Channel Handler Disable Register */ +#define AT_DMA_CHDR 0x2c +#define AT_DMA_DIS(x) BIT(x) +#define AT_DMA_RES(x) BIT(8 + (x)) + +/* Channel Handler Status Register */ +#define AT_DMA_CHSR 0x30 +#define AT_DMA_EMPT(x) BIT(16 + (x)) +#define AT_DMA_STAL(x) BIT(24 + (x)) + +/* Channel registers base address */ +#define AT_DMA_CH_REGS_BASE 0x3c +#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ + +/* Hardware register offset for each channel */ +#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ +#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ +#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ +#define ATC_CTRLA_OFFSET 0x0c /* Control A Register */ +#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ +#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ +#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ +#define ATC_DPIP_OFFSET 0x1c /* Dst PIP Configuration Register */ + + +/* Bitfield definitions */ + +/* Bitfields in DSCR */ +#define ATC_DSCR_IF GENMASK(1, 0) /* Dsc feched via AHB-Lite Interface */ + +/* Bitfields in CTRLA */ +#define ATC_BTSIZE_MAX GENMASK(15, 0) /* Maximum Buffer Transfer Size */ +#define ATC_BTSIZE GENMASK(15, 0) /* Buffer Transfer Size */ +#define ATC_SCSIZE GENMASK(18, 16) /* Source Chunk Transfer Size */ +#define ATC_DCSIZE GENMASK(22, 20) /* Destination Chunk Transfer Size */ +#define ATC_SRC_WIDTH GENMASK(25, 24) /* Source Single Transfer Size */ +#define ATC_DST_WIDTH GENMASK(29, 28) /* Destination Single Transfer Size */ +#define ATC_DONE BIT(31) /* Tx Done (only written back in descriptor) */ + +/* Bitfields in CTRLB */ +#define ATC_SIF GENMASK(1, 0) /* Src tx done via AHB-Lite Interface i */ +#define ATC_DIF GENMASK(5, 4) /* Dst tx done via AHB-Lite Interface i */ +#define AT_DMA_MEM_IF 0x0 /* interface 0 as memory interface */ +#define AT_DMA_PER_IF 0x1 /* interface 1 as peripheral interface */ +#define ATC_SRC_PIP BIT(8) /* Source Picture-in-Picture enabled */ +#define ATC_DST_PIP BIT(12) /* Destination Picture-in-Picture enabled */ +#define ATC_SRC_DSCR_DIS BIT(16) /* Src Descriptor fetch disable */ +#define ATC_DST_DSCR_DIS BIT(20) /* Dst Descriptor fetch disable */ +#define ATC_FC GENMASK(23, 21) /* Choose Flow Controller */ +#define ATC_FC_MEM2MEM 0x0 /* Mem-to-Mem (DMA) */ +#define ATC_FC_MEM2PER 0x1 /* Mem-to-Periph (DMA) */ +#define ATC_FC_PER2MEM 0x2 /* Periph-to-Mem (DMA) */ +#define ATC_FC_PER2PER 0x3 /* Periph-to-Periph (DMA) */ +#define ATC_FC_PER2MEM_PER 0x4 /* Periph-to-Mem (Peripheral) */ +#define ATC_FC_MEM2PER_PER 0x5 /* Mem-to-Periph (Peripheral) */ +#define ATC_FC_PER2PER_SRCPER 0x6 /* Periph-to-Periph (Src Peripheral) */ +#define ATC_FC_PER2PER_DSTPER 0x7 /* Periph-to-Periph (Dst Peripheral) */ +#define ATC_SRC_ADDR_MODE GENMASK(25, 24) +#define ATC_SRC_ADDR_MODE_INCR 0x0 /* Incrementing Mode */ +#define ATC_SRC_ADDR_MODE_DECR 0x1 /* Decrementing Mode */ +#define ATC_SRC_ADDR_MODE_FIXED 0x2 /* Fixed Mode */ +#define ATC_DST_ADDR_MODE GENMASK(29, 28) +#define ATC_DST_ADDR_MODE_INCR 0x0 /* Incrementing Mode */ +#define ATC_DST_ADDR_MODE_DECR 0x1 /* Decrementing Mode */ +#define ATC_DST_ADDR_MODE_FIXED 0x2 /* Fixed Mode */ +#define ATC_IEN BIT(30) /* BTC interrupt enable (active low) */ +#define ATC_AUTO BIT(31) /* Auto multiple buffer tx enable */ + +/* Bitfields in CFG */ +#define ATC_SRC_PER GENMASK(3, 0) /* Channel src rq associated with periph handshaking ifc h */ +#define ATC_DST_PER GENMASK(7, 4) /* Channel dst rq associated with periph handshaking ifc h */ +#define ATC_SRC_REP BIT(8) /* Source Replay Mod */ +#define ATC_SRC_H2SEL BIT(9) /* Source Handshaking Mod */ +#define ATC_SRC_PER_MSB GENMASK(11, 10) /* Channel src rq (most significant bits) */ +#define ATC_DST_REP BIT(12) /* Destination Replay Mod */ +#define ATC_DST_H2SEL BIT(13) /* Destination Handshaking Mod */ +#define ATC_DST_PER_MSB GENMASK(15, 14) /* Channel dst rq (most significant bits) */ +#define ATC_SOD BIT(16) /* Stop On Done */ +#define ATC_LOCK_IF BIT(20) /* Interface Lock */ +#define ATC_LOCK_B BIT(21) /* AHB Bus Lock */ +#define ATC_LOCK_IF_L BIT(22) /* Master Interface Arbiter Lock */ +#define ATC_AHB_PROT GENMASK(26, 24) /* AHB Protection */ +#define ATC_FIFOCFG GENMASK(29, 28) /* FIFO Request Configuration */ +#define ATC_FIFOCFG_LARGESTBURST 0x0 +#define ATC_FIFOCFG_HALFFIFO 0x1 +#define ATC_FIFOCFG_ENOUGHSPACE 0x2 + +/* Bitfields in SPIP */ +#define ATC_SPIP_HOLE GENMASK(15, 0) +#define ATC_SPIP_BOUNDARY GENMASK(25, 16) + +/* Bitfields in DPIP */ +#define ATC_DPIP_HOLE GENMASK(15, 0) +#define ATC_DPIP_BOUNDARY GENMASK(25, 16) + +#define ATC_PER_MSB GENMASK(5, 4) /* Extract MSBs of a handshaking identifier */ +#define ATC_SRC_PER_ID(id) \ + ({ typeof(id) _id = (id); \ + FIELD_PREP(ATC_SRC_PER_MSB, FIELD_GET(ATC_PER_MSB, _id)) | \ + FIELD_PREP(ATC_SRC_PER, _id); }) +#define ATC_DST_PER_ID(id) \ + ({ typeof(id) _id = (id); \ + FIELD_PREP(ATC_DST_PER_MSB, FIELD_GET(ATC_PER_MSB, _id)) | \ + FIELD_PREP(ATC_DST_PER, _id); }) + + + +/*-- descriptors -----------------------------------------------------*/ + +/* LLI == Linked List Item; aka DMA buffer descriptor */ +struct at_lli { + /* values that are not changed by hardware */ + u32 saddr; + u32 daddr; + /* value that may get written back: */ + u32 ctrla; + /* more values that are not changed by hardware */ + u32 ctrlb; + u32 dscr; /* chain to next lli */ +}; + +/** + * struct atdma_sg - atdma scatter gather entry + * @len: length of the current Linked List Item. + * @lli: linked list item that is passed to the DMA controller + * @lli_phys: physical address of the LLI. + */ +struct atdma_sg { + unsigned int len; + struct at_lli *lli; + dma_addr_t lli_phys; +}; + +/** + * struct at_desc - software descriptor + * @vd: pointer to the virtual dma descriptor. + * @atchan: pointer to the atmel dma channel. + * @total_len: total transaction byte count + * @sg_len: number of sg entries. + * @sg: array of sgs. + */ +struct at_desc { + struct virt_dma_desc vd; + struct at_dma_chan *atchan; + size_t total_len; + unsigned int sglen; + /* Interleaved data */ + size_t boundary; + size_t dst_hole; + size_t src_hole; + + /* Memset temporary buffer */ + bool memset_buffer; + dma_addr_t memset_paddr; + int *memset_vaddr; + struct atdma_sg sg[]; +}; + +/*-- Channels --------------------------------------------------------*/ + +/** + * atc_status - information bits stored in channel status flag + * + * Manipulated with atomic operations. + */ +enum atc_status { + ATC_IS_PAUSED = 1, + ATC_IS_CYCLIC = 24, +}; + +/** + * struct at_dma_chan - internal representation of an Atmel HDMAC channel + * @vc: virtual dma channel entry. + * @atdma: pointer to the driver data. + * @ch_regs: memory mapped register base + * @mask: channel index in a mask + * @per_if: peripheral interface + * @mem_if: memory interface + * @status: transmit status information from irq/prep* functions + * to tasklet (use atomic operations) + * @save_cfg: configuration register that is saved on suspend/resume cycle + * @save_dscr: for cyclic operations, preserve next descriptor address in + * the cyclic list on suspend/resume cycle + * @dma_sconfig: configuration for slave transfers, passed via + * .device_config + * @desc: pointer to the atmel dma descriptor. + */ +struct at_dma_chan { + struct virt_dma_chan vc; + struct at_dma *atdma; + void __iomem *ch_regs; + u8 mask; + u8 per_if; + u8 mem_if; + unsigned long status; + u32 save_cfg; + u32 save_dscr; + struct dma_slave_config dma_sconfig; + bool cyclic; + struct at_desc *desc; +}; + +#define channel_readl(atchan, name) \ + __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) + +#define channel_writel(atchan, name, val) \ + __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) + +/* + * Fix sconfig's burst size according to at_hdmac. We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7. + * + * This can be done by finding most significant bit set. + */ +static inline void convert_burst(u32 *maxburst) +{ + if (*maxburst > 1) + *maxburst = fls(*maxburst) - 2; + else + *maxburst = 0; +} + +/* + * Fix sconfig's bus width according to at_hdmac. + * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2. + */ +static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) +{ + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return 1; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return 2; + default: + /* For 1 byte width or fallback */ + return 0; + } +} + +/*-- Controller ------------------------------------------------------*/ + +/** + * struct at_dma - internal representation of an Atmel HDMA Controller + * @dma_device: dmaengine dma_device object members + * @atdma_devtype: identifier of DMA controller compatibility + * @ch_regs: memory mapped register base + * @clk: dma controller clock + * @save_imr: interrupt mask register that is saved on suspend/resume cycle + * @all_chan_mask: all channels availlable in a mask + * @lli_pool: hw lli table + * @chan: channels table to store at_dma_chan structures + */ +struct at_dma { + struct dma_device dma_device; + void __iomem *regs; + struct clk *clk; + u32 save_imr; + + u8 all_chan_mask; + + struct dma_pool *lli_pool; + struct dma_pool *memset_pool; + /* AT THE END channels table */ + struct at_dma_chan chan[]; +}; + +#define dma_readl(atdma, name) \ + __raw_readl((atdma)->regs + AT_DMA_##name) +#define dma_writel(atdma, name, val) \ + __raw_writel((val), (atdma)->regs + AT_DMA_##name) + +static inline struct at_desc *to_atdma_desc(struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct at_desc, vd.tx); +} + +static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct at_dma_chan, vc.chan); +} + +static inline struct at_dma *to_at_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct at_dma, dma_device); +} + + +/*-- Helper functions ------------------------------------------------*/ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +#if defined(VERBOSE_DEBUG) +static void vdbg_dump_regs(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); + + dev_err(chan2dev(&atchan->vc.chan), + " channel %d : imr = 0x%x, chsr = 0x%x\n", + atchan->vc.chan.chan_id, + dma_readl(atdma, EBCIMR), + dma_readl(atdma, CHSR)); + + dev_err(chan2dev(&atchan->vc.chan), + " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", + channel_readl(atchan, SADDR), + channel_readl(atchan, DADDR), + channel_readl(atchan, CTRLA), + channel_readl(atchan, CTRLB), + channel_readl(atchan, CFG), + channel_readl(atchan, DSCR)); +} +#else +static void vdbg_dump_regs(struct at_dma_chan *atchan) {} +#endif + +static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) +{ + dev_crit(chan2dev(&atchan->vc.chan), + "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", + &lli->saddr, &lli->daddr, + lli->ctrla, lli->ctrlb, &lli->dscr); +} + + +static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on) +{ + u32 ebci; + + /* enable interrupts on buffer transfer completion & error */ + ebci = AT_DMA_BTC(chan_id) + | AT_DMA_ERR(chan_id); + if (on) + dma_writel(atdma, EBCIER, ebci); + else + dma_writel(atdma, EBCIDR, ebci); +} + +static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id) +{ + atc_setup_irq(atdma, chan_id, 1); +} + +static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) +{ + atc_setup_irq(atdma, chan_id, 0); +} + + +/** + * atc_chan_is_enabled - test if given channel is enabled + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); + + return !!(dma_readl(atdma, CHSR) & atchan->mask); +} + +/** + * atc_chan_is_paused - test channel pause/resume status + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_paused(struct at_dma_chan *atchan) +{ + return test_bit(ATC_IS_PAUSED, &atchan->status); +} + +/** + * atc_chan_is_cyclic - test if given channel has cyclic property set + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan) +{ + return test_bit(ATC_IS_CYCLIC, &atchan->status); +} + +/** + * set_lli_eol - set end-of-link to descriptor so it will end transfer + * @desc: descriptor, signle or at the end of a chain, to end chain on + * @i: index of the atmel scatter gather entry that is at the end of the chain. + */ +static void set_lli_eol(struct at_desc *desc, unsigned int i) +{ + u32 ctrlb = desc->sg[i].lli->ctrlb; + + ctrlb &= ~ATC_IEN; + ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + + desc->sg[i].lli->ctrlb = ctrlb; + desc->sg[i].lli->dscr = 0; +} + +#define ATC_DEFAULT_CFG FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO) +#define ATC_DEFAULT_CTRLB (FIELD_PREP(ATC_SIF, AT_DMA_MEM_IF) | \ + FIELD_PREP(ATC_DIF, AT_DMA_MEM_IF)) +#define ATC_DMA_BUSWIDTHS\ + (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +#define ATC_MAX_DSCR_TRIALS 10 + +/* + * Initial number of descriptors to allocate for each channel. This could + * be increased during dma usage. + */ +static unsigned int init_nr_desc_per_channel = 64; +module_param(init_nr_desc_per_channel, uint, 0644); +MODULE_PARM_DESC(init_nr_desc_per_channel, + "initial descriptors per channel (default: 64)"); + +/** + * struct at_dma_platform_data - Controller configuration parameters + * @nr_channels: Number of channels supported by hardware (max 8) + * @cap_mask: dma_capability flags supported by the platform + */ +struct at_dma_platform_data { + unsigned int nr_channels; + dma_cap_mask_t cap_mask; +}; + +/** + * struct at_dma_slave - Controller-specific information about a slave + * @dma_dev: required DMA master device + * @cfg: Platform-specific initializer for the CFG register + */ +struct at_dma_slave { + struct device *dma_dev; + u32 cfg; +}; + +static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst, + size_t len) +{ + unsigned int width; + + if (!((src | dst | len) & 3)) + width = 2; + else if (!((src | dst | len) & 1)) + width = 1; + else + width = 0; + + return width; +} + +static void atdma_lli_chain(struct at_desc *desc, unsigned int i) +{ + struct atdma_sg *atdma_sg = &desc->sg[i]; + + if (i) + desc->sg[i - 1].lli->dscr = atdma_sg->lli_phys; +} + +/** + * atc_dostart - starts the DMA engine for real + * @atchan: the channel we want to start + */ +static void atc_dostart(struct at_dma_chan *atchan) +{ + struct virt_dma_desc *vd = vchan_next_desc(&atchan->vc); + struct at_desc *desc; + + if (!vd) { + atchan->desc = NULL; + return; + } + + vdbg_dump_regs(atchan); + + list_del(&vd->node); + atchan->desc = desc = to_atdma_desc(&vd->tx); + + channel_writel(atchan, SADDR, 0); + channel_writel(atchan, DADDR, 0); + channel_writel(atchan, CTRLA, 0); + channel_writel(atchan, CTRLB, 0); + channel_writel(atchan, DSCR, desc->sg[0].lli_phys); + channel_writel(atchan, SPIP, + FIELD_PREP(ATC_SPIP_HOLE, desc->src_hole) | + FIELD_PREP(ATC_SPIP_BOUNDARY, desc->boundary)); + channel_writel(atchan, DPIP, + FIELD_PREP(ATC_DPIP_HOLE, desc->dst_hole) | + FIELD_PREP(ATC_DPIP_BOUNDARY, desc->boundary)); + + /* Don't allow CPU to reorder channel enable. */ + wmb(); + dma_writel(atchan->atdma, CHER, atchan->mask); + + vdbg_dump_regs(atchan); +} + +static void atdma_desc_free(struct virt_dma_desc *vd) +{ + struct at_dma *atdma = to_at_dma(vd->tx.chan->device); + struct at_desc *desc = to_atdma_desc(&vd->tx); + unsigned int i; + + for (i = 0; i < desc->sglen; i++) { + if (desc->sg[i].lli) + dma_pool_free(atdma->lli_pool, desc->sg[i].lli, + desc->sg[i].lli_phys); + } + + /* If the transfer was a memset, free our temporary buffer */ + if (desc->memset_buffer) { + dma_pool_free(atdma->memset_pool, desc->memset_vaddr, + desc->memset_paddr); + desc->memset_buffer = false; + } + + kfree(desc); +} + +/** + * atc_calc_bytes_left - calculates the number of bytes left according to the + * value read from CTRLA. + * + * @current_len: the number of bytes left before reading CTRLA + * @ctrla: the value of CTRLA + */ +static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) +{ + u32 btsize = FIELD_GET(ATC_BTSIZE, ctrla); + u32 src_width = FIELD_GET(ATC_SRC_WIDTH, ctrla); + + /* + * According to the datasheet, when reading the Control A Register + * (ctrla), the Buffer Transfer Size (btsize) bitfield refers to the + * number of transfers completed on the Source Interface. + * So btsize is always a number of source width transfers. + */ + return current_len - (btsize << src_width); +} + +/** + * atc_get_llis_residue - Get residue for a hardware linked list transfer + * + * Calculate the residue by removing the length of the Linked List Item (LLI) + * already transferred from the total length. To get the current LLI we can use + * the value of the channel's DSCR register and compare it against the DSCR + * value of each LLI. + * + * The CTRLA register provides us with the amount of data already read from the + * source for the LLI. So we can compute a more accurate residue by also + * removing the number of bytes corresponding to this amount of data. + * + * However, the DSCR and CTRLA registers cannot be read both atomically. Hence a + * race condition may occur: the first read register may refer to one LLI + * whereas the second read may refer to a later LLI in the list because of the + * DMA transfer progression inbetween the two reads. + * + * One solution could have been to pause the DMA transfer, read the DSCR and + * CTRLA then resume the DMA transfer. Nonetheless, this approach presents some + * drawbacks: + * - If the DMA transfer is paused, RX overruns or TX underruns are more likey + * to occur depending on the system latency. Taking the USART driver as an + * example, it uses a cyclic DMA transfer to read data from the Receive + * Holding Register (RHR) to avoid RX overruns since the RHR is not protected + * by any FIFO on most Atmel SoCs. So pausing the DMA transfer to compute the + * residue would break the USART driver design. + * - The atc_pause() function masks interrupts but we'd rather avoid to do so + * for system latency purpose. + * + * Then we'd rather use another solution: the DSCR is read a first time, the + * CTRLA is read in turn, next the DSCR is read a second time. If the two + * consecutive read values of the DSCR are the same then we assume both refers + * to the very same LLI as well as the CTRLA value read inbetween does. For + * cyclic tranfers, the assumption is that a full loop is "not so fast". If the + * two DSCR values are different, we read again the CTRLA then the DSCR till two + * consecutive read values from DSCR are equal or till the maximum trials is + * reach. This algorithm is very unlikely not to find a stable value for DSCR. + * @atchan: pointer to an atmel hdmac channel. + * @desc: pointer to the descriptor for which the residue is calculated. + * @residue: residue to be set to dma_tx_state. + * Returns 0 on success, -errno otherwise. + */ +static int atc_get_llis_residue(struct at_dma_chan *atchan, + struct at_desc *desc, u32 *residue) +{ + u32 len, ctrla, dscr; + unsigned int i; + + len = desc->total_len; + dscr = channel_readl(atchan, DSCR); + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) { + u32 new_dscr; + + rmb(); /* ensure DSCR is read after CTRLA */ + new_dscr = channel_readl(atchan, DSCR); + + /* + * If the DSCR register value has not changed inside the DMA + * controller since the previous read, we assume that both the + * dscr and ctrla values refers to the very same descriptor. + */ + if (likely(new_dscr == dscr)) + break; + + /* + * DSCR has changed inside the DMA controller, so the previouly + * read value of CTRLA may refer to an already processed + * descriptor hence could be outdated. We need to update ctrla + * to match the current descriptor. + */ + dscr = new_dscr; + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + } + if (unlikely(i == ATC_MAX_DSCR_TRIALS)) + return -ETIMEDOUT; + + /* For the first descriptor we can be more accurate. */ + if (desc->sg[0].lli->dscr == dscr) { + *residue = atc_calc_bytes_left(len, ctrla); + return 0; + } + len -= desc->sg[0].len; + + for (i = 1; i < desc->sglen; i++) { + if (desc->sg[i].lli && desc->sg[i].lli->dscr == dscr) + break; + len -= desc->sg[i].len; + } + + /* + * For the current LLI in the chain we can calculate the remaining bytes + * using the channel's CTRLA register. + */ + *residue = atc_calc_bytes_left(len, ctrla); + return 0; + +} + +/** + * atc_get_residue - get the number of bytes residue for a cookie. + * The residue is passed by address and updated on success. + * @chan: DMA channel + * @cookie: transaction identifier to check status of + * @residue: residue to be updated. + * Return 0 on success, -errono otherwise. + */ +static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, + u32 *residue) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct virt_dma_desc *vd; + struct at_desc *desc = NULL; + u32 len, ctrla; + + vd = vchan_find_desc(&atchan->vc, cookie); + if (vd) + desc = to_atdma_desc(&vd->tx); + else if (atchan->desc && atchan->desc->vd.tx.cookie == cookie) + desc = atchan->desc; + + if (!desc) + return -EINVAL; + + if (desc->sg[0].lli->dscr) + /* hardware linked list transfer */ + return atc_get_llis_residue(atchan, desc, residue); + + /* single transfer */ + len = desc->total_len; + ctrla = channel_readl(atchan, CTRLA); + *residue = atc_calc_bytes_left(len, ctrla); + return 0; +} + +/** + * atc_handle_error - handle errors reported by DMA controller + * @atchan: channel where error occurs. + * @i: channel index + */ +static void atc_handle_error(struct at_dma_chan *atchan, unsigned int i) +{ + struct at_desc *desc = atchan->desc; + + /* Disable channel on AHB error */ + dma_writel(atchan->atdma, CHDR, AT_DMA_RES(i) | atchan->mask); + + /* + * KERN_CRITICAL may seem harsh, but since this only happens + * when someone submits a bad physical address in a + * descriptor, we should consider ourselves lucky that the + * controller flagged an error instead of scribbling over + * random memory locations. + */ + dev_crit(chan2dev(&atchan->vc.chan), "Bad descriptor submitted for DMA!\n"); + dev_crit(chan2dev(&atchan->vc.chan), "cookie: %d\n", + desc->vd.tx.cookie); + for (i = 0; i < desc->sglen; i++) + atc_dump_lli(atchan, desc->sg[i].lli); +} + +static void atdma_handle_chan_done(struct at_dma_chan *atchan, u32 pending, + unsigned int i) +{ + struct at_desc *desc; + + spin_lock(&atchan->vc.lock); + desc = atchan->desc; + + if (desc) { + if (pending & AT_DMA_ERR(i)) { + atc_handle_error(atchan, i); + /* Pretend the descriptor completed successfully */ + } + + if (atc_chan_is_cyclic(atchan)) { + vchan_cyclic_callback(&desc->vd); + } else { + vchan_cookie_complete(&desc->vd); + atchan->desc = NULL; + if (!(atc_chan_is_enabled(atchan))) + atc_dostart(atchan); + } + } + spin_unlock(&atchan->vc.lock); +} + +static irqreturn_t at_dma_interrupt(int irq, void *dev_id) +{ + struct at_dma *atdma = dev_id; + struct at_dma_chan *atchan; + int i; + u32 status, pending, imr; + int ret = IRQ_NONE; + + do { + imr = dma_readl(atdma, EBCIMR); + status = dma_readl(atdma, EBCISR); + pending = status & imr; + + if (!pending) + break; + + dev_vdbg(atdma->dma_device.dev, + "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n", + status, imr, pending); + + for (i = 0; i < atdma->dma_device.chancnt; i++) { + atchan = &atdma->chan[i]; + if (!(pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i)))) + continue; + atdma_handle_chan_done(atchan, pending, i); + ret = IRQ_HANDLED; + } + + } while (pending); + + return ret; +} + +/*-- DMA Engine API --------------------------------------------------*/ +/** + * atc_prep_dma_interleaved - prepare memory to memory interleaved operation + * @chan: the channel to prepare operation on + * @xt: Interleaved transfer template + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_dma *atdma = to_at_dma(chan->device); + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct data_chunk *first; + struct atdma_sg *atdma_sg; + struct at_desc *desc; + struct at_lli *lli; + size_t xfer_count; + unsigned int dwidth; + u32 ctrla; + u32 ctrlb; + size_t len = 0; + int i; + + if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) + return NULL; + + first = xt->sgl; + + dev_info(chan2dev(chan), + "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, &xt->src_start, &xt->dst_start, xt->numf, + xt->frame_size, flags); + + /* + * The controller can only "skip" X bytes every Y bytes, so we + * need to make sure we are given a template that fit that + * description, ie a template with chunks that always have the + * same size, with the same ICGs. + */ + for (i = 0; i < xt->frame_size; i++) { + struct data_chunk *chunk = xt->sgl + i; + + if ((chunk->size != xt->sgl->size) || + (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) || + (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) { + dev_err(chan2dev(chan), + "%s: the controller can transfer only identical chunks\n", + __func__); + return NULL; + } + + len += chunk->size; + } + + dwidth = atc_get_xfer_width(xt->src_start, xt->dst_start, len); + + xfer_count = len >> dwidth; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return NULL; + } + + ctrla = FIELD_PREP(ATC_SRC_WIDTH, dwidth) | + FIELD_PREP(ATC_DST_WIDTH, dwidth); + + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + ATC_SRC_PIP | ATC_DST_PIP | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); + + desc = kzalloc(struct_size(desc, sg, 1), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = 1; + + atdma_sg = desc->sg; + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) { + kfree(desc); + return NULL; + } + lli = atdma_sg->lli; + + lli->saddr = xt->src_start; + lli->daddr = xt->dst_start; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; + + desc->boundary = first->size >> dwidth; + desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1; + desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1; + + atdma_sg->len = len; + desc->total_len = len; + + set_lli_eol(desc, 0); + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); +} + +/** + * atc_prep_dma_memcpy - prepare a memcpy operation + * @chan: the channel to prepare operation on + * @dest: operation virtual destination address + * @src: operation virtual source address + * @len: operation length + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct at_dma *atdma = to_at_dma(chan->device); + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_desc *desc = NULL; + size_t xfer_count; + size_t offset; + size_t sg_len; + unsigned int src_width; + unsigned int dst_width; + unsigned int i; + u32 ctrla; + u32 ctrlb; + + dev_dbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n", + &dest, &src, len, flags); + + if (unlikely(!len)) { + dev_err(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + sg_len = DIV_ROUND_UP(len, ATC_BTSIZE_MAX); + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = sg_len; + + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); + + /* + * We can be a lot more clever here, but this should take care + * of the most common optimization. + */ + src_width = dst_width = atc_get_xfer_width(src, dest, len); + + ctrla = FIELD_PREP(ATC_SRC_WIDTH, src_width) | + FIELD_PREP(ATC_DST_WIDTH, dst_width); + + for (offset = 0, i = 0; offset < len; + offset += xfer_count << src_width, i++) { + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; + + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + goto err_desc_get; + lli = atdma_sg->lli; + + xfer_count = min_t(size_t, (len - offset) >> src_width, + ATC_BTSIZE_MAX); + + lli->saddr = src + offset; + lli->daddr = dest + offset; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; + + desc->sg[i].len = xfer_count << src_width; + + atdma_lli_chain(desc, i); + } + + desc->total_len = len; + + /* set end-of-link to the last link descriptor of list*/ + set_lli_eol(desc, i - 1); + + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); + +err_desc_get: + atdma_desc_free(&desc->vd); + return NULL; +} + +static int atdma_create_memset_lli(struct dma_chan *chan, + struct atdma_sg *atdma_sg, + dma_addr_t psrc, dma_addr_t pdst, size_t len) +{ + struct at_dma *atdma = to_at_dma(chan->device); + struct at_lli *lli; + size_t xfer_count; + u32 ctrla = FIELD_PREP(ATC_SRC_WIDTH, 2) | FIELD_PREP(ATC_DST_WIDTH, 2); + u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); + + xfer_count = len >> 2; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return -EINVAL; + } + + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + return -ENOMEM; + lli = atdma_sg->lli; + + lli->saddr = psrc; + lli->daddr = pdst; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; + + atdma_sg->len = len; + + return 0; +} + +/** + * atc_prep_dma_memset - prepare a memcpy operation + * @chan: the channel to prepare operation on + * @dest: operation virtual destination address + * @value: value to set memory buffer to + * @len: operation length + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc; + void __iomem *vaddr; + dma_addr_t paddr; + char fill_pattern; + int ret; + + dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__, + &dest, value, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__); + return NULL; + } + + if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { + dev_dbg(chan2dev(chan), "%s: buffer is not aligned\n", + __func__); + return NULL; + } + + vaddr = dma_pool_alloc(atdma->memset_pool, GFP_NOWAIT, &paddr); + if (!vaddr) { + dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n", + __func__); + return NULL; + } + + /* Only the first byte of value is to be used according to dmaengine */ + fill_pattern = (char)value; + + *(u32*)vaddr = (fill_pattern << 24) | + (fill_pattern << 16) | + (fill_pattern << 8) | + fill_pattern; + + desc = kzalloc(struct_size(desc, sg, 1), GFP_ATOMIC); + if (!desc) + goto err_free_buffer; + desc->sglen = 1; + + ret = atdma_create_memset_lli(chan, desc->sg, paddr, dest, len); + if (ret) + goto err_free_desc; + + desc->memset_paddr = paddr; + desc->memset_vaddr = vaddr; + desc->memset_buffer = true; + + desc->total_len = len; + + /* set end-of-link on the descriptor */ + set_lli_eol(desc, 0); + + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); + +err_free_desc: + kfree(desc); +err_free_buffer: + dma_pool_free(atdma->memset_pool, vaddr, paddr); + return NULL; +} + +static struct dma_async_tx_descriptor * +atc_prep_dma_memset_sg(struct dma_chan *chan, + struct scatterlist *sgl, + unsigned int sg_len, int value, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc; + struct scatterlist *sg; + void __iomem *vaddr; + dma_addr_t paddr; + size_t total_len = 0; + int i; + int ret; + + dev_vdbg(chan2dev(chan), "%s: v0x%x l0x%zx f0x%lx\n", __func__, + value, sg_len, flags); + + if (unlikely(!sgl || !sg_len)) { + dev_dbg(chan2dev(chan), "%s: scatterlist is empty!\n", + __func__); + return NULL; + } + + vaddr = dma_pool_alloc(atdma->memset_pool, GFP_NOWAIT, &paddr); + if (!vaddr) { + dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n", + __func__); + return NULL; + } + *(u32*)vaddr = value; + + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + goto err_free_dma_buf; + desc->sglen = sg_len; + + for_each_sg(sgl, sg, sg_len, i) { + dma_addr_t dest = sg_dma_address(sg); + size_t len = sg_dma_len(sg); + + dev_vdbg(chan2dev(chan), "%s: d%pad, l0x%zx\n", + __func__, &dest, len); + + if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { + dev_err(chan2dev(chan), "%s: buffer is not aligned\n", + __func__); + goto err_free_desc; + } + + ret = atdma_create_memset_lli(chan, &desc->sg[i], paddr, dest, + len); + if (ret) + goto err_free_desc; + + atdma_lli_chain(desc, i); + total_len += len; + } + + desc->memset_paddr = paddr; + desc->memset_vaddr = vaddr; + desc->memset_buffer = true; + + desc->total_len = total_len; + + /* set end-of-link on the descriptor */ + set_lli_eol(desc, i - 1); + + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); + +err_free_desc: + atdma_desc_free(&desc->vd); +err_free_dma_buf: + dma_pool_free(atdma->memset_pool, vaddr, paddr); + return NULL; +} + +/** + * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: tx descriptor status flags + * @context: transaction context (ignored) + */ +static struct dma_async_tx_descriptor * +atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct at_dma *atdma = to_at_dma(chan->device); + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_slave *atslave = chan->private; + struct dma_slave_config *sconfig = &atchan->dma_sconfig; + struct at_desc *desc; + u32 ctrla; + u32 ctrlb; + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; + + dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n", + sg_len, + direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE", + flags); + + if (unlikely(!atslave || !sg_len)) { + dev_dbg(chan2dev(chan), "prep_slave_sg: sg length is zero!\n"); + return NULL; + } + + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = sg_len; + + ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | + FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst); + ctrlb = ATC_IEN; + + switch (direction) { + case DMA_MEM_TO_DEV: + reg_width = convert_buswidth(sconfig->dst_addr_width); + ctrla |= FIELD_PREP(ATC_DST_WIDTH, reg_width); + ctrlb |= FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | + FIELD_PREP(ATC_SIF, atchan->mem_if) | + FIELD_PREP(ATC_DIF, atchan->per_if); + reg = sconfig->dst_addr; + for_each_sg(sgl, sg, sg_len, i) { + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; + u32 len; + u32 mem; + + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, + GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + goto err_desc_get; + lli = atdma_sg->lli; + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), + "prep_slave_sg: sg(%d) data length is zero\n", i); + goto err; + } + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + lli->saddr = mem; + lli->daddr = reg; + lli->ctrla = ctrla | + FIELD_PREP(ATC_SRC_WIDTH, mem_width) | + len >> mem_width; + lli->ctrlb = ctrlb; + + atdma_sg->len = len; + total_len += len; + + desc->sg[i].len = len; + atdma_lli_chain(desc, i); + } + break; + case DMA_DEV_TO_MEM: + reg_width = convert_buswidth(sconfig->src_addr_width); + ctrla |= FIELD_PREP(ATC_SRC_WIDTH, reg_width); + ctrlb |= FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | + FIELD_PREP(ATC_SIF, atchan->per_if) | + FIELD_PREP(ATC_DIF, atchan->mem_if); + + reg = sconfig->src_addr; + for_each_sg(sgl, sg, sg_len, i) { + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; + u32 len; + u32 mem; + + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, + GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + goto err_desc_get; + lli = atdma_sg->lli; + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), + "prep_slave_sg: sg(%d) data length is zero\n", i); + goto err; + } + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + lli->saddr = reg; + lli->daddr = mem; + lli->ctrla = ctrla | + FIELD_PREP(ATC_DST_WIDTH, mem_width) | + len >> reg_width; + lli->ctrlb = ctrlb; + + desc->sg[i].len = len; + total_len += len; + + atdma_lli_chain(desc, i); + } + break; + default: + return NULL; + } + + /* set end-of-link to the last link descriptor of list*/ + set_lli_eol(desc, i - 1); + + desc->total_len = total_len; + + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); + +err_desc_get: + dev_err(chan2dev(chan), "not enough descriptors available\n"); +err: + atdma_desc_free(&desc->vd); + return NULL; +} + +/* + * atc_dma_cyclic_check_values + * Check for too big/unaligned periods and unaligned DMA buffer + */ +static int +atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr, + size_t period_len) +{ + if (period_len > (ATC_BTSIZE_MAX << reg_width)) + goto err_out; + if (unlikely(period_len & ((1 << reg_width) - 1))) + goto err_out; + if (unlikely(buf_addr & ((1 << reg_width) - 1))) + goto err_out; + + return 0; + +err_out: + return -EINVAL; +} + +/* + * atc_dma_cyclic_fill_desc - Fill one period descriptor + */ +static int +atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, + unsigned int i, dma_addr_t buf_addr, + unsigned int reg_width, size_t period_len, + enum dma_transfer_direction direction) +{ + struct at_dma *atdma = to_at_dma(chan->device); + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct dma_slave_config *sconfig = &atchan->dma_sconfig; + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; + + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_ATOMIC, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + return -ENOMEM; + lli = atdma_sg->lli; + + switch (direction) { + case DMA_MEM_TO_DEV: + lli->saddr = buf_addr + (period_len * i); + lli->daddr = sconfig->dst_addr; + lli->ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | + FIELD_PREP(ATC_SIF, atchan->mem_if) | + FIELD_PREP(ATC_DIF, atchan->per_if); + + break; + + case DMA_DEV_TO_MEM: + lli->saddr = sconfig->src_addr; + lli->daddr = buf_addr + (period_len * i); + lli->ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | + FIELD_PREP(ATC_SIF, atchan->per_if) | + FIELD_PREP(ATC_DIF, atchan->mem_if); + break; + + default: + return -EINVAL; + } + + lli->ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | + FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) | + FIELD_PREP(ATC_DST_WIDTH, reg_width) | + FIELD_PREP(ATC_SRC_WIDTH, reg_width) | + period_len >> reg_width; + desc->sg[i].len = period_len; + + return 0; +} + +/** + * atc_prep_dma_cyclic - prepare the cyclic DMA transfer + * @chan: the DMA channel to prepare + * @buf_addr: physical DMA address where the buffer starts + * @buf_len: total number of bytes for the entire buffer + * @period_len: number of bytes for each period + * @direction: transfer direction, to or from device + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_slave *atslave = chan->private; + struct dma_slave_config *sconfig = &atchan->dma_sconfig; + struct at_desc *desc; + unsigned long was_cyclic; + unsigned int reg_width; + unsigned int periods = buf_len / period_len; + unsigned int i; + + dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@%pad - %d (%d/%d)\n", + direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE", + &buf_addr, + periods, buf_len, period_len); + + if (unlikely(!atslave || !buf_len || !period_len)) { + dev_dbg(chan2dev(chan), "prep_dma_cyclic: length is zero!\n"); + return NULL; + } + + was_cyclic = test_and_set_bit(ATC_IS_CYCLIC, &atchan->status); + if (was_cyclic) { + dev_dbg(chan2dev(chan), "prep_dma_cyclic: channel in use!\n"); + return NULL; + } + + if (unlikely(!is_slave_direction(direction))) + goto err_out; + + if (direction == DMA_MEM_TO_DEV) + reg_width = convert_buswidth(sconfig->dst_addr_width); + else + reg_width = convert_buswidth(sconfig->src_addr_width); + + /* Check for too big/unaligned periods and unaligned DMA buffer */ + if (atc_dma_cyclic_check_values(reg_width, buf_addr, period_len)) + goto err_out; + + desc = kzalloc(struct_size(desc, sg, periods), GFP_ATOMIC); + if (!desc) + goto err_out; + desc->sglen = periods; + + /* build cyclic linked list */ + for (i = 0; i < periods; i++) { + if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr, + reg_width, period_len, direction)) + goto err_fill_desc; + atdma_lli_chain(desc, i); + } + desc->total_len = buf_len; + /* lets make a cyclic list */ + desc->sg[i - 1].lli->dscr = desc->sg[0].lli_phys; + + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); + +err_fill_desc: + atdma_desc_free(&desc->vd); +err_out: + clear_bit(ATC_IS_CYCLIC, &atchan->status); + return NULL; +} + +static int atc_config(struct dma_chan *chan, + struct dma_slave_config *sconfig) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + dev_vdbg(chan2dev(chan), "%s\n", __func__); + + /* Check if it is chan is configured for slave transfers */ + if (!chan->private) + return -EINVAL; + + memcpy(&atchan->dma_sconfig, sconfig, sizeof(*sconfig)); + + convert_burst(&atchan->dma_sconfig.src_maxburst); + convert_burst(&atchan->dma_sconfig.dst_maxburst); + + return 0; +} + +static int atc_pause(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + int chan_id = atchan->vc.chan.chan_id; + unsigned long flags; + + dev_vdbg(chan2dev(chan), "%s\n", __func__); + + spin_lock_irqsave(&atchan->vc.lock, flags); + + dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id)); + set_bit(ATC_IS_PAUSED, &atchan->status); + + spin_unlock_irqrestore(&atchan->vc.lock, flags); + + return 0; +} + +static int atc_resume(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + int chan_id = atchan->vc.chan.chan_id; + unsigned long flags; + + dev_vdbg(chan2dev(chan), "%s\n", __func__); + + if (!atc_chan_is_paused(atchan)) + return 0; + + spin_lock_irqsave(&atchan->vc.lock, flags); + + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id)); + clear_bit(ATC_IS_PAUSED, &atchan->status); + + spin_unlock_irqrestore(&atchan->vc.lock, flags); + + return 0; +} + +static int atc_terminate_all(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + int chan_id = atchan->vc.chan.chan_id; + unsigned long flags; + + LIST_HEAD(list); + + dev_vdbg(chan2dev(chan), "%s\n", __func__); + + /* + * This is only called when something went wrong elsewhere, so + * we don't really care about the data. Just disable the + * channel. We still have to poll the channel enable bit due + * to AHB/HSB limitations. + */ + spin_lock_irqsave(&atchan->vc.lock, flags); + + /* disabling channel: must also remove suspend state */ + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask); + + /* confirm that this channel is disabled */ + while (dma_readl(atdma, CHSR) & atchan->mask) + cpu_relax(); + + if (atchan->desc) { + vchan_terminate_vdesc(&atchan->desc->vd); + atchan->desc = NULL; + } + + vchan_get_all_descriptors(&atchan->vc, &list); + + clear_bit(ATC_IS_PAUSED, &atchan->status); + /* if channel dedicated to cyclic operations, free it */ + clear_bit(ATC_IS_CYCLIC, &atchan->status); + + spin_unlock_irqrestore(&atchan->vc.lock, flags); + + vchan_dma_desc_free_list(&atchan->vc, &list); + + return 0; +} + +/** + * atc_tx_status - poll for transaction completion + * @chan: DMA channel + * @cookie: transaction identifier to check status of + * @txstate: if not %NULL updated with transaction state + * + * If @txstate is passed in, upon return it reflect the driver + * internal state and can be used with dma_async_is_complete() to check + * the status of multiple cookies without re-checking hardware state. + */ +static enum dma_status +atc_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + unsigned long flags; + enum dma_status dma_status; + u32 residue; + int ret; + + dma_status = dma_cookie_status(chan, cookie, txstate); + if (dma_status == DMA_COMPLETE || !txstate) + return dma_status; + + spin_lock_irqsave(&atchan->vc.lock, flags); + /* Get number of bytes left in the active transactions */ + ret = atc_get_residue(chan, cookie, &residue); + spin_unlock_irqrestore(&atchan->vc.lock, flags); + + if (unlikely(ret < 0)) { + dev_vdbg(chan2dev(chan), "get residual bytes error\n"); + return DMA_ERROR; + } else { + dma_set_residue(txstate, residue); + } + + dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %u\n", + dma_status, cookie, residue); + + return dma_status; +} + +static void atc_issue_pending(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&atchan->vc.lock, flags); + if (vchan_issue_pending(&atchan->vc) && !atchan->desc) { + if (!(atc_chan_is_enabled(atchan))) + atc_dostart(atchan); + } + spin_unlock_irqrestore(&atchan->vc.lock, flags); +} + +/** + * atc_alloc_chan_resources - allocate resources for DMA channel + * @chan: allocate descriptor resources for this channel + * + * return - the number of allocated descriptors + */ +static int atc_alloc_chan_resources(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_dma_slave *atslave; + u32 cfg; + + dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); + + /* ASSERT: channel is idle */ + if (atc_chan_is_enabled(atchan)) { + dev_dbg(chan2dev(chan), "DMA channel not idle ?\n"); + return -EIO; + } + + cfg = ATC_DEFAULT_CFG; + + atslave = chan->private; + if (atslave) { + /* + * We need controller-specific data to set up slave + * transfers. + */ + BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_device.dev); + + /* if cfg configuration specified take it instead of default */ + if (atslave->cfg) + cfg = atslave->cfg; + } + + /* channel parameters */ + channel_writel(atchan, CFG, cfg); + + return 0; +} + +/** + * atc_free_chan_resources - free all channel resources + * @chan: DMA channel + */ +static void atc_free_chan_resources(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + BUG_ON(atc_chan_is_enabled(atchan)); + + vchan_free_chan_resources(to_virt_chan(chan)); + atchan->status = 0; + + /* + * Free atslave allocated in at_dma_xlate() + */ + kfree(chan->private); + chan->private = NULL; + + dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); +} + +#ifdef CONFIG_OF +static bool at_dma_filter(struct dma_chan *chan, void *slave) +{ + struct at_dma_slave *atslave = slave; + + if (atslave->dma_dev == chan->device->dev) { + chan->private = atslave; + return true; + } else { + return false; + } +} + +static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *of_dma) +{ + struct dma_chan *chan; + struct at_dma_chan *atchan; + struct at_dma_slave *atslave; + dma_cap_mask_t mask; + unsigned int per_id; + struct platform_device *dmac_pdev; + + if (dma_spec->args_count != 2) + return NULL; + + dmac_pdev = of_find_device_by_node(dma_spec->np); + if (!dmac_pdev) + return NULL; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + atslave = kmalloc(sizeof(*atslave), GFP_KERNEL); + if (!atslave) { + put_device(&dmac_pdev->dev); + return NULL; + } + + atslave->cfg = ATC_DST_H2SEL | ATC_SRC_H2SEL; + /* + * We can fill both SRC_PER and DST_PER, one of these fields will be + * ignored depending on DMA transfer direction. + */ + per_id = dma_spec->args[1] & AT91_DMA_CFG_PER_ID_MASK; + atslave->cfg |= ATC_DST_PER_ID(per_id) | ATC_SRC_PER_ID(per_id); + /* + * We have to translate the value we get from the device tree since + * the half FIFO configuration value had to be 0 to keep backward + * compatibility. + */ + switch (dma_spec->args[1] & AT91_DMA_CFG_FIFOCFG_MASK) { + case AT91_DMA_CFG_FIFOCFG_ALAP: + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, + ATC_FIFOCFG_LARGESTBURST); + break; + case AT91_DMA_CFG_FIFOCFG_ASAP: + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, + ATC_FIFOCFG_ENOUGHSPACE); + break; + case AT91_DMA_CFG_FIFOCFG_HALF: + default: + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO); + } + atslave->dma_dev = &dmac_pdev->dev; + + chan = dma_request_channel(mask, at_dma_filter, atslave); + if (!chan) { + put_device(&dmac_pdev->dev); + kfree(atslave); + return NULL; + } + + atchan = to_at_dma_chan(chan); + atchan->per_if = dma_spec->args[0] & 0xff; + atchan->mem_if = (dma_spec->args[0] >> 16) & 0xff; + + return chan; +} +#else +static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *of_dma) +{ + return NULL; +} +#endif + +/*-- Module Management -----------------------------------------------*/ + +/* cap_mask is a multi-u32 bitfield, fill it with proper C code. */ +static struct at_dma_platform_data at91sam9rl_config = { + .nr_channels = 2, +}; +static struct at_dma_platform_data at91sam9g45_config = { + .nr_channels = 8, +}; + +#if defined(CONFIG_OF) +static const struct of_device_id atmel_dma_dt_ids[] = { + { + .compatible = "atmel,at91sam9rl-dma", + .data = &at91sam9rl_config, + }, { + .compatible = "atmel,at91sam9g45-dma", + .data = &at91sam9g45_config, + }, { + /* sentinel */ + } +}; + +MODULE_DEVICE_TABLE(of, atmel_dma_dt_ids); +#endif + +static const struct platform_device_id atdma_devtypes[] = { + { + .name = "at91sam9rl_dma", + .driver_data = (unsigned long) &at91sam9rl_config, + }, { + .name = "at91sam9g45_dma", + .driver_data = (unsigned long) &at91sam9g45_config, + }, { + /* sentinel */ + } +}; + +static inline const struct at_dma_platform_data * __init at_dma_get_driver_data( + struct platform_device *pdev) +{ + if (pdev->dev.of_node) { + const struct of_device_id *match; + match = of_match_node(atmel_dma_dt_ids, pdev->dev.of_node); + if (match == NULL) + return NULL; + return match->data; + } + return (struct at_dma_platform_data *) + platform_get_device_id(pdev)->driver_data; +} + +/** + * at_dma_off - disable DMA controller + * @atdma: the Atmel HDAMC device + */ +static void at_dma_off(struct at_dma *atdma) +{ + dma_writel(atdma, EN, 0); + + /* disable all interrupts */ + dma_writel(atdma, EBCIDR, -1L); + + /* confirm that all channels are disabled */ + while (dma_readl(atdma, CHSR) & atdma->all_chan_mask) + cpu_relax(); +} + +static int __init at_dma_probe(struct platform_device *pdev) +{ + struct at_dma *atdma; + int irq; + int err; + int i; + const struct at_dma_platform_data *plat_dat; + + /* setup platform data for each SoC */ + dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask); + dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_MEMSET, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_MEMSET_SG, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_PRIVATE, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); + + /* get DMA parameters from controller type */ + plat_dat = at_dma_get_driver_data(pdev); + if (!plat_dat) + return -ENODEV; + + atdma = devm_kzalloc(&pdev->dev, + struct_size(atdma, chan, plat_dat->nr_channels), + GFP_KERNEL); + if (!atdma) + return -ENOMEM; + + atdma->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(atdma->regs)) + return PTR_ERR(atdma->regs); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + /* discover transaction capabilities */ + atdma->dma_device.cap_mask = plat_dat->cap_mask; + atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; + + atdma->clk = devm_clk_get(&pdev->dev, "dma_clk"); + if (IS_ERR(atdma->clk)) + return PTR_ERR(atdma->clk); + + err = clk_prepare_enable(atdma->clk); + if (err) + return err; + + /* force dma off, just in case */ + at_dma_off(atdma); + + err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma); + if (err) + goto err_irq; + + platform_set_drvdata(pdev, atdma); + + /* create a pool of consistent memory blocks for hardware descriptors */ + atdma->lli_pool = dma_pool_create("at_hdmac_lli_pool", + &pdev->dev, sizeof(struct at_lli), + 4 /* word alignment */, 0); + if (!atdma->lli_pool) { + dev_err(&pdev->dev, "Unable to allocate DMA LLI descriptor pool\n"); + err = -ENOMEM; + goto err_desc_pool_create; + } + + /* create a pool of consistent memory blocks for memset blocks */ + atdma->memset_pool = dma_pool_create("at_hdmac_memset_pool", + &pdev->dev, sizeof(int), 4, 0); + if (!atdma->memset_pool) { + dev_err(&pdev->dev, "No memory for memset dma pool\n"); + err = -ENOMEM; + goto err_memset_pool_create; + } + + /* clear any pending interrupt */ + while (dma_readl(atdma, EBCISR)) + cpu_relax(); + + /* initialize channels related values */ + INIT_LIST_HEAD(&atdma->dma_device.channels); + for (i = 0; i < plat_dat->nr_channels; i++) { + struct at_dma_chan *atchan = &atdma->chan[i]; + + atchan->mem_if = AT_DMA_MEM_IF; + atchan->per_if = AT_DMA_PER_IF; + + atchan->ch_regs = atdma->regs + ch_regs(i); + atchan->mask = 1 << i; + + atchan->atdma = atdma; + atchan->vc.desc_free = atdma_desc_free; + vchan_init(&atchan->vc, &atdma->dma_device); + atc_enable_chan_irq(atdma, i); + } + + /* set base routines */ + atdma->dma_device.device_alloc_chan_resources = atc_alloc_chan_resources; + atdma->dma_device.device_free_chan_resources = atc_free_chan_resources; + atdma->dma_device.device_tx_status = atc_tx_status; + atdma->dma_device.device_issue_pending = atc_issue_pending; + atdma->dma_device.dev = &pdev->dev; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_device.cap_mask)) + atdma->dma_device.device_prep_interleaved_dma = atc_prep_dma_interleaved; + + if (dma_has_cap(DMA_MEMCPY, atdma->dma_device.cap_mask)) + atdma->dma_device.device_prep_dma_memcpy = atc_prep_dma_memcpy; + + if (dma_has_cap(DMA_MEMSET, atdma->dma_device.cap_mask)) { + atdma->dma_device.device_prep_dma_memset = atc_prep_dma_memset; + atdma->dma_device.device_prep_dma_memset_sg = atc_prep_dma_memset_sg; + atdma->dma_device.fill_align = DMAENGINE_ALIGN_4_BYTES; + } + + if (dma_has_cap(DMA_SLAVE, atdma->dma_device.cap_mask)) { + atdma->dma_device.device_prep_slave_sg = atc_prep_slave_sg; + /* controller can do slave DMA: can trigger cyclic transfers */ + dma_cap_set(DMA_CYCLIC, atdma->dma_device.cap_mask); + atdma->dma_device.device_prep_dma_cyclic = atc_prep_dma_cyclic; + atdma->dma_device.device_config = atc_config; + atdma->dma_device.device_pause = atc_pause; + atdma->dma_device.device_resume = atc_resume; + atdma->dma_device.device_terminate_all = atc_terminate_all; + atdma->dma_device.src_addr_widths = ATC_DMA_BUSWIDTHS; + atdma->dma_device.dst_addr_widths = ATC_DMA_BUSWIDTHS; + atdma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + atdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + } + + dma_writel(atdma, EN, AT_DMA_ENABLE); + + dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n", + dma_has_cap(DMA_MEMCPY, atdma->dma_device.cap_mask) ? "cpy " : "", + dma_has_cap(DMA_MEMSET, atdma->dma_device.cap_mask) ? "set " : "", + dma_has_cap(DMA_SLAVE, atdma->dma_device.cap_mask) ? "slave " : "", + plat_dat->nr_channels); + + err = dma_async_device_register(&atdma->dma_device); + if (err) { + dev_err(&pdev->dev, "Unable to register: %d.\n", err); + goto err_dma_async_device_register; + } + + /* + * Do not return an error if the dmac node is not present in order to + * not break the existing way of requesting channel with + * dma_request_channel(). + */ + if (pdev->dev.of_node) { + err = of_dma_controller_register(pdev->dev.of_node, + at_dma_xlate, atdma); + if (err) { + dev_err(&pdev->dev, "could not register of_dma_controller\n"); + goto err_of_dma_controller_register; + } + } + + return 0; + +err_of_dma_controller_register: + dma_async_device_unregister(&atdma->dma_device); +err_dma_async_device_register: + dma_pool_destroy(atdma->memset_pool); +err_memset_pool_create: + dma_pool_destroy(atdma->lli_pool); +err_desc_pool_create: + free_irq(platform_get_irq(pdev, 0), atdma); +err_irq: + clk_disable_unprepare(atdma->clk); + return err; +} + +static int at_dma_remove(struct platform_device *pdev) +{ + struct at_dma *atdma = platform_get_drvdata(pdev); + struct dma_chan *chan, *_chan; + + at_dma_off(atdma); + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&atdma->dma_device); + + dma_pool_destroy(atdma->memset_pool); + dma_pool_destroy(atdma->lli_pool); + free_irq(platform_get_irq(pdev, 0), atdma); + + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, + device_node) { + /* Disable interrupts */ + atc_disable_chan_irq(atdma, chan->chan_id); + list_del(&chan->device_node); + } + + clk_disable_unprepare(atdma->clk); + + return 0; +} + +static void at_dma_shutdown(struct platform_device *pdev) +{ + struct at_dma *atdma = platform_get_drvdata(pdev); + + at_dma_off(platform_get_drvdata(pdev)); + clk_disable_unprepare(atdma->clk); +} + +static int at_dma_prepare(struct device *dev) +{ + struct at_dma *atdma = dev_get_drvdata(dev); + struct dma_chan *chan, *_chan; + + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, + device_node) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); + /* wait for transaction completion (except in cyclic case) */ + if (atc_chan_is_enabled(atchan) && !atc_chan_is_cyclic(atchan)) + return -EAGAIN; + } + return 0; +} + +static void atc_suspend_cyclic(struct at_dma_chan *atchan) +{ + struct dma_chan *chan = &atchan->vc.chan; + + /* Channel should be paused by user + * do it anyway even if it is not done already */ + if (!atc_chan_is_paused(atchan)) { + dev_warn(chan2dev(chan), + "cyclic channel not paused, should be done by channel user\n"); + atc_pause(chan); + } + + /* now preserve additional data for cyclic operations */ + /* next descriptor address in the cyclic list */ + atchan->save_dscr = channel_readl(atchan, DSCR); + + vdbg_dump_regs(atchan); +} + +static int at_dma_suspend_noirq(struct device *dev) +{ + struct at_dma *atdma = dev_get_drvdata(dev); + struct dma_chan *chan, *_chan; + + /* preserve data */ + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, + device_node) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + if (atc_chan_is_cyclic(atchan)) + atc_suspend_cyclic(atchan); + atchan->save_cfg = channel_readl(atchan, CFG); + } + atdma->save_imr = dma_readl(atdma, EBCIMR); + + /* disable DMA controller */ + at_dma_off(atdma); + clk_disable_unprepare(atdma->clk); + return 0; +} + +static void atc_resume_cyclic(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); + + /* restore channel status for cyclic descriptors list: + * next descriptor in the cyclic list at the time of suspend */ + channel_writel(atchan, SADDR, 0); + channel_writel(atchan, DADDR, 0); + channel_writel(atchan, CTRLA, 0); + channel_writel(atchan, CTRLB, 0); + channel_writel(atchan, DSCR, atchan->save_dscr); + dma_writel(atdma, CHER, atchan->mask); + + /* channel pause status should be removed by channel user + * We cannot take the initiative to do it here */ + + vdbg_dump_regs(atchan); +} + +static int at_dma_resume_noirq(struct device *dev) +{ + struct at_dma *atdma = dev_get_drvdata(dev); + struct dma_chan *chan, *_chan; + + /* bring back DMA controller */ + clk_prepare_enable(atdma->clk); + dma_writel(atdma, EN, AT_DMA_ENABLE); + + /* clear any pending interrupt */ + while (dma_readl(atdma, EBCISR)) + cpu_relax(); + + /* restore saved data */ + dma_writel(atdma, EBCIER, atdma->save_imr); + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, + device_node) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + channel_writel(atchan, CFG, atchan->save_cfg); + if (atc_chan_is_cyclic(atchan)) + atc_resume_cyclic(atchan); + } + return 0; +} + +static const struct dev_pm_ops __maybe_unused at_dma_dev_pm_ops = { + .prepare = at_dma_prepare, + .suspend_noirq = at_dma_suspend_noirq, + .resume_noirq = at_dma_resume_noirq, +}; + +static struct platform_driver at_dma_driver = { + .remove = at_dma_remove, + .shutdown = at_dma_shutdown, + .id_table = atdma_devtypes, + .driver = { + .name = "at_hdmac", + .pm = pm_ptr(&at_dma_dev_pm_ops), + .of_match_table = of_match_ptr(atmel_dma_dt_ids), + }, +}; + +static int __init at_dma_init(void) +{ + return platform_driver_probe(&at_dma_driver, at_dma_probe); +} +subsys_initcall(at_dma_init); + +static void __exit at_dma_exit(void) +{ + platform_driver_unregister(&at_dma_driver); +} +module_exit(at_dma_exit); + +MODULE_DESCRIPTION("Atmel AHB DMA Controller driver"); +MODULE_AUTHOR("Nicolas Ferre "); +MODULE_AUTHOR("Tudor Ambarus "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:at_hdmac"); diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c new file mode 100644 index 0000000000..c3b37168b2 --- /dev/null +++ b/drivers/dma/at_xdmac.c @@ -0,0 +1,2503 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the Atmel Extensible DMA Controller (aka XDMAC on AT91 systems) + * + * Copyright (C) 2014 Atmel Corporation + * + * Author: Ludovic Desroches + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +/* Global registers */ +#define AT_XDMAC_GTYPE 0x00 /* Global Type Register */ +#define AT_XDMAC_NB_CH(i) (((i) & 0x1F) + 1) /* Number of Channels Minus One */ +#define AT_XDMAC_FIFO_SZ(i) (((i) >> 5) & 0x7FF) /* Number of Bytes */ +#define AT_XDMAC_NB_REQ(i) ((((i) >> 16) & 0x3F) + 1) /* Number of Peripheral Requests Minus One */ +#define AT_XDMAC_GCFG 0x04 /* Global Configuration Register */ +#define AT_XDMAC_WRHP(i) (((i) & 0xF) << 4) +#define AT_XDMAC_WRMP(i) (((i) & 0xF) << 8) +#define AT_XDMAC_WRLP(i) (((i) & 0xF) << 12) +#define AT_XDMAC_RDHP(i) (((i) & 0xF) << 16) +#define AT_XDMAC_RDMP(i) (((i) & 0xF) << 20) +#define AT_XDMAC_RDLP(i) (((i) & 0xF) << 24) +#define AT_XDMAC_RDSG(i) (((i) & 0xF) << 28) +#define AT_XDMAC_GCFG_M2M (AT_XDMAC_RDLP(0xF) | AT_XDMAC_WRLP(0xF)) +#define AT_XDMAC_GCFG_P2M (AT_XDMAC_RDSG(0x1) | AT_XDMAC_RDHP(0x3) | \ + AT_XDMAC_WRHP(0x5)) +#define AT_XDMAC_GWAC 0x08 /* Global Weighted Arbiter Configuration Register */ +#define AT_XDMAC_PW0(i) (((i) & 0xF) << 0) +#define AT_XDMAC_PW1(i) (((i) & 0xF) << 4) +#define AT_XDMAC_PW2(i) (((i) & 0xF) << 8) +#define AT_XDMAC_PW3(i) (((i) & 0xF) << 12) +#define AT_XDMAC_GWAC_M2M 0 +#define AT_XDMAC_GWAC_P2M (AT_XDMAC_PW0(0xF) | AT_XDMAC_PW2(0xF)) + +#define AT_XDMAC_GIE 0x0C /* Global Interrupt Enable Register */ +#define AT_XDMAC_GID 0x10 /* Global Interrupt Disable Register */ +#define AT_XDMAC_GIM 0x14 /* Global Interrupt Mask Register */ +#define AT_XDMAC_GIS 0x18 /* Global Interrupt Status Register */ +#define AT_XDMAC_GE 0x1C /* Global Channel Enable Register */ +#define AT_XDMAC_GD 0x20 /* Global Channel Disable Register */ +#define AT_XDMAC_GS 0x24 /* Global Channel Status Register */ +#define AT_XDMAC_VERSION 0xFFC /* XDMAC Version Register */ + +/* Channel relative registers offsets */ +#define AT_XDMAC_CIE 0x00 /* Channel Interrupt Enable Register */ +#define AT_XDMAC_CIE_BIE BIT(0) /* End of Block Interrupt Enable Bit */ +#define AT_XDMAC_CIE_LIE BIT(1) /* End of Linked List Interrupt Enable Bit */ +#define AT_XDMAC_CIE_DIE BIT(2) /* End of Disable Interrupt Enable Bit */ +#define AT_XDMAC_CIE_FIE BIT(3) /* End of Flush Interrupt Enable Bit */ +#define AT_XDMAC_CIE_RBEIE BIT(4) /* Read Bus Error Interrupt Enable Bit */ +#define AT_XDMAC_CIE_WBEIE BIT(5) /* Write Bus Error Interrupt Enable Bit */ +#define AT_XDMAC_CIE_ROIE BIT(6) /* Request Overflow Interrupt Enable Bit */ +#define AT_XDMAC_CID 0x04 /* Channel Interrupt Disable Register */ +#define AT_XDMAC_CID_BID BIT(0) /* End of Block Interrupt Disable Bit */ +#define AT_XDMAC_CID_LID BIT(1) /* End of Linked List Interrupt Disable Bit */ +#define AT_XDMAC_CID_DID BIT(2) /* End of Disable Interrupt Disable Bit */ +#define AT_XDMAC_CID_FID BIT(3) /* End of Flush Interrupt Disable Bit */ +#define AT_XDMAC_CID_RBEID BIT(4) /* Read Bus Error Interrupt Disable Bit */ +#define AT_XDMAC_CID_WBEID BIT(5) /* Write Bus Error Interrupt Disable Bit */ +#define AT_XDMAC_CID_ROID BIT(6) /* Request Overflow Interrupt Disable Bit */ +#define AT_XDMAC_CIM 0x08 /* Channel Interrupt Mask Register */ +#define AT_XDMAC_CIM_BIM BIT(0) /* End of Block Interrupt Mask Bit */ +#define AT_XDMAC_CIM_LIM BIT(1) /* End of Linked List Interrupt Mask Bit */ +#define AT_XDMAC_CIM_DIM BIT(2) /* End of Disable Interrupt Mask Bit */ +#define AT_XDMAC_CIM_FIM BIT(3) /* End of Flush Interrupt Mask Bit */ +#define AT_XDMAC_CIM_RBEIM BIT(4) /* Read Bus Error Interrupt Mask Bit */ +#define AT_XDMAC_CIM_WBEIM BIT(5) /* Write Bus Error Interrupt Mask Bit */ +#define AT_XDMAC_CIM_ROIM BIT(6) /* Request Overflow Interrupt Mask Bit */ +#define AT_XDMAC_CIS 0x0C /* Channel Interrupt Status Register */ +#define AT_XDMAC_CIS_BIS BIT(0) /* End of Block Interrupt Status Bit */ +#define AT_XDMAC_CIS_LIS BIT(1) /* End of Linked List Interrupt Status Bit */ +#define AT_XDMAC_CIS_DIS BIT(2) /* End of Disable Interrupt Status Bit */ +#define AT_XDMAC_CIS_FIS BIT(3) /* End of Flush Interrupt Status Bit */ +#define AT_XDMAC_CIS_RBEIS BIT(4) /* Read Bus Error Interrupt Status Bit */ +#define AT_XDMAC_CIS_WBEIS BIT(5) /* Write Bus Error Interrupt Status Bit */ +#define AT_XDMAC_CIS_ROIS BIT(6) /* Request Overflow Interrupt Status Bit */ +#define AT_XDMAC_CSA 0x10 /* Channel Source Address Register */ +#define AT_XDMAC_CDA 0x14 /* Channel Destination Address Register */ +#define AT_XDMAC_CNDA 0x18 /* Channel Next Descriptor Address Register */ +#define AT_XDMAC_CNDA_NDAIF(i) ((i) & 0x1) /* Channel x Next Descriptor Interface */ +#define AT_XDMAC_CNDA_NDA(i) ((i) & 0xfffffffc) /* Channel x Next Descriptor Address */ +#define AT_XDMAC_CNDC 0x1C /* Channel Next Descriptor Control Register */ +#define AT_XDMAC_CNDC_NDE (0x1 << 0) /* Channel x Next Descriptor Enable */ +#define AT_XDMAC_CNDC_NDSUP (0x1 << 1) /* Channel x Next Descriptor Source Update */ +#define AT_XDMAC_CNDC_NDDUP (0x1 << 2) /* Channel x Next Descriptor Destination Update */ +#define AT_XDMAC_CNDC_NDVIEW_MASK GENMASK(28, 27) +#define AT_XDMAC_CNDC_NDVIEW_NDV0 (0x0 << 3) /* Channel x Next Descriptor View 0 */ +#define AT_XDMAC_CNDC_NDVIEW_NDV1 (0x1 << 3) /* Channel x Next Descriptor View 1 */ +#define AT_XDMAC_CNDC_NDVIEW_NDV2 (0x2 << 3) /* Channel x Next Descriptor View 2 */ +#define AT_XDMAC_CNDC_NDVIEW_NDV3 (0x3 << 3) /* Channel x Next Descriptor View 3 */ +#define AT_XDMAC_CUBC 0x20 /* Channel Microblock Control Register */ +#define AT_XDMAC_CBC 0x24 /* Channel Block Control Register */ +#define AT_XDMAC_CC 0x28 /* Channel Configuration Register */ +#define AT_XDMAC_CC_TYPE (0x1 << 0) /* Channel Transfer Type */ +#define AT_XDMAC_CC_TYPE_MEM_TRAN (0x0 << 0) /* Memory to Memory Transfer */ +#define AT_XDMAC_CC_TYPE_PER_TRAN (0x1 << 0) /* Peripheral to Memory or Memory to Peripheral Transfer */ +#define AT_XDMAC_CC_MBSIZE_MASK (0x3 << 1) +#define AT_XDMAC_CC_MBSIZE_SINGLE (0x0 << 1) +#define AT_XDMAC_CC_MBSIZE_FOUR (0x1 << 1) +#define AT_XDMAC_CC_MBSIZE_EIGHT (0x2 << 1) +#define AT_XDMAC_CC_MBSIZE_SIXTEEN (0x3 << 1) +#define AT_XDMAC_CC_DSYNC (0x1 << 4) /* Channel Synchronization */ +#define AT_XDMAC_CC_DSYNC_PER2MEM (0x0 << 4) +#define AT_XDMAC_CC_DSYNC_MEM2PER (0x1 << 4) +#define AT_XDMAC_CC_PROT (0x1 << 5) /* Channel Protection */ +#define AT_XDMAC_CC_PROT_SEC (0x0 << 5) +#define AT_XDMAC_CC_PROT_UNSEC (0x1 << 5) +#define AT_XDMAC_CC_SWREQ (0x1 << 6) /* Channel Software Request Trigger */ +#define AT_XDMAC_CC_SWREQ_HWR_CONNECTED (0x0 << 6) +#define AT_XDMAC_CC_SWREQ_SWR_CONNECTED (0x1 << 6) +#define AT_XDMAC_CC_MEMSET (0x1 << 7) /* Channel Fill Block of memory */ +#define AT_XDMAC_CC_MEMSET_NORMAL_MODE (0x0 << 7) +#define AT_XDMAC_CC_MEMSET_HW_MODE (0x1 << 7) +#define AT_XDMAC_CC_CSIZE(i) ((0x7 & (i)) << 8) /* Channel Chunk Size */ +#define AT_XDMAC_CC_DWIDTH_OFFSET 11 +#define AT_XDMAC_CC_DWIDTH_MASK (0x3 << AT_XDMAC_CC_DWIDTH_OFFSET) +#define AT_XDMAC_CC_DWIDTH(i) ((0x3 & (i)) << AT_XDMAC_CC_DWIDTH_OFFSET) /* Channel Data Width */ +#define AT_XDMAC_CC_DWIDTH_BYTE 0x0 +#define AT_XDMAC_CC_DWIDTH_HALFWORD 0x1 +#define AT_XDMAC_CC_DWIDTH_WORD 0x2 +#define AT_XDMAC_CC_DWIDTH_DWORD 0x3 +#define AT_XDMAC_CC_SIF(i) ((0x1 & (i)) << 13) /* Channel Source Interface Identifier */ +#define AT_XDMAC_CC_DIF(i) ((0x1 & (i)) << 14) /* Channel Destination Interface Identifier */ +#define AT_XDMAC_CC_SAM_MASK (0x3 << 16) /* Channel Source Addressing Mode */ +#define AT_XDMAC_CC_SAM_FIXED_AM (0x0 << 16) +#define AT_XDMAC_CC_SAM_INCREMENTED_AM (0x1 << 16) +#define AT_XDMAC_CC_SAM_UBS_AM (0x2 << 16) +#define AT_XDMAC_CC_SAM_UBS_DS_AM (0x3 << 16) +#define AT_XDMAC_CC_DAM_MASK (0x3 << 18) /* Channel Source Addressing Mode */ +#define AT_XDMAC_CC_DAM_FIXED_AM (0x0 << 18) +#define AT_XDMAC_CC_DAM_INCREMENTED_AM (0x1 << 18) +#define AT_XDMAC_CC_DAM_UBS_AM (0x2 << 18) +#define AT_XDMAC_CC_DAM_UBS_DS_AM (0x3 << 18) +#define AT_XDMAC_CC_INITD (0x1 << 21) /* Channel Initialization Terminated (read only) */ +#define AT_XDMAC_CC_INITD_TERMINATED (0x0 << 21) +#define AT_XDMAC_CC_INITD_IN_PROGRESS (0x1 << 21) +#define AT_XDMAC_CC_RDIP (0x1 << 22) /* Read in Progress (read only) */ +#define AT_XDMAC_CC_RDIP_DONE (0x0 << 22) +#define AT_XDMAC_CC_RDIP_IN_PROGRESS (0x1 << 22) +#define AT_XDMAC_CC_WRIP (0x1 << 23) /* Write in Progress (read only) */ +#define AT_XDMAC_CC_WRIP_DONE (0x0 << 23) +#define AT_XDMAC_CC_WRIP_IN_PROGRESS (0x1 << 23) +#define AT_XDMAC_CC_PERID(i) ((0x7f & (i)) << 24) /* Channel Peripheral Identifier */ +#define AT_XDMAC_CDS_MSP 0x2C /* Channel Data Stride Memory Set Pattern */ +#define AT_XDMAC_CSUS 0x30 /* Channel Source Microblock Stride */ +#define AT_XDMAC_CDUS 0x34 /* Channel Destination Microblock Stride */ + +/* Microblock control members */ +#define AT_XDMAC_MBR_UBC_UBLEN_MAX 0xFFFFFFUL /* Maximum Microblock Length */ +#define AT_XDMAC_MBR_UBC_NDE (0x1 << 24) /* Next Descriptor Enable */ +#define AT_XDMAC_MBR_UBC_NSEN (0x1 << 25) /* Next Descriptor Source Update */ +#define AT_XDMAC_MBR_UBC_NDEN (0x1 << 26) /* Next Descriptor Destination Update */ +#define AT_XDMAC_MBR_UBC_NDV0 (0x0 << 27) /* Next Descriptor View 0 */ +#define AT_XDMAC_MBR_UBC_NDV1 (0x1 << 27) /* Next Descriptor View 1 */ +#define AT_XDMAC_MBR_UBC_NDV2 (0x2 << 27) /* Next Descriptor View 2 */ +#define AT_XDMAC_MBR_UBC_NDV3 (0x3 << 27) /* Next Descriptor View 3 */ + +#define AT_XDMAC_MAX_CHAN 0x20 +#define AT_XDMAC_MAX_CSIZE 16 /* 16 data */ +#define AT_XDMAC_MAX_DWIDTH 8 /* 64 bits */ +#define AT_XDMAC_RESIDUE_MAX_RETRIES 5 + +#define AT_XDMAC_DMA_BUSWIDTHS\ + (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |\ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +enum atc_status { + AT_XDMAC_CHAN_IS_CYCLIC = 0, + AT_XDMAC_CHAN_IS_PAUSED, + AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, +}; + +struct at_xdmac_layout { + /* Global Channel Read Suspend Register */ + u8 grs; + /* Global Write Suspend Register */ + u8 gws; + /* Global Channel Read Write Suspend Register */ + u8 grws; + /* Global Channel Read Write Resume Register */ + u8 grwr; + /* Global Channel Software Request Register */ + u8 gswr; + /* Global channel Software Request Status Register */ + u8 gsws; + /* Global Channel Software Flush Request Register */ + u8 gswf; + /* Channel reg base */ + u8 chan_cc_reg_base; + /* Source/Destination Interface must be specified or not */ + bool sdif; + /* AXI queue priority configuration supported */ + bool axi_config; +}; + +/* ----- Channels ----- */ +struct at_xdmac_chan { + struct dma_chan chan; + void __iomem *ch_regs; + u32 mask; /* Channel Mask */ + u32 cfg; /* Channel Configuration Register */ + u8 perid; /* Peripheral ID */ + u8 perif; /* Peripheral Interface */ + u8 memif; /* Memory Interface */ + u32 save_cc; + u32 save_cim; + u32 save_cnda; + u32 save_cndc; + u32 irq_status; + unsigned long status; + struct tasklet_struct tasklet; + struct dma_slave_config sconfig; + + spinlock_t lock; + + struct list_head xfers_list; + struct list_head free_descs_list; +}; + + +/* ----- Controller ----- */ +struct at_xdmac { + struct dma_device dma; + void __iomem *regs; + struct device *dev; + int irq; + struct clk *clk; + u32 save_gim; + u32 save_gs; + struct dma_pool *at_xdmac_desc_pool; + const struct at_xdmac_layout *layout; + struct at_xdmac_chan chan[]; +}; + + +/* ----- Descriptors ----- */ + +/* Linked List Descriptor */ +struct at_xdmac_lld { + u32 mbr_nda; /* Next Descriptor Member */ + u32 mbr_ubc; /* Microblock Control Member */ + u32 mbr_sa; /* Source Address Member */ + u32 mbr_da; /* Destination Address Member */ + u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ +}; + +/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */ +struct at_xdmac_desc { + struct at_xdmac_lld lld; + enum dma_transfer_direction direction; + struct dma_async_tx_descriptor tx_dma_desc; + struct list_head desc_node; + /* Following members are only used by the first descriptor */ + bool active_xfer; + unsigned int xfer_size; + struct list_head descs_list; + struct list_head xfer_node; +} __aligned(sizeof(u64)); + +static const struct at_xdmac_layout at_xdmac_sama5d4_layout = { + .grs = 0x28, + .gws = 0x2C, + .grws = 0x30, + .grwr = 0x34, + .gswr = 0x38, + .gsws = 0x3C, + .gswf = 0x40, + .chan_cc_reg_base = 0x50, + .sdif = true, + .axi_config = false, +}; + +static const struct at_xdmac_layout at_xdmac_sama7g5_layout = { + .grs = 0x30, + .gws = 0x38, + .grws = 0x40, + .grwr = 0x44, + .gswr = 0x48, + .gsws = 0x4C, + .gswf = 0x50, + .chan_cc_reg_base = 0x60, + .sdif = false, + .axi_config = true, +}; + +static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb) +{ + return atxdmac->regs + (atxdmac->layout->chan_cc_reg_base + chan_nb * 0x40); +} + +#define at_xdmac_read(atxdmac, reg) readl_relaxed((atxdmac)->regs + (reg)) +#define at_xdmac_write(atxdmac, reg, value) \ + writel_relaxed((value), (atxdmac)->regs + (reg)) + +#define at_xdmac_chan_read(atchan, reg) readl_relaxed((atchan)->ch_regs + (reg)) +#define at_xdmac_chan_write(atchan, reg, value) writel_relaxed((value), (atchan)->ch_regs + (reg)) + +static inline struct at_xdmac_chan *to_at_xdmac_chan(struct dma_chan *dchan) +{ + return container_of(dchan, struct at_xdmac_chan, chan); +} + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct at_xdmac *to_at_xdmac(struct dma_device *ddev) +{ + return container_of(ddev, struct at_xdmac, dma); +} + +static inline struct at_xdmac_desc *txd_to_at_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct at_xdmac_desc, tx_dma_desc); +} + +static inline int at_xdmac_chan_is_cyclic(struct at_xdmac_chan *atchan) +{ + return test_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); +} + +static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan) +{ + return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); +} + +static inline int at_xdmac_chan_is_paused_internal(struct at_xdmac_chan *atchan) +{ + return test_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); +} + +static inline bool at_xdmac_chan_is_peripheral_xfer(u32 cfg) +{ + return cfg & AT_XDMAC_CC_TYPE_PER_TRAN; +} + +static inline u8 at_xdmac_get_dwidth(u32 cfg) +{ + return (cfg & AT_XDMAC_CC_DWIDTH_MASK) >> AT_XDMAC_CC_DWIDTH_OFFSET; +}; + +static unsigned int init_nr_desc_per_channel = 64; +module_param(init_nr_desc_per_channel, uint, 0644); +MODULE_PARM_DESC(init_nr_desc_per_channel, + "initial descriptors per channel (default: 64)"); + + +static void at_xdmac_runtime_suspend_descriptors(struct at_xdmac_chan *atchan) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + struct at_xdmac_desc *desc, *_desc; + + list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) { + if (!desc->active_xfer) + continue; + + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); + } +} + +static int at_xdmac_runtime_resume_descriptors(struct at_xdmac_chan *atchan) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + struct at_xdmac_desc *desc, *_desc; + int ret; + + list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) { + if (!desc->active_xfer) + continue; + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return ret; + } + + return 0; +} + +static bool at_xdmac_chan_is_enabled(struct at_xdmac_chan *atchan) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + int ret; + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return false; + + ret = !!(at_xdmac_chan_read(atchan, AT_XDMAC_GS) & atchan->mask); + + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); + + return ret; +} + +static void at_xdmac_off(struct at_xdmac *atxdmac, bool suspend_descriptors) +{ + struct dma_chan *chan, *_chan; + struct at_xdmac_chan *atchan; + int ret; + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return; + + at_xdmac_write(atxdmac, AT_XDMAC_GD, -1L); + + /* Wait that all chans are disabled. */ + while (at_xdmac_read(atxdmac, AT_XDMAC_GS)) + cpu_relax(); + + at_xdmac_write(atxdmac, AT_XDMAC_GID, -1L); + + /* Decrement runtime PM ref counter for each active descriptor. */ + if (!list_empty(&atxdmac->dma.channels) && suspend_descriptors) { + list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, + device_node) { + atchan = to_at_xdmac_chan(chan); + at_xdmac_runtime_suspend_descriptors(atchan); + } + } + + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); +} + +/* Call with lock hold. */ +static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, + struct at_xdmac_desc *first) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + u32 reg; + int ret; + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return; + + dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first); + + /* Set transfer as active to not try to start it again. */ + first->active_xfer = true; + + /* Tell xdmac where to get the first descriptor. */ + reg = AT_XDMAC_CNDA_NDA(first->tx_dma_desc.phys); + if (atxdmac->layout->sdif) + reg |= AT_XDMAC_CNDA_NDAIF(atchan->memif); + + at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, reg); + + /* + * When doing non cyclic transfer we need to use the next + * descriptor view 2 since some fields of the configuration register + * depend on transfer size and src/dest addresses. + */ + if (at_xdmac_chan_is_cyclic(atchan)) + reg = AT_XDMAC_CNDC_NDVIEW_NDV1; + else if ((first->lld.mbr_ubc & + AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3) + reg = AT_XDMAC_CNDC_NDVIEW_NDV3; + else + reg = AT_XDMAC_CNDC_NDVIEW_NDV2; + /* + * Even if the register will be updated from the configuration in the + * descriptor when using view 2 or higher, the PROT bit won't be set + * properly. This bit can be modified only by using the channel + * configuration register. + */ + at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); + + reg |= AT_XDMAC_CNDC_NDDUP + | AT_XDMAC_CNDC_NDSUP + | AT_XDMAC_CNDC_NDE; + at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, reg); + + dev_vdbg(chan2dev(&atchan->chan), + "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n", + __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC), + at_xdmac_chan_read(atchan, AT_XDMAC_CNDA), + at_xdmac_chan_read(atchan, AT_XDMAC_CNDC), + at_xdmac_chan_read(atchan, AT_XDMAC_CSA), + at_xdmac_chan_read(atchan, AT_XDMAC_CDA), + at_xdmac_chan_read(atchan, AT_XDMAC_CUBC)); + + at_xdmac_chan_write(atchan, AT_XDMAC_CID, 0xffffffff); + reg = AT_XDMAC_CIE_RBEIE | AT_XDMAC_CIE_WBEIE; + /* + * Request Overflow Error is only for peripheral synchronized transfers + */ + if (at_xdmac_chan_is_peripheral_xfer(first->lld.mbr_cfg)) + reg |= AT_XDMAC_CIE_ROIE; + + /* + * There is no end of list when doing cyclic dma, we need to get + * an interrupt after each periods. + */ + if (at_xdmac_chan_is_cyclic(atchan)) + at_xdmac_chan_write(atchan, AT_XDMAC_CIE, + reg | AT_XDMAC_CIE_BIE); + else + at_xdmac_chan_write(atchan, AT_XDMAC_CIE, + reg | AT_XDMAC_CIE_LIE); + at_xdmac_write(atxdmac, AT_XDMAC_GIE, atchan->mask); + dev_vdbg(chan2dev(&atchan->chan), + "%s: enable channel (0x%08x)\n", __func__, atchan->mask); + wmb(); + at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask); + + dev_vdbg(chan2dev(&atchan->chan), + "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n", + __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC), + at_xdmac_chan_read(atchan, AT_XDMAC_CNDA), + at_xdmac_chan_read(atchan, AT_XDMAC_CNDC), + at_xdmac_chan_read(atchan, AT_XDMAC_CSA), + at_xdmac_chan_read(atchan, AT_XDMAC_CDA), + at_xdmac_chan_read(atchan, AT_XDMAC_CUBC)); +} + +static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct at_xdmac_desc *desc = txd_to_at_desc(tx); + struct at_xdmac_chan *atchan = to_at_xdmac_chan(tx->chan); + dma_cookie_t cookie; + unsigned long irqflags; + + spin_lock_irqsave(&atchan->lock, irqflags); + cookie = dma_cookie_assign(tx); + + list_add_tail(&desc->xfer_node, &atchan->xfers_list); + spin_unlock_irqrestore(&atchan->lock, irqflags); + + dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n", + __func__, atchan, desc); + + return cookie; +} + +static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan, + gfp_t gfp_flags) +{ + struct at_xdmac_desc *desc; + struct at_xdmac *atxdmac = to_at_xdmac(chan->device); + dma_addr_t phys; + + desc = dma_pool_zalloc(atxdmac->at_xdmac_desc_pool, gfp_flags, &phys); + if (desc) { + INIT_LIST_HEAD(&desc->descs_list); + dma_async_tx_descriptor_init(&desc->tx_dma_desc, chan); + desc->tx_dma_desc.tx_submit = at_xdmac_tx_submit; + desc->tx_dma_desc.phys = phys; + } + + return desc; +} + +static void at_xdmac_init_used_desc(struct at_xdmac_desc *desc) +{ + memset(&desc->lld, 0, sizeof(desc->lld)); + INIT_LIST_HEAD(&desc->descs_list); + desc->direction = DMA_TRANS_NONE; + desc->xfer_size = 0; + desc->active_xfer = false; +} + +/* Call must be protected by lock. */ +static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) +{ + struct at_xdmac_desc *desc; + + if (list_empty(&atchan->free_descs_list)) { + desc = at_xdmac_alloc_desc(&atchan->chan, GFP_NOWAIT); + } else { + desc = list_first_entry(&atchan->free_descs_list, + struct at_xdmac_desc, desc_node); + list_del(&desc->desc_node); + at_xdmac_init_used_desc(desc); + } + + return desc; +} + +static void at_xdmac_queue_desc(struct dma_chan *chan, + struct at_xdmac_desc *prev, + struct at_xdmac_desc *desc) +{ + if (!prev || !desc) + return; + + prev->lld.mbr_nda = desc->tx_dma_desc.phys; + prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE; + + dev_dbg(chan2dev(chan), "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", + __func__, prev, &prev->lld.mbr_nda); +} + +static inline void at_xdmac_increment_block_count(struct dma_chan *chan, + struct at_xdmac_desc *desc) +{ + if (!desc) + return; + + desc->lld.mbr_bc++; + + dev_dbg(chan2dev(chan), + "%s: incrementing the block count of the desc 0x%p\n", + __func__, desc); +} + +static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec, + struct of_dma *of_dma) +{ + struct at_xdmac *atxdmac = of_dma->of_dma_data; + struct at_xdmac_chan *atchan; + struct dma_chan *chan; + struct device *dev = atxdmac->dma.dev; + + if (dma_spec->args_count != 1) { + dev_err(dev, "dma phandler args: bad number of args\n"); + return NULL; + } + + chan = dma_get_any_slave_channel(&atxdmac->dma); + if (!chan) { + dev_err(dev, "can't get a dma channel\n"); + return NULL; + } + + atchan = to_at_xdmac_chan(chan); + atchan->memif = AT91_XDMAC_DT_GET_MEM_IF(dma_spec->args[0]); + atchan->perif = AT91_XDMAC_DT_GET_PER_IF(dma_spec->args[0]); + atchan->perid = AT91_XDMAC_DT_GET_PERID(dma_spec->args[0]); + dev_dbg(dev, "chan dt cfg: memif=%u perif=%u perid=%u\n", + atchan->memif, atchan->perif, atchan->perid); + + return chan; +} + +static int at_xdmac_compute_chan_conf(struct dma_chan *chan, + enum dma_transfer_direction direction) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + int csize, dwidth; + + if (direction == DMA_DEV_TO_MEM) { + atchan->cfg = + AT91_XDMAC_DT_PERID(atchan->perid) + | AT_XDMAC_CC_DAM_INCREMENTED_AM + | AT_XDMAC_CC_SAM_FIXED_AM + | AT_XDMAC_CC_SWREQ_HWR_CONNECTED + | AT_XDMAC_CC_DSYNC_PER2MEM + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_PER_TRAN; + if (atxdmac->layout->sdif) + atchan->cfg |= AT_XDMAC_CC_DIF(atchan->memif) | + AT_XDMAC_CC_SIF(atchan->perif); + + csize = ffs(atchan->sconfig.src_maxburst) - 1; + if (csize < 0) { + dev_err(chan2dev(chan), "invalid src maxburst value\n"); + return -EINVAL; + } + atchan->cfg |= AT_XDMAC_CC_CSIZE(csize); + dwidth = ffs(atchan->sconfig.src_addr_width) - 1; + if (dwidth < 0) { + dev_err(chan2dev(chan), "invalid src addr width value\n"); + return -EINVAL; + } + atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth); + } else if (direction == DMA_MEM_TO_DEV) { + atchan->cfg = + AT91_XDMAC_DT_PERID(atchan->perid) + | AT_XDMAC_CC_DAM_FIXED_AM + | AT_XDMAC_CC_SAM_INCREMENTED_AM + | AT_XDMAC_CC_SWREQ_HWR_CONNECTED + | AT_XDMAC_CC_DSYNC_MEM2PER + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_PER_TRAN; + if (atxdmac->layout->sdif) + atchan->cfg |= AT_XDMAC_CC_DIF(atchan->perif) | + AT_XDMAC_CC_SIF(atchan->memif); + + csize = ffs(atchan->sconfig.dst_maxburst) - 1; + if (csize < 0) { + dev_err(chan2dev(chan), "invalid src maxburst value\n"); + return -EINVAL; + } + atchan->cfg |= AT_XDMAC_CC_CSIZE(csize); + dwidth = ffs(atchan->sconfig.dst_addr_width) - 1; + if (dwidth < 0) { + dev_err(chan2dev(chan), "invalid dst addr width value\n"); + return -EINVAL; + } + atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth); + } + + dev_dbg(chan2dev(chan), "%s: cfg=0x%08x\n", __func__, atchan->cfg); + + return 0; +} + +/* + * Only check that maxburst and addr width values are supported by + * the controller but not that the configuration is good to perform the + * transfer since we don't know the direction at this stage. + */ +static int at_xdmac_check_slave_config(struct dma_slave_config *sconfig) +{ + if ((sconfig->src_maxburst > AT_XDMAC_MAX_CSIZE) + || (sconfig->dst_maxburst > AT_XDMAC_MAX_CSIZE)) + return -EINVAL; + + if ((sconfig->src_addr_width > AT_XDMAC_MAX_DWIDTH) + || (sconfig->dst_addr_width > AT_XDMAC_MAX_DWIDTH)) + return -EINVAL; + + return 0; +} + +static int at_xdmac_set_slave_config(struct dma_chan *chan, + struct dma_slave_config *sconfig) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + + if (at_xdmac_check_slave_config(sconfig)) { + dev_err(chan2dev(chan), "invalid slave configuration\n"); + return -EINVAL; + } + + memcpy(&atchan->sconfig, sconfig, sizeof(atchan->sconfig)); + + return 0; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *first = NULL, *prev = NULL; + struct scatterlist *sg; + int i; + unsigned int xfer_size = 0; + unsigned long irqflags; + struct dma_async_tx_descriptor *ret = NULL; + + if (!sgl) + return NULL; + + if (!is_slave_direction(direction)) { + dev_err(chan2dev(chan), "invalid DMA direction\n"); + return NULL; + } + + dev_dbg(chan2dev(chan), "%s: sg_len=%d, dir=%s, flags=0x%lx\n", + __func__, sg_len, + direction == DMA_MEM_TO_DEV ? "to device" : "from device", + flags); + + /* Protect dma_sconfig field that can be modified by set_slave_conf. */ + spin_lock_irqsave(&atchan->lock, irqflags); + + if (at_xdmac_compute_chan_conf(chan, direction)) + goto spin_unlock; + + /* Prepare descriptors. */ + for_each_sg(sgl, sg, sg_len, i) { + struct at_xdmac_desc *desc = NULL; + u32 len, mem, dwidth, fixed_dwidth; + + len = sg_dma_len(sg); + mem = sg_dma_address(sg); + if (unlikely(!len)) { + dev_err(chan2dev(chan), "sg data length is zero\n"); + goto spin_unlock; + } + dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n", + __func__, i, len, mem); + + desc = at_xdmac_get_desc(atchan); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + if (first) + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); + goto spin_unlock; + } + + /* Linked list descriptor setup. */ + if (direction == DMA_DEV_TO_MEM) { + desc->lld.mbr_sa = atchan->sconfig.src_addr; + desc->lld.mbr_da = mem; + } else { + desc->lld.mbr_sa = mem; + desc->lld.mbr_da = atchan->sconfig.dst_addr; + } + dwidth = at_xdmac_get_dwidth(atchan->cfg); + fixed_dwidth = IS_ALIGNED(len, 1 << dwidth) + ? dwidth + : AT_XDMAC_CC_DWIDTH_BYTE; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ + | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ + | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ + | (len >> fixed_dwidth); /* microblock length */ + desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) | + AT_XDMAC_CC_DWIDTH(fixed_dwidth); + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", + __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + prev = desc; + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + xfer_size += len; + } + + + first->tx_dma_desc.flags = flags; + first->xfer_size = xfer_size; + first->direction = direction; + ret = &first->tx_dma_desc; + +spin_unlock: + spin_unlock_irqrestore(&atchan->lock, irqflags); + return ret; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction direction, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *first = NULL, *prev = NULL; + unsigned int periods = buf_len / period_len; + int i; + unsigned long irqflags; + + dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n", + __func__, &buf_addr, buf_len, period_len, + direction == DMA_MEM_TO_DEV ? "mem2per" : "per2mem", flags); + + if (!is_slave_direction(direction)) { + dev_err(chan2dev(chan), "invalid DMA direction\n"); + return NULL; + } + + if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) { + dev_err(chan2dev(chan), "channel currently used\n"); + return NULL; + } + + if (at_xdmac_compute_chan_conf(chan, direction)) + return NULL; + + for (i = 0; i < periods; i++) { + struct at_xdmac_desc *desc = NULL; + + spin_lock_irqsave(&atchan->lock, irqflags); + desc = at_xdmac_get_desc(atchan); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + if (first) + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); + spin_unlock_irqrestore(&atchan->lock, irqflags); + return NULL; + } + spin_unlock_irqrestore(&atchan->lock, irqflags); + dev_dbg(chan2dev(chan), + "%s: desc=0x%p, tx_dma_desc.phys=%pad\n", + __func__, desc, &desc->tx_dma_desc.phys); + + if (direction == DMA_DEV_TO_MEM) { + desc->lld.mbr_sa = atchan->sconfig.src_addr; + desc->lld.mbr_da = buf_addr + i * period_len; + } else { + desc->lld.mbr_sa = buf_addr + i * period_len; + desc->lld.mbr_da = atchan->sconfig.dst_addr; + } + desc->lld.mbr_cfg = atchan->cfg; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg); + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", + __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + prev = desc; + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + } + + at_xdmac_queue_desc(chan, prev, first); + first->tx_dma_desc.flags = flags; + first->xfer_size = buf_len; + first->direction = direction; + + return &first->tx_dma_desc; +} + +static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr) +{ + u32 width; + + /* + * Check address alignment to select the greater data width we + * can use. + * + * Some XDMAC implementations don't provide dword transfer, in + * this case selecting dword has the same behavior as + * selecting word transfers. + */ + if (!(addr & 7)) { + width = AT_XDMAC_CC_DWIDTH_DWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); + } else if (!(addr & 3)) { + width = AT_XDMAC_CC_DWIDTH_WORD; + dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); + } else if (!(addr & 1)) { + width = AT_XDMAC_CC_DWIDTH_HALFWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); + } else { + width = AT_XDMAC_CC_DWIDTH_BYTE; + dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); + } + + return width; +} + +static struct at_xdmac_desc * +at_xdmac_interleaved_queue_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + struct at_xdmac_desc *prev, + dma_addr_t src, dma_addr_t dst, + struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + struct at_xdmac_desc *desc; + u32 dwidth; + unsigned long flags; + size_t ublen; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. + * ERRATA: Even if useless for memory transfers, the PERID has to not + * match the one of another channel. If not, it could lead to spurious + * flag status. + * For SAMA7G5x case, the SIF and DIF fields are no longer used. + * Thus, no need to have the SIF/DIF interfaces here. + * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as + * zero. + */ + u32 chan_cc = AT_XDMAC_CC_PERID(0x7f) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, src | dst | chunk->size); + if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_dbg(chan2dev(chan), + "%s: chunk too big (%zu, max size %lu)...\n", + __func__, chunk->size, + AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth); + return NULL; + } + + if (prev) + dev_dbg(chan2dev(chan), + "Adding items at the end of desc 0x%p\n", prev); + + if (xt->src_inc) { + if (xt->src_sgl) + chan_cc |= AT_XDMAC_CC_SAM_UBS_AM; + else + chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM; + } + + if (xt->dst_inc) { + if (xt->dst_sgl) + chan_cc |= AT_XDMAC_CC_DAM_UBS_AM; + else + chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = chunk->size >> dwidth; + + desc->lld.mbr_sa = src; + desc->lld.mbr_da = dst; + desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk); + desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk); + + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, + desc->lld.mbr_ubc, desc->lld.mbr_cfg); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + return desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *prev = NULL, *first = NULL; + dma_addr_t dst_addr, src_addr; + size_t src_skip = 0, dst_skip = 0, len = 0; + struct data_chunk *chunk; + int i; + + if (!xt || !xt->numf || (xt->dir != DMA_MEM_TO_MEM)) + return NULL; + + /* + * TODO: Handle the case where we have to repeat a chain of + * descriptors... + */ + if ((xt->numf > 1) && (xt->frame_size > 1)) + return NULL; + + dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%zu, frame_size=%zu, flags=0x%lx\n", + __func__, &xt->src_start, &xt->dst_start, xt->numf, + xt->frame_size, flags); + + src_addr = xt->src_start; + dst_addr = xt->dst_start; + + if (xt->numf > 1) { + first = at_xdmac_interleaved_queue_desc(chan, atchan, + NULL, + src_addr, dst_addr, + xt, xt->sgl); + if (!first) + return NULL; + + /* Length of the block is (BLEN+1) microblocks. */ + for (i = 0; i < xt->numf - 1; i++) + at_xdmac_increment_block_count(chan, first); + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, first, first); + list_add_tail(&first->desc_node, &first->descs_list); + } else { + for (i = 0; i < xt->frame_size; i++) { + size_t src_icg = 0, dst_icg = 0; + struct at_xdmac_desc *desc; + + chunk = xt->sgl + i; + + dst_icg = dmaengine_get_dst_icg(xt, chunk); + src_icg = dmaengine_get_src_icg(xt, chunk); + + src_skip = chunk->size + src_icg; + dst_skip = chunk->size + dst_icg; + + dev_dbg(chan2dev(chan), + "%s: chunk size=%zu, src icg=%zu, dst icg=%zu\n", + __func__, chunk->size, src_icg, dst_icg); + + desc = at_xdmac_interleaved_queue_desc(chan, atchan, + prev, + src_addr, dst_addr, + xt, chunk); + if (!desc) { + if (first) + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); + return NULL; + } + + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + + if (xt->src_sgl) + src_addr += src_skip; + + if (xt->dst_sgl) + dst_addr += dst_skip; + + len += chunk->size; + prev = desc; + } + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *first = NULL, *prev = NULL; + size_t remaining_size = len, xfer_size = 0, ublen; + dma_addr_t src_addr = src, dst_addr = dest; + u32 dwidth; + /* + * WARNING: We don't know the direction, it involves we can't + * dynamically set the source and dest interface so we have to use the + * same one. Only interface 0 allows EBI access. Hopefully we can + * access DDR through both ports (at least on SAMA5D4x), so we can use + * the same interface for source and dest, that solves the fact we + * don't know the direction. + * ERRATA: Even if useless for memory transfers, the PERID has to not + * match the one of another channel. If not, it could lead to spurious + * flag status. + * For SAMA7G5x case, the SIF and DIF fields are no longer used. + * Thus, no need to have the SIF/DIF interfaces here. + * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as + * zero. + */ + u32 chan_cc = AT_XDMAC_CC_PERID(0x7f) + | AT_XDMAC_CC_DAM_INCREMENTED_AM + | AT_XDMAC_CC_SAM_INCREMENTED_AM + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_MEM_TRAN; + unsigned long irqflags; + + dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n", + __func__, &src, &dest, len, flags); + + if (unlikely(!len)) + return NULL; + + dwidth = at_xdmac_align_width(chan, src_addr | dst_addr); + + /* Prepare descriptors. */ + while (remaining_size) { + struct at_xdmac_desc *desc = NULL; + + dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size); + + spin_lock_irqsave(&atchan->lock, irqflags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, irqflags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + if (first) + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); + return NULL; + } + + /* Update src and dest addresses. */ + src_addr += xfer_size; + dst_addr += xfer_size; + + if (remaining_size >= AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth) + xfer_size = AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth; + else + xfer_size = remaining_size; + + dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size); + + /* Check remaining length and change data width if needed. */ + dwidth = at_xdmac_align_width(chan, + src_addr | dst_addr | xfer_size); + chan_cc &= ~AT_XDMAC_CC_DWIDTH_MASK; + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = xfer_size >> dwidth; + remaining_size -= xfer_size; + + desc->lld.mbr_sa = src_addr; + desc->lld.mbr_da = dst_addr; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + prev = desc; + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + } + + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + +static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + dma_addr_t dst_addr, + size_t len, + int value) +{ + struct at_xdmac_desc *desc; + unsigned long flags; + size_t ublen; + u32 dwidth; + char pattern; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. + * ERRATA: Even if useless for memory transfers, the PERID has to not + * match the one of another channel. If not, it could lead to spurious + * flag status. + * For SAMA7G5x case, the SIF and DIF fields are no longer used. + * Thus, no need to have the SIF/DIF interfaces here. + * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as + * zero. + */ + u32 chan_cc = AT_XDMAC_CC_PERID(0x7f) + | AT_XDMAC_CC_DAM_UBS_AM + | AT_XDMAC_CC_SAM_INCREMENTED_AM + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_MEMSET_HW_MODE + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, dst_addr); + + if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_err(chan2dev(chan), + "%s: Transfer too large, aborting...\n", + __func__); + return NULL; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + /* Only the first byte of value is to be used according to dmaengine */ + pattern = (char)value; + + ublen = len >> dwidth; + + desc->lld.mbr_da = dst_addr; + desc->lld.mbr_ds = (pattern << 24) | + (pattern << 16) | + (pattern << 8) | + pattern; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_da=%pad, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc, + desc->lld.mbr_cfg); + + return desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc; + + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%zu, pattern=0x%x, flags=0x%lx\n", + __func__, &dest, len, value, flags); + + if (unlikely(!len)) + return NULL; + + desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value); + list_add_tail(&desc->desc_node, &desc->descs_list); + + desc->tx_dma_desc.cookie = -EBUSY; + desc->tx_dma_desc.flags = flags; + desc->xfer_size = len; + + return &desc->tx_dma_desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, int value, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc, *pdesc = NULL, + *ppdesc = NULL, *first = NULL; + struct scatterlist *sg, *psg = NULL, *ppsg = NULL; + size_t stride = 0, pstride = 0, len = 0; + int i; + + if (!sgl) + return NULL; + + dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n", + __func__, sg_len, value, flags); + + /* Prepare descriptors. */ + for_each_sg(sgl, sg, sg_len, i) { + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, &sg_dma_address(sg), sg_dma_len(sg), + value, flags); + desc = at_xdmac_memset_create_desc(chan, atchan, + sg_dma_address(sg), + sg_dma_len(sg), + value); + if (!desc && first) + list_splice_tail_init(&first->descs_list, + &atchan->free_descs_list); + + if (!first) + first = desc; + + /* Update our strides */ + pstride = stride; + if (psg) + stride = sg_dma_address(sg) - + (sg_dma_address(psg) + sg_dma_len(psg)); + + /* + * The scatterlist API gives us only the address and + * length of each elements. + * + * Unfortunately, we don't have the stride, which we + * will need to compute. + * + * That make us end up in a situation like this one: + * len stride len stride len + * +-------+ +-------+ +-------+ + * | N-2 | | N-1 | | N | + * +-------+ +-------+ +-------+ + * + * We need all these three elements (N-2, N-1 and N) + * to actually take the decision on whether we need to + * queue N-1 or reuse N-2. + * + * We will only consider N if it is the last element. + */ + if (ppdesc && pdesc) { + if ((stride == pstride) && + (sg_dma_len(ppsg) == sg_dma_len(psg))) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, pdesc, ppdesc); + + /* + * Increment the block count of the + * N-2 descriptor + */ + at_xdmac_increment_block_count(chan, ppdesc); + ppdesc->lld.mbr_dus = stride; + + /* + * Put back the N-1 descriptor in the + * free descriptor list + */ + list_add_tail(&pdesc->desc_node, + &atchan->free_descs_list); + + /* + * Make our N-1 descriptor pointer + * point to the N-2 since they were + * actually merged. + */ + pdesc = ppdesc; + + /* + * Rule out the case where we don't have + * pstride computed yet (our second sg + * element) + * + * We also want to catch the case where there + * would be a negative stride, + */ + } else if (pstride || + sg_dma_address(sg) < sg_dma_address(psg)) { + /* + * Queue the N-1 descriptor after the + * N-2 + */ + at_xdmac_queue_desc(chan, ppdesc, pdesc); + + /* + * Add the N-1 descriptor to the list + * of the descriptors used for this + * transfer + */ + list_add_tail(&desc->desc_node, + &first->descs_list); + dev_dbg(chan2dev(chan), + "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + } + } + + /* + * If we are the last element, just see if we have the + * same size than the previous element. + * + * If so, we can merge it with the previous descriptor + * since we don't care about the stride anymore. + */ + if ((i == (sg_len - 1)) && + sg_dma_len(psg) == sg_dma_len(sg)) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, desc, pdesc); + + /* + * Increment the block count of the N-1 + * descriptor + */ + at_xdmac_increment_block_count(chan, pdesc); + pdesc->lld.mbr_dus = stride; + + /* + * Put back the N descriptor in the free + * descriptor list + */ + list_add_tail(&desc->desc_node, + &atchan->free_descs_list); + } + + /* Update our descriptors */ + ppdesc = pdesc; + pdesc = desc; + + /* Update our scatter pointers */ + ppsg = psg; + psg = sg; + + len += sg_dma_len(sg); + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + +static enum dma_status +at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + struct at_xdmac_desc *desc, *_desc, *iter; + struct list_head *descs_list; + enum dma_status ret; + int residue, retry, pm_status; + u32 cur_nda, check_nda, cur_ubc, mask, value; + u8 dwidth = 0; + unsigned long flags; + bool initd; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + pm_status = pm_runtime_resume_and_get(atxdmac->dev); + if (pm_status < 0) + return DMA_ERROR; + + spin_lock_irqsave(&atchan->lock, flags); + + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node); + + /* + * If the transfer has not been started yet, don't need to compute the + * residue, it's the transfer length. + */ + if (!desc->active_xfer) { + dma_set_residue(txstate, desc->xfer_size); + goto spin_unlock; + } + + residue = desc->xfer_size; + /* + * Flush FIFO: only relevant when the transfer is source peripheral + * synchronized. Flush is needed before reading CUBC because data in + * the FIFO are not reported by CUBC. Reporting a residue of the + * transfer length while we have data in FIFO can cause issue. + * Usecase: atmel USART has a timeout which means I have received + * characters but there is no more character received for a while. On + * timeout, it requests the residue. If the data are in the DMA FIFO, + * we will return a residue of the transfer length. It means no data + * received. If an application is waiting for these data, it will hang + * since we won't have another USART timeout without receiving new + * data. + */ + mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC; + value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM; + if ((desc->lld.mbr_cfg & mask) == value) { + at_xdmac_write(atxdmac, atxdmac->layout->gswf, atchan->mask); + while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS)) + cpu_relax(); + } + + /* + * The easiest way to compute the residue should be to pause the DMA + * but doing this can lead to miss some data as some devices don't + * have FIFO. + * We need to read several registers because: + * - DMA is running therefore a descriptor change is possible while + * reading these registers + * - When the block transfer is done, the value of the CUBC register + * is set to its initial value until the fetch of the next descriptor. + * This value will corrupt the residue calculation so we have to skip + * it. + * + * INITD -------- ------------ + * |____________________| + * _______________________ _______________ + * NDA @desc2 \/ @desc3 + * _______________________/\_______________ + * __________ ___________ _______________ + * CUBC 0 \/ MAX desc1 \/ MAX desc2 + * __________/\___________/\_______________ + * + * Since descriptors are aligned on 64 bits, we can assume that + * the update of NDA and CUBC is atomic. + * Memory barriers are used to ensure the read order of the registers. + * A max number of retries is set because unlikely it could never ends. + */ + for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { + check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + rmb(); + cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + rmb(); + initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); + rmb(); + cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + rmb(); + + if ((check_nda == cur_nda) && initd) + break; + } + + if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) { + ret = DMA_ERROR; + goto spin_unlock; + } + + /* + * Flush FIFO: only relevant when the transfer is source peripheral + * synchronized. Another flush is needed here because CUBC is updated + * when the controller sends the data write command. It can lead to + * report data that are not written in the memory or the device. The + * FIFO flush ensures that data are really written. + */ + if ((desc->lld.mbr_cfg & mask) == value) { + at_xdmac_write(atxdmac, atxdmac->layout->gswf, atchan->mask); + while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS)) + cpu_relax(); + } + + /* + * Remove size of all microblocks already transferred and the current + * one. Then add the remaining size to transfer of the current + * microblock. + */ + descs_list = &desc->descs_list; + list_for_each_entry_safe(iter, _desc, descs_list, desc_node) { + dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg); + residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth; + if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) { + desc = iter; + break; + } + } + residue += cur_ubc << dwidth; + + dma_set_residue(txstate, residue); + + dev_dbg(chan2dev(chan), + "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n", + __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue); + +spin_unlock: + spin_unlock_irqrestore(&atchan->lock, flags); + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); + return ret; +} + +static void at_xdmac_advance_work(struct at_xdmac_chan *atchan) +{ + struct at_xdmac_desc *desc; + + /* + * If channel is enabled, do nothing, advance_work will be triggered + * after the interruption. + */ + if (at_xdmac_chan_is_enabled(atchan) || list_empty(&atchan->xfers_list)) + return; + + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, + xfer_node); + dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); + if (!desc->active_xfer) + at_xdmac_start_xfer(atchan, desc); +} + +static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan) +{ + struct at_xdmac_desc *desc; + struct dma_async_tx_descriptor *txd; + + spin_lock_irq(&atchan->lock); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); + if (list_empty(&atchan->xfers_list)) { + spin_unlock_irq(&atchan->lock); + return; + } + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, + xfer_node); + spin_unlock_irq(&atchan->lock); + txd = &desc->tx_dma_desc; + if (txd->flags & DMA_PREP_INTERRUPT) + dmaengine_desc_get_callback_invoke(txd, NULL); +} + +/* Called with atchan->lock held. */ +static void at_xdmac_handle_error(struct at_xdmac_chan *atchan) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + struct at_xdmac_desc *bad_desc; + int ret; + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return; + + /* + * The descriptor currently at the head of the active list is + * broken. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to continue with other + * descriptors queued (if any). + */ + if (atchan->irq_status & AT_XDMAC_CIS_RBEIS) + dev_err(chan2dev(&atchan->chan), "read bus error!!!"); + if (atchan->irq_status & AT_XDMAC_CIS_WBEIS) + dev_err(chan2dev(&atchan->chan), "write bus error!!!"); + if (atchan->irq_status & AT_XDMAC_CIS_ROIS) + dev_err(chan2dev(&atchan->chan), "request overflow error!!!"); + + /* Channel must be disabled first as it's not done automatically */ + at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); + while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask) + cpu_relax(); + + bad_desc = list_first_entry(&atchan->xfers_list, + struct at_xdmac_desc, + xfer_node); + + /* Print bad descriptor's details if needed */ + dev_dbg(chan2dev(&atchan->chan), + "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", + __func__, &bad_desc->lld.mbr_sa, &bad_desc->lld.mbr_da, + bad_desc->lld.mbr_ubc); + + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); + + /* Then continue with usual descriptor management */ +} + +static void at_xdmac_tasklet(struct tasklet_struct *t) +{ + struct at_xdmac_chan *atchan = from_tasklet(atchan, t, tasklet); + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + struct at_xdmac_desc *desc; + struct dma_async_tx_descriptor *txd; + u32 error_mask; + + if (at_xdmac_chan_is_cyclic(atchan)) + return at_xdmac_handle_cyclic(atchan); + + error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS | + AT_XDMAC_CIS_ROIS; + + spin_lock_irq(&atchan->lock); + + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); + + if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) && + !(atchan->irq_status & error_mask)) { + spin_unlock_irq(&atchan->lock); + return; + } + + if (atchan->irq_status & error_mask) + at_xdmac_handle_error(atchan); + + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, + xfer_node); + dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); + if (!desc->active_xfer) { + dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting"); + spin_unlock_irq(&atchan->lock); + return; + } + + txd = &desc->tx_dma_desc; + dma_cookie_complete(txd); + /* Remove the transfer from the transfer list. */ + list_del(&desc->xfer_node); + spin_unlock_irq(&atchan->lock); + + if (txd->flags & DMA_PREP_INTERRUPT) + dmaengine_desc_get_callback_invoke(txd, NULL); + + dma_run_dependencies(txd); + + spin_lock_irq(&atchan->lock); + /* Move the xfer descriptors into the free descriptors list. */ + list_splice_tail_init(&desc->descs_list, &atchan->free_descs_list); + at_xdmac_advance_work(atchan); + spin_unlock_irq(&atchan->lock); + + /* + * Decrement runtime PM ref counter incremented in + * at_xdmac_start_xfer(). + */ + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); +} + +static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) +{ + struct at_xdmac *atxdmac = (struct at_xdmac *)dev_id; + struct at_xdmac_chan *atchan; + u32 imr, status, pending; + u32 chan_imr, chan_status; + int i, ret = IRQ_NONE; + + do { + imr = at_xdmac_read(atxdmac, AT_XDMAC_GIM); + status = at_xdmac_read(atxdmac, AT_XDMAC_GIS); + pending = status & imr; + + dev_vdbg(atxdmac->dma.dev, + "%s: status=0x%08x, imr=0x%08x, pending=0x%08x\n", + __func__, status, imr, pending); + + if (!pending) + break; + + /* We have to find which channel has generated the interrupt. */ + for (i = 0; i < atxdmac->dma.chancnt; i++) { + if (!((1 << i) & pending)) + continue; + + atchan = &atxdmac->chan[i]; + chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); + chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS); + atchan->irq_status = chan_status & chan_imr; + dev_vdbg(atxdmac->dma.dev, + "%s: chan%d: imr=0x%x, status=0x%x\n", + __func__, i, chan_imr, chan_status); + dev_vdbg(chan2dev(&atchan->chan), + "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n", + __func__, + at_xdmac_chan_read(atchan, AT_XDMAC_CC), + at_xdmac_chan_read(atchan, AT_XDMAC_CNDA), + at_xdmac_chan_read(atchan, AT_XDMAC_CNDC), + at_xdmac_chan_read(atchan, AT_XDMAC_CSA), + at_xdmac_chan_read(atchan, AT_XDMAC_CDA), + at_xdmac_chan_read(atchan, AT_XDMAC_CUBC)); + + if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) + at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); + + tasklet_schedule(&atchan->tasklet); + ret = IRQ_HANDLED; + } + + } while (pending); + + return ret; +} + +static void at_xdmac_issue_pending(struct dma_chan *chan) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + unsigned long flags; + + dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__); + + spin_lock_irqsave(&atchan->lock, flags); + at_xdmac_advance_work(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + + return; +} + +static int at_xdmac_device_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + int ret; + unsigned long flags; + + dev_dbg(chan2dev(chan), "%s\n", __func__); + + spin_lock_irqsave(&atchan->lock, flags); + ret = at_xdmac_set_slave_config(chan, config); + spin_unlock_irqrestore(&atchan->lock, flags); + + return ret; +} + +static void at_xdmac_device_pause_set(struct at_xdmac *atxdmac, + struct at_xdmac_chan *atchan) +{ + at_xdmac_write(atxdmac, atxdmac->layout->grws, atchan->mask); + while (at_xdmac_chan_read(atchan, AT_XDMAC_CC) & + (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP)) + cpu_relax(); +} + +static void at_xdmac_device_pause_internal(struct at_xdmac_chan *atchan) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + unsigned long flags; + + spin_lock_irqsave(&atchan->lock, flags); + set_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); + at_xdmac_device_pause_set(atxdmac, atchan); + spin_unlock_irqrestore(&atchan->lock, flags); +} + +static int at_xdmac_device_pause(struct dma_chan *chan) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + unsigned long flags; + int ret; + + dev_dbg(chan2dev(chan), "%s\n", __func__); + + if (test_and_set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status)) + return 0; + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return ret; + + spin_lock_irqsave(&atchan->lock, flags); + + at_xdmac_device_pause_set(atxdmac, atchan); + /* Decrement runtime PM ref counter for each active descriptor. */ + at_xdmac_runtime_suspend_descriptors(atchan); + + spin_unlock_irqrestore(&atchan->lock, flags); + + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); + + return 0; +} + +static void at_xdmac_device_resume_internal(struct at_xdmac_chan *atchan) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + unsigned long flags; + + spin_lock_irqsave(&atchan->lock, flags); + at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask); + clear_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); + spin_unlock_irqrestore(&atchan->lock, flags); +} + +static int at_xdmac_device_resume(struct dma_chan *chan) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + unsigned long flags; + int ret; + + dev_dbg(chan2dev(chan), "%s\n", __func__); + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return ret; + + spin_lock_irqsave(&atchan->lock, flags); + if (!at_xdmac_chan_is_paused(atchan)) + goto unlock; + + /* Increment runtime PM ref counter for each active descriptor. */ + ret = at_xdmac_runtime_resume_descriptors(atchan); + if (ret < 0) + goto unlock; + + at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask); + clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); + +unlock: + spin_unlock_irqrestore(&atchan->lock, flags); + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); + + return ret; +} + +static int at_xdmac_device_terminate_all(struct dma_chan *chan) +{ + struct at_xdmac_desc *desc, *_desc; + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + unsigned long flags; + int ret; + + dev_dbg(chan2dev(chan), "%s\n", __func__); + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return ret; + + spin_lock_irqsave(&atchan->lock, flags); + at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); + while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask) + cpu_relax(); + + /* Cancel all pending transfers. */ + list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) { + list_del(&desc->xfer_node); + list_splice_tail_init(&desc->descs_list, + &atchan->free_descs_list); + /* + * We incremented the runtime PM reference count on + * at_xdmac_start_xfer() for this descriptor. Now it's time + * to release it. + */ + if (desc->active_xfer) { + pm_runtime_put_autosuspend(atxdmac->dev); + pm_runtime_mark_last_busy(atxdmac->dev); + } + } + + clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); + clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); + spin_unlock_irqrestore(&atchan->lock, flags); + + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); + + return 0; +} + +static int at_xdmac_alloc_chan_resources(struct dma_chan *chan) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc; + int i; + + if (at_xdmac_chan_is_enabled(atchan)) { + dev_err(chan2dev(chan), + "can't allocate channel resources (channel enabled)\n"); + return -EIO; + } + + if (!list_empty(&atchan->free_descs_list)) { + dev_err(chan2dev(chan), + "can't allocate channel resources (channel not free from a previous use)\n"); + return -EIO; + } + + for (i = 0; i < init_nr_desc_per_channel; i++) { + desc = at_xdmac_alloc_desc(chan, GFP_KERNEL); + if (!desc) { + if (i == 0) { + dev_warn(chan2dev(chan), + "can't allocate any descriptors\n"); + return -EIO; + } + dev_warn(chan2dev(chan), + "only %d descriptors have been allocated\n", i); + break; + } + list_add_tail(&desc->desc_node, &atchan->free_descs_list); + } + + dma_cookie_init(chan); + + dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i); + + return i; +} + +static void at_xdmac_free_chan_resources(struct dma_chan *chan) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac *atxdmac = to_at_xdmac(chan->device); + struct at_xdmac_desc *desc, *_desc; + + list_for_each_entry_safe(desc, _desc, &atchan->free_descs_list, desc_node) { + dev_dbg(chan2dev(chan), "%s: freeing descriptor %p\n", __func__, desc); + list_del(&desc->desc_node); + dma_pool_free(atxdmac->at_xdmac_desc_pool, desc, desc->tx_dma_desc.phys); + } + + return; +} + +static void at_xdmac_axi_config(struct platform_device *pdev) +{ + struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev); + bool dev_m2m = false; + u32 dma_requests; + + if (!atxdmac->layout->axi_config) + return; /* Not supported */ + + if (!of_property_read_u32(pdev->dev.of_node, "dma-requests", + &dma_requests)) { + dev_info(&pdev->dev, "controller in mem2mem mode.\n"); + dev_m2m = true; + } + + if (dev_m2m) { + at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_M2M); + at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_M2M); + } else { + at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_P2M); + at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_P2M); + } +} + +static int __maybe_unused atmel_xdmac_prepare(struct device *dev) +{ + struct at_xdmac *atxdmac = dev_get_drvdata(dev); + struct dma_chan *chan, *_chan; + + list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) { + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + + /* Wait for transfer completion, except in cyclic case. */ + if (at_xdmac_chan_is_enabled(atchan) && !at_xdmac_chan_is_cyclic(atchan)) + return -EAGAIN; + } + return 0; +} + +static int __maybe_unused atmel_xdmac_suspend(struct device *dev) +{ + struct at_xdmac *atxdmac = dev_get_drvdata(dev); + struct dma_chan *chan, *_chan; + int ret; + + ret = pm_runtime_resume_and_get(atxdmac->dev); + if (ret < 0) + return ret; + + list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) { + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + + atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC); + if (at_xdmac_chan_is_cyclic(atchan)) { + if (!at_xdmac_chan_is_paused(atchan)) { + dev_warn(chan2dev(chan), "%s: channel %d not paused\n", + __func__, chan->chan_id); + at_xdmac_device_pause_internal(atchan); + at_xdmac_runtime_suspend_descriptors(atchan); + } + atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); + atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA); + atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC); + } + } + atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM); + atxdmac->save_gs = at_xdmac_read(atxdmac, AT_XDMAC_GS); + + at_xdmac_off(atxdmac, false); + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_noidle(atxdmac->dev); + clk_disable_unprepare(atxdmac->clk); + + return 0; +} + +static int __maybe_unused atmel_xdmac_resume(struct device *dev) +{ + struct at_xdmac *atxdmac = dev_get_drvdata(dev); + struct at_xdmac_chan *atchan; + struct dma_chan *chan, *_chan; + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + int i, ret; + + ret = clk_prepare_enable(atxdmac->clk); + if (ret) + return ret; + + pm_runtime_get_noresume(atxdmac->dev); + + at_xdmac_axi_config(pdev); + + /* Clear pending interrupts. */ + for (i = 0; i < atxdmac->dma.chancnt; i++) { + atchan = &atxdmac->chan[i]; + while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS)) + cpu_relax(); + } + + at_xdmac_write(atxdmac, AT_XDMAC_GIE, atxdmac->save_gim); + list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) { + atchan = to_at_xdmac_chan(chan); + + at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); + if (at_xdmac_chan_is_cyclic(atchan)) { + /* + * Resume only channels not explicitly paused by + * consumers. + */ + if (at_xdmac_chan_is_paused_internal(atchan)) { + ret = at_xdmac_runtime_resume_descriptors(atchan); + if (ret < 0) + return ret; + at_xdmac_device_resume_internal(atchan); + } + + /* + * We may resume from a deep sleep state where power + * to DMA controller is cut-off. Thus, restore the + * suspend state of channels set though dmaengine API. + */ + else if (at_xdmac_chan_is_paused(atchan)) + at_xdmac_device_pause_set(atxdmac, atchan); + + at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); + at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); + at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); + wmb(); + if (atxdmac->save_gs & atchan->mask) + at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask); + } + } + + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_autosuspend(atxdmac->dev); + + return 0; +} + +static int __maybe_unused atmel_xdmac_runtime_suspend(struct device *dev) +{ + struct at_xdmac *atxdmac = dev_get_drvdata(dev); + + clk_disable(atxdmac->clk); + + return 0; +} + +static int __maybe_unused atmel_xdmac_runtime_resume(struct device *dev) +{ + struct at_xdmac *atxdmac = dev_get_drvdata(dev); + + return clk_enable(atxdmac->clk); +} + +static int at_xdmac_probe(struct platform_device *pdev) +{ + struct at_xdmac *atxdmac; + int irq, nr_channels, i, ret; + void __iomem *base; + u32 reg; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + /* + * Read number of xdmac channels, read helper function can't be used + * since atxdmac is not yet allocated and we need to know the number + * of channels to do the allocation. + */ + reg = readl_relaxed(base + AT_XDMAC_GTYPE); + nr_channels = AT_XDMAC_NB_CH(reg); + if (nr_channels > AT_XDMAC_MAX_CHAN) { + dev_err(&pdev->dev, "invalid number of channels (%u)\n", + nr_channels); + return -EINVAL; + } + + atxdmac = devm_kzalloc(&pdev->dev, + struct_size(atxdmac, chan, nr_channels), + GFP_KERNEL); + if (!atxdmac) { + dev_err(&pdev->dev, "can't allocate at_xdmac structure\n"); + return -ENOMEM; + } + + atxdmac->regs = base; + atxdmac->irq = irq; + atxdmac->dev = &pdev->dev; + + atxdmac->layout = of_device_get_match_data(&pdev->dev); + if (!atxdmac->layout) + return -ENODEV; + + atxdmac->clk = devm_clk_get(&pdev->dev, "dma_clk"); + if (IS_ERR(atxdmac->clk)) { + dev_err(&pdev->dev, "can't get dma_clk\n"); + return PTR_ERR(atxdmac->clk); + } + + /* Do not use dev res to prevent races with tasklet */ + ret = request_irq(atxdmac->irq, at_xdmac_interrupt, 0, "at_xdmac", atxdmac); + if (ret) { + dev_err(&pdev->dev, "can't request irq\n"); + return ret; + } + + ret = clk_prepare_enable(atxdmac->clk); + if (ret) { + dev_err(&pdev->dev, "can't prepare or enable clock\n"); + goto err_free_irq; + } + + atxdmac->at_xdmac_desc_pool = + dmam_pool_create(dev_name(&pdev->dev), &pdev->dev, + sizeof(struct at_xdmac_desc), 4, 0); + if (!atxdmac->at_xdmac_desc_pool) { + dev_err(&pdev->dev, "no memory for descriptors dma pool\n"); + ret = -ENOMEM; + goto err_clk_disable; + } + + dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask); + dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask); + dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask); + /* + * Without DMA_PRIVATE the driver is not able to allocate more than + * one channel, second allocation fails in private_candidate. + */ + dma_cap_set(DMA_PRIVATE, atxdmac->dma.cap_mask); + atxdmac->dma.dev = &pdev->dev; + atxdmac->dma.device_alloc_chan_resources = at_xdmac_alloc_chan_resources; + atxdmac->dma.device_free_chan_resources = at_xdmac_free_chan_resources; + atxdmac->dma.device_tx_status = at_xdmac_tx_status; + atxdmac->dma.device_issue_pending = at_xdmac_issue_pending; + atxdmac->dma.device_prep_dma_cyclic = at_xdmac_prep_dma_cyclic; + atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved; + atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy; + atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset; + atxdmac->dma.device_prep_dma_memset_sg = at_xdmac_prep_dma_memset_sg; + atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg; + atxdmac->dma.device_config = at_xdmac_device_config; + atxdmac->dma.device_pause = at_xdmac_device_pause; + atxdmac->dma.device_resume = at_xdmac_device_resume; + atxdmac->dma.device_terminate_all = at_xdmac_device_terminate_all; + atxdmac->dma.src_addr_widths = AT_XDMAC_DMA_BUSWIDTHS; + atxdmac->dma.dst_addr_widths = AT_XDMAC_DMA_BUSWIDTHS; + atxdmac->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + atxdmac->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + platform_set_drvdata(pdev, atxdmac); + + pm_runtime_set_autosuspend_delay(&pdev->dev, 500); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); + + /* Init channels. */ + INIT_LIST_HEAD(&atxdmac->dma.channels); + + /* Disable all chans and interrupts. */ + at_xdmac_off(atxdmac, true); + + for (i = 0; i < nr_channels; i++) { + struct at_xdmac_chan *atchan = &atxdmac->chan[i]; + + atchan->chan.device = &atxdmac->dma; + list_add_tail(&atchan->chan.device_node, + &atxdmac->dma.channels); + + atchan->ch_regs = at_xdmac_chan_reg_base(atxdmac, i); + atchan->mask = 1 << i; + + spin_lock_init(&atchan->lock); + INIT_LIST_HEAD(&atchan->xfers_list); + INIT_LIST_HEAD(&atchan->free_descs_list); + tasklet_setup(&atchan->tasklet, at_xdmac_tasklet); + + /* Clear pending interrupts. */ + while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS)) + cpu_relax(); + } + + ret = dma_async_device_register(&atxdmac->dma); + if (ret) { + dev_err(&pdev->dev, "fail to register DMA engine device\n"); + goto err_pm_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, + at_xdmac_xlate, atxdmac); + if (ret) { + dev_err(&pdev->dev, "could not register of dma controller\n"); + goto err_dma_unregister; + } + + dev_info(&pdev->dev, "%d channels, mapped at 0x%p\n", + nr_channels, atxdmac->regs); + + at_xdmac_axi_config(pdev); + + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&atxdmac->dma); +err_pm_disable: + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); +err_clk_disable: + clk_disable_unprepare(atxdmac->clk); +err_free_irq: + free_irq(atxdmac->irq, atxdmac); + return ret; +} + +static int at_xdmac_remove(struct platform_device *pdev) +{ + struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev); + int i; + + at_xdmac_off(atxdmac, true); + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&atxdmac->dma); + pm_runtime_disable(atxdmac->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + clk_disable_unprepare(atxdmac->clk); + + free_irq(atxdmac->irq, atxdmac); + + for (i = 0; i < atxdmac->dma.chancnt; i++) { + struct at_xdmac_chan *atchan = &atxdmac->chan[i]; + + tasklet_kill(&atchan->tasklet); + at_xdmac_free_chan_resources(&atchan->chan); + } + + return 0; +} + +static const struct dev_pm_ops __maybe_unused atmel_xdmac_dev_pm_ops = { + .prepare = atmel_xdmac_prepare, + SET_LATE_SYSTEM_SLEEP_PM_OPS(atmel_xdmac_suspend, atmel_xdmac_resume) + SET_RUNTIME_PM_OPS(atmel_xdmac_runtime_suspend, + atmel_xdmac_runtime_resume, NULL) +}; + +static const struct of_device_id atmel_xdmac_dt_ids[] = { + { + .compatible = "atmel,sama5d4-dma", + .data = &at_xdmac_sama5d4_layout, + }, { + .compatible = "microchip,sama7g5-dma", + .data = &at_xdmac_sama7g5_layout, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, atmel_xdmac_dt_ids); + +static struct platform_driver at_xdmac_driver = { + .probe = at_xdmac_probe, + .remove = at_xdmac_remove, + .driver = { + .name = "at_xdmac", + .of_match_table = of_match_ptr(atmel_xdmac_dt_ids), + .pm = pm_ptr(&atmel_xdmac_dev_pm_ops), + } +}; + +static int __init at_xdmac_init(void) +{ + return platform_driver_register(&at_xdmac_driver); +} +subsys_initcall(at_xdmac_init); + +static void __exit at_xdmac_exit(void) +{ + platform_driver_unregister(&at_xdmac_driver); +} +module_exit(at_xdmac_exit); + +MODULE_DESCRIPTION("Atmel Extended DMA Controller driver"); +MODULE_AUTHOR("Ludovic Desroches "); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c new file mode 100644 index 0000000000..94ea35330e --- /dev/null +++ b/drivers/dma/bcm-sba-raid.c @@ -0,0 +1,1771 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2017 Broadcom + +/* + * Broadcom SBA RAID Driver + * + * The Broadcom stream buffer accelerator (SBA) provides offloading + * capabilities for RAID operations. The SBA offload engine is accessible + * via Broadcom SoC specific ring manager. Two or more offload engines + * can share same Broadcom SoC specific ring manager due to this Broadcom + * SoC specific ring manager driver is implemented as a mailbox controller + * driver and offload engine drivers are implemented as mallbox clients. + * + * Typically, Broadcom SoC specific ring manager will implement larger + * number of hardware rings over one or more SBA hardware devices. By + * design, the internal buffer size of SBA hardware device is limited + * but all offload operations supported by SBA can be broken down into + * multiple small size requests and executed parallely on multiple SBA + * hardware devices for achieving high through-put. + * + * The Broadcom SBA RAID driver does not require any register programming + * except submitting request to SBA hardware device via mailbox channels. + * This driver implements a DMA device with one DMA channel using a single + * mailbox channel provided by Broadcom SoC specific ring manager driver. + * For having more SBA DMA channels, we can create more SBA device nodes + * in Broadcom SoC specific DTS based on number of hardware rings supported + * by Broadcom SoC ring manager. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +/* ====== Driver macros and defines ===== */ + +#define SBA_TYPE_SHIFT 48 +#define SBA_TYPE_MASK GENMASK(1, 0) +#define SBA_TYPE_A 0x0 +#define SBA_TYPE_B 0x2 +#define SBA_TYPE_C 0x3 +#define SBA_USER_DEF_SHIFT 32 +#define SBA_USER_DEF_MASK GENMASK(15, 0) +#define SBA_R_MDATA_SHIFT 24 +#define SBA_R_MDATA_MASK GENMASK(7, 0) +#define SBA_C_MDATA_MS_SHIFT 18 +#define SBA_C_MDATA_MS_MASK GENMASK(1, 0) +#define SBA_INT_SHIFT 17 +#define SBA_INT_MASK BIT(0) +#define SBA_RESP_SHIFT 16 +#define SBA_RESP_MASK BIT(0) +#define SBA_C_MDATA_SHIFT 8 +#define SBA_C_MDATA_MASK GENMASK(7, 0) +#define SBA_C_MDATA_BNUMx_SHIFT(__bnum) (2 * (__bnum)) +#define SBA_C_MDATA_BNUMx_MASK GENMASK(1, 0) +#define SBA_C_MDATA_DNUM_SHIFT 5 +#define SBA_C_MDATA_DNUM_MASK GENMASK(4, 0) +#define SBA_C_MDATA_LS(__v) ((__v) & 0xff) +#define SBA_C_MDATA_MS(__v) (((__v) >> 8) & 0x3) +#define SBA_CMD_SHIFT 0 +#define SBA_CMD_MASK GENMASK(3, 0) +#define SBA_CMD_ZERO_BUFFER 0x4 +#define SBA_CMD_ZERO_ALL_BUFFERS 0x8 +#define SBA_CMD_LOAD_BUFFER 0x9 +#define SBA_CMD_XOR 0xa +#define SBA_CMD_GALOIS_XOR 0xb +#define SBA_CMD_WRITE_BUFFER 0xc +#define SBA_CMD_GALOIS 0xe + +#define SBA_MAX_REQ_PER_MBOX_CHANNEL 8192 +#define SBA_MAX_MSG_SEND_PER_MBOX_CHANNEL 8 + +/* Driver helper macros */ +#define to_sba_request(tx) \ + container_of(tx, struct sba_request, tx) +#define to_sba_device(dchan) \ + container_of(dchan, struct sba_device, dma_chan) + +/* ===== Driver data structures ===== */ + +enum sba_request_flags { + SBA_REQUEST_STATE_FREE = 0x001, + SBA_REQUEST_STATE_ALLOCED = 0x002, + SBA_REQUEST_STATE_PENDING = 0x004, + SBA_REQUEST_STATE_ACTIVE = 0x008, + SBA_REQUEST_STATE_ABORTED = 0x010, + SBA_REQUEST_STATE_MASK = 0x0ff, + SBA_REQUEST_FENCE = 0x100, +}; + +struct sba_request { + /* Global state */ + struct list_head node; + struct sba_device *sba; + u32 flags; + /* Chained requests management */ + struct sba_request *first; + struct list_head next; + atomic_t next_pending_count; + /* BRCM message data */ + struct brcm_message msg; + struct dma_async_tx_descriptor tx; + /* SBA commands */ + struct brcm_sba_command cmds[]; +}; + +enum sba_version { + SBA_VER_1 = 0, + SBA_VER_2 +}; + +struct sba_device { + /* Underlying device */ + struct device *dev; + /* DT configuration parameters */ + enum sba_version ver; + /* Derived configuration parameters */ + u32 max_req; + u32 hw_buf_size; + u32 hw_resp_size; + u32 max_pq_coefs; + u32 max_pq_srcs; + u32 max_cmd_per_req; + u32 max_xor_srcs; + u32 max_resp_pool_size; + u32 max_cmds_pool_size; + /* Maibox client and Mailbox channels */ + struct mbox_client client; + struct mbox_chan *mchan; + struct device *mbox_dev; + /* DMA device and DMA channel */ + struct dma_device dma_dev; + struct dma_chan dma_chan; + /* DMA channel resources */ + void *resp_base; + dma_addr_t resp_dma_base; + void *cmds_base; + dma_addr_t cmds_dma_base; + spinlock_t reqs_lock; + bool reqs_fence; + struct list_head reqs_alloc_list; + struct list_head reqs_pending_list; + struct list_head reqs_active_list; + struct list_head reqs_aborted_list; + struct list_head reqs_free_list; + /* DebugFS directory entries */ + struct dentry *root; +}; + +/* ====== Command helper routines ===== */ + +static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask) +{ + cmd &= ~((u64)mask << shift); + cmd |= ((u64)(val & mask) << shift); + return cmd; +} + +static inline u32 __pure sba_cmd_load_c_mdata(u32 b0) +{ + return b0 & SBA_C_MDATA_BNUMx_MASK; +} + +static inline u32 __pure sba_cmd_write_c_mdata(u32 b0) +{ + return b0 & SBA_C_MDATA_BNUMx_MASK; +} + +static inline u32 __pure sba_cmd_xor_c_mdata(u32 b1, u32 b0) +{ + return (b0 & SBA_C_MDATA_BNUMx_MASK) | + ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1)); +} + +static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0) +{ + return (b0 & SBA_C_MDATA_BNUMx_MASK) | + ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1)) | + ((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT); +} + +/* ====== General helper routines ===== */ + +static struct sba_request *sba_alloc_request(struct sba_device *sba) +{ + bool found = false; + unsigned long flags; + struct sba_request *req = NULL; + + spin_lock_irqsave(&sba->reqs_lock, flags); + list_for_each_entry(req, &sba->reqs_free_list, node) { + if (async_tx_test_ack(&req->tx)) { + list_move_tail(&req->node, &sba->reqs_alloc_list); + found = true; + break; + } + } + spin_unlock_irqrestore(&sba->reqs_lock, flags); + + if (!found) { + /* + * We have no more free requests so, we peek + * mailbox channels hoping few active requests + * would have completed which will create more + * room for new requests. + */ + mbox_client_peek_data(sba->mchan); + return NULL; + } + + req->flags = SBA_REQUEST_STATE_ALLOCED; + req->first = req; + INIT_LIST_HEAD(&req->next); + atomic_set(&req->next_pending_count, 1); + + dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan); + async_tx_ack(&req->tx); + + return req; +} + +/* Note: Must be called with sba->reqs_lock held */ +static void _sba_pending_request(struct sba_device *sba, + struct sba_request *req) +{ + lockdep_assert_held(&sba->reqs_lock); + req->flags &= ~SBA_REQUEST_STATE_MASK; + req->flags |= SBA_REQUEST_STATE_PENDING; + list_move_tail(&req->node, &sba->reqs_pending_list); + if (list_empty(&sba->reqs_active_list)) + sba->reqs_fence = false; +} + +/* Note: Must be called with sba->reqs_lock held */ +static bool _sba_active_request(struct sba_device *sba, + struct sba_request *req) +{ + lockdep_assert_held(&sba->reqs_lock); + if (list_empty(&sba->reqs_active_list)) + sba->reqs_fence = false; + if (sba->reqs_fence) + return false; + req->flags &= ~SBA_REQUEST_STATE_MASK; + req->flags |= SBA_REQUEST_STATE_ACTIVE; + list_move_tail(&req->node, &sba->reqs_active_list); + if (req->flags & SBA_REQUEST_FENCE) + sba->reqs_fence = true; + return true; +} + +/* Note: Must be called with sba->reqs_lock held */ +static void _sba_abort_request(struct sba_device *sba, + struct sba_request *req) +{ + lockdep_assert_held(&sba->reqs_lock); + req->flags &= ~SBA_REQUEST_STATE_MASK; + req->flags |= SBA_REQUEST_STATE_ABORTED; + list_move_tail(&req->node, &sba->reqs_aborted_list); + if (list_empty(&sba->reqs_active_list)) + sba->reqs_fence = false; +} + +/* Note: Must be called with sba->reqs_lock held */ +static void _sba_free_request(struct sba_device *sba, + struct sba_request *req) +{ + lockdep_assert_held(&sba->reqs_lock); + req->flags &= ~SBA_REQUEST_STATE_MASK; + req->flags |= SBA_REQUEST_STATE_FREE; + list_move_tail(&req->node, &sba->reqs_free_list); + if (list_empty(&sba->reqs_active_list)) + sba->reqs_fence = false; +} + +static void sba_free_chained_requests(struct sba_request *req) +{ + unsigned long flags; + struct sba_request *nreq; + struct sba_device *sba = req->sba; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + _sba_free_request(sba, req); + list_for_each_entry(nreq, &req->next, next) + _sba_free_request(sba, nreq); + + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static void sba_chain_request(struct sba_request *first, + struct sba_request *req) +{ + unsigned long flags; + struct sba_device *sba = req->sba; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + list_add_tail(&req->next, &first->next); + req->first = first; + atomic_inc(&first->next_pending_count); + + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static void sba_cleanup_nonpending_requests(struct sba_device *sba) +{ + unsigned long flags; + struct sba_request *req, *req1; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + /* Freeup all alloced request */ + list_for_each_entry_safe(req, req1, &sba->reqs_alloc_list, node) + _sba_free_request(sba, req); + + /* Set all active requests as aborted */ + list_for_each_entry_safe(req, req1, &sba->reqs_active_list, node) + _sba_abort_request(sba, req); + + /* + * Note: We expect that aborted request will be eventually + * freed by sba_receive_message() + */ + + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static void sba_cleanup_pending_requests(struct sba_device *sba) +{ + unsigned long flags; + struct sba_request *req, *req1; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + /* Freeup all pending request */ + list_for_each_entry_safe(req, req1, &sba->reqs_pending_list, node) + _sba_free_request(sba, req); + + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static int sba_send_mbox_request(struct sba_device *sba, + struct sba_request *req) +{ + int ret = 0; + + /* Send message for the request */ + req->msg.error = 0; + ret = mbox_send_message(sba->mchan, &req->msg); + if (ret < 0) { + dev_err(sba->dev, "send message failed with error %d", ret); + return ret; + } + + /* Check error returned by mailbox controller */ + ret = req->msg.error; + if (ret < 0) { + dev_err(sba->dev, "message error %d", ret); + } + + /* Signal txdone for mailbox channel */ + mbox_client_txdone(sba->mchan, ret); + + return ret; +} + +/* Note: Must be called with sba->reqs_lock held */ +static void _sba_process_pending_requests(struct sba_device *sba) +{ + int ret; + u32 count; + struct sba_request *req; + + /* Process few pending requests */ + count = SBA_MAX_MSG_SEND_PER_MBOX_CHANNEL; + while (!list_empty(&sba->reqs_pending_list) && count) { + /* Get the first pending request */ + req = list_first_entry(&sba->reqs_pending_list, + struct sba_request, node); + + /* Try to make request active */ + if (!_sba_active_request(sba, req)) + break; + + /* Send request to mailbox channel */ + ret = sba_send_mbox_request(sba, req); + if (ret < 0) { + _sba_pending_request(sba, req); + break; + } + + count--; + } +} + +static void sba_process_received_request(struct sba_device *sba, + struct sba_request *req) +{ + unsigned long flags; + struct dma_async_tx_descriptor *tx; + struct sba_request *nreq, *first = req->first; + + /* Process only after all chained requests are received */ + if (!atomic_dec_return(&first->next_pending_count)) { + tx = &first->tx; + + WARN_ON(tx->cookie < 0); + if (tx->cookie > 0) { + spin_lock_irqsave(&sba->reqs_lock, flags); + dma_cookie_complete(tx); + spin_unlock_irqrestore(&sba->reqs_lock, flags); + dmaengine_desc_get_callback_invoke(tx, NULL); + dma_descriptor_unmap(tx); + tx->callback = NULL; + tx->callback_result = NULL; + } + + dma_run_dependencies(tx); + + spin_lock_irqsave(&sba->reqs_lock, flags); + + /* Free all requests chained to first request */ + list_for_each_entry(nreq, &first->next, next) + _sba_free_request(sba, nreq); + INIT_LIST_HEAD(&first->next); + + /* Free the first request */ + _sba_free_request(sba, first); + + /* Process pending requests */ + _sba_process_pending_requests(sba); + + spin_unlock_irqrestore(&sba->reqs_lock, flags); + } +} + +static void sba_write_stats_in_seqfile(struct sba_device *sba, + struct seq_file *file) +{ + unsigned long flags; + struct sba_request *req; + u32 free_count = 0, alloced_count = 0; + u32 pending_count = 0, active_count = 0, aborted_count = 0; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + list_for_each_entry(req, &sba->reqs_free_list, node) + if (async_tx_test_ack(&req->tx)) + free_count++; + + list_for_each_entry(req, &sba->reqs_alloc_list, node) + alloced_count++; + + list_for_each_entry(req, &sba->reqs_pending_list, node) + pending_count++; + + list_for_each_entry(req, &sba->reqs_active_list, node) + active_count++; + + list_for_each_entry(req, &sba->reqs_aborted_list, node) + aborted_count++; + + spin_unlock_irqrestore(&sba->reqs_lock, flags); + + seq_printf(file, "maximum requests = %d\n", sba->max_req); + seq_printf(file, "free requests = %d\n", free_count); + seq_printf(file, "alloced requests = %d\n", alloced_count); + seq_printf(file, "pending requests = %d\n", pending_count); + seq_printf(file, "active requests = %d\n", active_count); + seq_printf(file, "aborted requests = %d\n", aborted_count); +} + +/* ====== DMAENGINE callbacks ===== */ + +static void sba_free_chan_resources(struct dma_chan *dchan) +{ + /* + * Channel resources are pre-alloced so we just free-up + * whatever we can so that we can re-use pre-alloced + * channel resources next time. + */ + sba_cleanup_nonpending_requests(to_sba_device(dchan)); +} + +static int sba_device_terminate_all(struct dma_chan *dchan) +{ + /* Cleanup all pending requests */ + sba_cleanup_pending_requests(to_sba_device(dchan)); + + return 0; +} + +static void sba_issue_pending(struct dma_chan *dchan) +{ + unsigned long flags; + struct sba_device *sba = to_sba_device(dchan); + + /* Process pending requests */ + spin_lock_irqsave(&sba->reqs_lock, flags); + _sba_process_pending_requests(sba); + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static dma_cookie_t sba_tx_submit(struct dma_async_tx_descriptor *tx) +{ + unsigned long flags; + dma_cookie_t cookie; + struct sba_device *sba; + struct sba_request *req, *nreq; + + if (unlikely(!tx)) + return -EINVAL; + + sba = to_sba_device(tx->chan); + req = to_sba_request(tx); + + /* Assign cookie and mark all chained requests pending */ + spin_lock_irqsave(&sba->reqs_lock, flags); + cookie = dma_cookie_assign(tx); + _sba_pending_request(sba, req); + list_for_each_entry(nreq, &req->next, next) + _sba_pending_request(sba, nreq); + spin_unlock_irqrestore(&sba->reqs_lock, flags); + + return cookie; +} + +static enum dma_status sba_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + enum dma_status ret; + struct sba_device *sba = to_sba_device(dchan); + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + mbox_client_peek_data(sba->mchan); + + return dma_cookie_status(dchan, cookie, txstate); +} + +static void sba_fillup_interrupt_msg(struct sba_request *req, + struct brcm_sba_command *cmds, + struct brcm_message *msg) +{ + u64 cmd; + u32 c_mdata; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + /* Type-B command to load dummy data into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = resp_dma; + cmdsp->data_len = req->sba->hw_resp_size; + cmdsp++; + + /* Type-A command to write buf0 to dummy location */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = resp_dma; + cmdsp->data_len = req->sba->hw_resp_size; + cmdsp++; + + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct dma_async_tx_descriptor * +sba_prep_dma_interrupt(struct dma_chan *dchan, unsigned long flags) +{ + struct sba_request *req = NULL; + struct sba_device *sba = to_sba_device(dchan); + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + + /* + * Force fence so that no requests are submitted + * until DMA callback for this request is invoked. + */ + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request message */ + sba_fillup_interrupt_msg(req, req->cmds, &req->msg); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return &req->tx; +} + +static void sba_fillup_memcpy_msg(struct sba_request *req, + struct brcm_sba_command *cmds, + struct brcm_message *msg, + dma_addr_t msg_offset, size_t msg_len, + dma_addr_t dst, dma_addr_t src) +{ + u64 cmd; + u32 c_mdata; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + /* Type-B command to load data into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* Type-A command to write buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = dst + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct sba_request * +sba_prep_dma_memcpy_req(struct sba_device *sba, + dma_addr_t off, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct sba_request *req = NULL; + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + if (flags & DMA_PREP_FENCE) + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request message */ + sba_fillup_memcpy_msg(req, req->cmds, &req->msg, + off, len, dst, src); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return req; +} + +static struct dma_async_tx_descriptor * +sba_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + size_t req_len; + dma_addr_t off = 0; + struct sba_device *sba = to_sba_device(dchan); + struct sba_request *first = NULL, *req; + + /* Create chained requests where each request is upto hw_buf_size */ + while (len) { + req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size; + + req = sba_prep_dma_memcpy_req(sba, off, dst, src, + req_len, flags); + if (!req) { + if (first) + sba_free_chained_requests(first); + return NULL; + } + + if (first) + sba_chain_request(first, req); + else + first = req; + + off += req_len; + len -= req_len; + } + + return (first) ? &first->tx : NULL; +} + +static void sba_fillup_xor_msg(struct sba_request *req, + struct brcm_sba_command *cmds, + struct brcm_message *msg, + dma_addr_t msg_offset, size_t msg_len, + dma_addr_t dst, dma_addr_t *src, u32 src_cnt) +{ + u64 cmd; + u32 c_mdata; + unsigned int i; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + /* Type-B command to load data into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src[0] + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* Type-B commands to xor data with buf0 and put it back in buf0 */ + for (i = 1; i < src_cnt; i++) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_xor_c_mdata(0, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_XOR, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src[i] + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = dst + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct sba_request * +sba_prep_dma_xor_req(struct sba_device *sba, + dma_addr_t off, dma_addr_t dst, dma_addr_t *src, + u32 src_cnt, size_t len, unsigned long flags) +{ + struct sba_request *req = NULL; + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + if (flags & DMA_PREP_FENCE) + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request message */ + sba_fillup_xor_msg(req, req->cmds, &req->msg, + off, len, dst, src, src_cnt); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return req; +} + +static struct dma_async_tx_descriptor * +sba_prep_dma_xor(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src, + u32 src_cnt, size_t len, unsigned long flags) +{ + size_t req_len; + dma_addr_t off = 0; + struct sba_device *sba = to_sba_device(dchan); + struct sba_request *first = NULL, *req; + + /* Sanity checks */ + if (unlikely(src_cnt > sba->max_xor_srcs)) + return NULL; + + /* Create chained requests where each request is upto hw_buf_size */ + while (len) { + req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size; + + req = sba_prep_dma_xor_req(sba, off, dst, src, src_cnt, + req_len, flags); + if (!req) { + if (first) + sba_free_chained_requests(first); + return NULL; + } + + if (first) + sba_chain_request(first, req); + else + first = req; + + off += req_len; + len -= req_len; + } + + return (first) ? &first->tx : NULL; +} + +static void sba_fillup_pq_msg(struct sba_request *req, + bool pq_continue, + struct brcm_sba_command *cmds, + struct brcm_message *msg, + dma_addr_t msg_offset, size_t msg_len, + dma_addr_t *dst_p, dma_addr_t *dst_q, + const u8 *scf, dma_addr_t *src, u32 src_cnt) +{ + u64 cmd; + u32 c_mdata; + unsigned int i; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + if (pq_continue) { + /* Type-B command to load old P into buf0 */ + if (dst_p) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = *dst_p + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-B command to load old Q into buf1 */ + if (dst_q) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(1); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = *dst_q + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + } else { + /* Type-A command to zero all buffers */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + cmdsp++; + } + + /* Type-B commands for generate P onto buf0 and Q onto buf1 */ + for (i = 0; i < src_cnt; i++) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_pq_c_mdata(raid6_gflog[scf[i]], 1, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata), + SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS_XOR, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src[i] + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf0 */ + if (dst_p) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = *dst_p + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf1 */ + if (dst_q) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(1); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = *dst_q + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct sba_request * +sba_prep_dma_pq_req(struct sba_device *sba, dma_addr_t off, + dma_addr_t *dst_p, dma_addr_t *dst_q, dma_addr_t *src, + u32 src_cnt, const u8 *scf, size_t len, unsigned long flags) +{ + struct sba_request *req = NULL; + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + if (flags & DMA_PREP_FENCE) + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request messages */ + sba_fillup_pq_msg(req, dmaf_continue(flags), + req->cmds, &req->msg, + off, len, dst_p, dst_q, scf, src, src_cnt); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return req; +} + +static void sba_fillup_pq_single_msg(struct sba_request *req, + bool pq_continue, + struct brcm_sba_command *cmds, + struct brcm_message *msg, + dma_addr_t msg_offset, size_t msg_len, + dma_addr_t *dst_p, dma_addr_t *dst_q, + dma_addr_t src, u8 scf) +{ + u64 cmd; + u32 c_mdata; + u8 pos, dpos = raid6_gflog[scf]; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + if (!dst_p) + goto skip_p; + + if (pq_continue) { + /* Type-B command to load old P into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = *dst_p + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* + * Type-B commands to xor data with buf0 and put it + * back in buf0 + */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_xor_c_mdata(0, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_XOR, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } else { + /* Type-B command to load old P into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = *dst_p + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + +skip_p: + if (!dst_q) + goto skip_q; + + /* Type-A command to zero all buffers */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + cmdsp++; + + if (dpos == 255) + goto skip_q_computation; + pos = (dpos < req->sba->max_pq_coefs) ? + dpos : (req->sba->max_pq_coefs - 1); + + /* + * Type-B command to generate initial Q from data + * and store output into buf0 + */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_pq_c_mdata(pos, 0, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata), + SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + dpos -= pos; + + /* Multiple Type-A command to generate final Q */ + while (dpos) { + pos = (dpos < req->sba->max_pq_coefs) ? + dpos : (req->sba->max_pq_coefs - 1); + + /* + * Type-A command to generate Q with buf0 and + * buf1 store result in buf0 + */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_pq_c_mdata(pos, 0, 1); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata), + SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + cmdsp++; + + dpos -= pos; + } + +skip_q_computation: + if (pq_continue) { + /* + * Type-B command to XOR previous output with + * buf0 and write it into buf0 + */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_xor_c_mdata(0, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_XOR, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = *dst_q + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = *dst_q + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + +skip_q: + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct sba_request * +sba_prep_dma_pq_single_req(struct sba_device *sba, dma_addr_t off, + dma_addr_t *dst_p, dma_addr_t *dst_q, + dma_addr_t src, u8 scf, size_t len, + unsigned long flags) +{ + struct sba_request *req = NULL; + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + if (flags & DMA_PREP_FENCE) + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request messages */ + sba_fillup_pq_single_msg(req, dmaf_continue(flags), + req->cmds, &req->msg, off, len, + dst_p, dst_q, src, scf); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return req; +} + +static struct dma_async_tx_descriptor * +sba_prep_dma_pq(struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src, + u32 src_cnt, const u8 *scf, size_t len, unsigned long flags) +{ + u32 i, dst_q_index; + size_t req_len; + bool slow = false; + dma_addr_t off = 0; + dma_addr_t *dst_p = NULL, *dst_q = NULL; + struct sba_device *sba = to_sba_device(dchan); + struct sba_request *first = NULL, *req; + + /* Sanity checks */ + if (unlikely(src_cnt > sba->max_pq_srcs)) + return NULL; + for (i = 0; i < src_cnt; i++) + if (sba->max_pq_coefs <= raid6_gflog[scf[i]]) + slow = true; + + /* Figure-out P and Q destination addresses */ + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + dst_p = &dst[0]; + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + dst_q = &dst[1]; + + /* Create chained requests where each request is upto hw_buf_size */ + while (len) { + req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size; + + if (slow) { + dst_q_index = src_cnt; + + if (dst_q) { + for (i = 0; i < src_cnt; i++) { + if (*dst_q == src[i]) { + dst_q_index = i; + break; + } + } + } + + if (dst_q_index < src_cnt) { + i = dst_q_index; + req = sba_prep_dma_pq_single_req(sba, + off, dst_p, dst_q, src[i], scf[i], + req_len, flags | DMA_PREP_FENCE); + if (!req) + goto fail; + + if (first) + sba_chain_request(first, req); + else + first = req; + + flags |= DMA_PREP_CONTINUE; + } + + for (i = 0; i < src_cnt; i++) { + if (dst_q_index == i) + continue; + + req = sba_prep_dma_pq_single_req(sba, + off, dst_p, dst_q, src[i], scf[i], + req_len, flags | DMA_PREP_FENCE); + if (!req) + goto fail; + + if (first) + sba_chain_request(first, req); + else + first = req; + + flags |= DMA_PREP_CONTINUE; + } + } else { + req = sba_prep_dma_pq_req(sba, off, + dst_p, dst_q, src, src_cnt, + scf, req_len, flags); + if (!req) + goto fail; + + if (first) + sba_chain_request(first, req); + else + first = req; + } + + off += req_len; + len -= req_len; + } + + return (first) ? &first->tx : NULL; + +fail: + if (first) + sba_free_chained_requests(first); + return NULL; +} + +/* ====== Mailbox callbacks ===== */ + +static void sba_receive_message(struct mbox_client *cl, void *msg) +{ + struct brcm_message *m = msg; + struct sba_request *req = m->ctx; + struct sba_device *sba = req->sba; + + /* Error count if message has error */ + if (m->error < 0) + dev_err(sba->dev, "%s got message with error %d", + dma_chan_name(&sba->dma_chan), m->error); + + /* Process received request */ + sba_process_received_request(sba, req); +} + +/* ====== Debugfs callbacks ====== */ + +static int sba_debugfs_stats_show(struct seq_file *file, void *offset) +{ + struct sba_device *sba = dev_get_drvdata(file->private); + + /* Write stats in file */ + sba_write_stats_in_seqfile(sba, file); + + return 0; +} + +/* ====== Platform driver routines ===== */ + +static int sba_prealloc_channel_resources(struct sba_device *sba) +{ + int i, j, ret = 0; + struct sba_request *req = NULL; + + sba->resp_base = dma_alloc_coherent(sba->mbox_dev, + sba->max_resp_pool_size, + &sba->resp_dma_base, GFP_KERNEL); + if (!sba->resp_base) + return -ENOMEM; + + sba->cmds_base = dma_alloc_coherent(sba->mbox_dev, + sba->max_cmds_pool_size, + &sba->cmds_dma_base, GFP_KERNEL); + if (!sba->cmds_base) { + ret = -ENOMEM; + goto fail_free_resp_pool; + } + + spin_lock_init(&sba->reqs_lock); + sba->reqs_fence = false; + INIT_LIST_HEAD(&sba->reqs_alloc_list); + INIT_LIST_HEAD(&sba->reqs_pending_list); + INIT_LIST_HEAD(&sba->reqs_active_list); + INIT_LIST_HEAD(&sba->reqs_aborted_list); + INIT_LIST_HEAD(&sba->reqs_free_list); + + for (i = 0; i < sba->max_req; i++) { + req = devm_kzalloc(sba->dev, + struct_size(req, cmds, sba->max_cmd_per_req), + GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto fail_free_cmds_pool; + } + INIT_LIST_HEAD(&req->node); + req->sba = sba; + req->flags = SBA_REQUEST_STATE_FREE; + INIT_LIST_HEAD(&req->next); + atomic_set(&req->next_pending_count, 0); + for (j = 0; j < sba->max_cmd_per_req; j++) { + req->cmds[j].cmd = 0; + req->cmds[j].cmd_dma = sba->cmds_base + + (i * sba->max_cmd_per_req + j) * sizeof(u64); + req->cmds[j].cmd_dma_addr = sba->cmds_dma_base + + (i * sba->max_cmd_per_req + j) * sizeof(u64); + req->cmds[j].flags = 0; + } + memset(&req->msg, 0, sizeof(req->msg)); + dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan); + async_tx_ack(&req->tx); + req->tx.tx_submit = sba_tx_submit; + req->tx.phys = sba->resp_dma_base + i * sba->hw_resp_size; + list_add_tail(&req->node, &sba->reqs_free_list); + } + + return 0; + +fail_free_cmds_pool: + dma_free_coherent(sba->mbox_dev, + sba->max_cmds_pool_size, + sba->cmds_base, sba->cmds_dma_base); +fail_free_resp_pool: + dma_free_coherent(sba->mbox_dev, + sba->max_resp_pool_size, + sba->resp_base, sba->resp_dma_base); + return ret; +} + +static void sba_freeup_channel_resources(struct sba_device *sba) +{ + dmaengine_terminate_all(&sba->dma_chan); + dma_free_coherent(sba->mbox_dev, sba->max_cmds_pool_size, + sba->cmds_base, sba->cmds_dma_base); + dma_free_coherent(sba->mbox_dev, sba->max_resp_pool_size, + sba->resp_base, sba->resp_dma_base); + sba->resp_base = NULL; + sba->resp_dma_base = 0; +} + +static int sba_async_register(struct sba_device *sba) +{ + int ret; + struct dma_device *dma_dev = &sba->dma_dev; + + /* Initialize DMA channel cookie */ + sba->dma_chan.device = dma_dev; + dma_cookie_init(&sba->dma_chan); + + /* Initialize DMA device capability mask */ + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_XOR, dma_dev->cap_mask); + dma_cap_set(DMA_PQ, dma_dev->cap_mask); + + /* + * Set mailbox channel device as the base device of + * our dma_device because the actual memory accesses + * will be done by mailbox controller + */ + dma_dev->dev = sba->mbox_dev; + + /* Set base prep routines */ + dma_dev->device_free_chan_resources = sba_free_chan_resources; + dma_dev->device_terminate_all = sba_device_terminate_all; + dma_dev->device_issue_pending = sba_issue_pending; + dma_dev->device_tx_status = sba_tx_status; + + /* Set interrupt routine */ + if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) + dma_dev->device_prep_dma_interrupt = sba_prep_dma_interrupt; + + /* Set memcpy routine */ + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memcpy = sba_prep_dma_memcpy; + + /* Set xor routine and capability */ + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->device_prep_dma_xor = sba_prep_dma_xor; + dma_dev->max_xor = sba->max_xor_srcs; + } + + /* Set pq routine and capability */ + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + dma_dev->device_prep_dma_pq = sba_prep_dma_pq; + dma_set_maxpq(dma_dev, sba->max_pq_srcs, 0); + } + + /* Initialize DMA device channel list */ + INIT_LIST_HEAD(&dma_dev->channels); + list_add_tail(&sba->dma_chan.device_node, &dma_dev->channels); + + /* Register with Linux async DMA framework*/ + ret = dma_async_device_register(dma_dev); + if (ret) { + dev_err(sba->dev, "async device register error %d", ret); + return ret; + } + + dev_info(sba->dev, "%s capabilities: %s%s%s%s\n", + dma_chan_name(&sba->dma_chan), + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "interrupt " : "", + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "memcpy " : "", + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : ""); + + return 0; +} + +static int sba_probe(struct platform_device *pdev) +{ + int ret = 0; + struct sba_device *sba; + struct platform_device *mbox_pdev; + struct of_phandle_args args; + + /* Allocate main SBA struct */ + sba = devm_kzalloc(&pdev->dev, sizeof(*sba), GFP_KERNEL); + if (!sba) + return -ENOMEM; + + sba->dev = &pdev->dev; + platform_set_drvdata(pdev, sba); + + /* Number of mailbox channels should be atleast 1 */ + ret = of_count_phandle_with_args(pdev->dev.of_node, + "mboxes", "#mbox-cells"); + if (ret <= 0) + return -ENODEV; + + /* Determine SBA version from DT compatible string */ + if (of_device_is_compatible(sba->dev->of_node, "brcm,iproc-sba")) + sba->ver = SBA_VER_1; + else if (of_device_is_compatible(sba->dev->of_node, + "brcm,iproc-sba-v2")) + sba->ver = SBA_VER_2; + else + return -ENODEV; + + /* Derived Configuration parameters */ + switch (sba->ver) { + case SBA_VER_1: + sba->hw_buf_size = 4096; + sba->hw_resp_size = 8; + sba->max_pq_coefs = 6; + sba->max_pq_srcs = 6; + break; + case SBA_VER_2: + sba->hw_buf_size = 4096; + sba->hw_resp_size = 8; + sba->max_pq_coefs = 30; + /* + * We can support max_pq_srcs == max_pq_coefs because + * we are limited by number of SBA commands that we can + * fit in one message for underlying ring manager HW. + */ + sba->max_pq_srcs = 12; + break; + default: + return -EINVAL; + } + sba->max_req = SBA_MAX_REQ_PER_MBOX_CHANNEL; + sba->max_cmd_per_req = sba->max_pq_srcs + 3; + sba->max_xor_srcs = sba->max_cmd_per_req - 1; + sba->max_resp_pool_size = sba->max_req * sba->hw_resp_size; + sba->max_cmds_pool_size = sba->max_req * + sba->max_cmd_per_req * sizeof(u64); + + /* Setup mailbox client */ + sba->client.dev = &pdev->dev; + sba->client.rx_callback = sba_receive_message; + sba->client.tx_block = false; + sba->client.knows_txdone = true; + sba->client.tx_tout = 0; + + /* Request mailbox channel */ + sba->mchan = mbox_request_channel(&sba->client, 0); + if (IS_ERR(sba->mchan)) { + ret = PTR_ERR(sba->mchan); + goto fail_free_mchan; + } + + /* Find-out underlying mailbox device */ + ret = of_parse_phandle_with_args(pdev->dev.of_node, + "mboxes", "#mbox-cells", 0, &args); + if (ret) + goto fail_free_mchan; + mbox_pdev = of_find_device_by_node(args.np); + of_node_put(args.np); + if (!mbox_pdev) { + ret = -ENODEV; + goto fail_free_mchan; + } + sba->mbox_dev = &mbox_pdev->dev; + + /* Prealloc channel resource */ + ret = sba_prealloc_channel_resources(sba); + if (ret) + goto fail_free_mchan; + + /* Check availability of debugfs */ + if (!debugfs_initialized()) + goto skip_debugfs; + + /* Create debugfs root entry */ + sba->root = debugfs_create_dir(dev_name(sba->dev), NULL); + + /* Create debugfs stats entry */ + debugfs_create_devm_seqfile(sba->dev, "stats", sba->root, + sba_debugfs_stats_show); + +skip_debugfs: + + /* Register DMA device with Linux async framework */ + ret = sba_async_register(sba); + if (ret) + goto fail_free_resources; + + /* Print device info */ + dev_info(sba->dev, "%s using SBAv%d mailbox channel from %s", + dma_chan_name(&sba->dma_chan), sba->ver+1, + dev_name(sba->mbox_dev)); + + return 0; + +fail_free_resources: + debugfs_remove_recursive(sba->root); + sba_freeup_channel_resources(sba); +fail_free_mchan: + mbox_free_channel(sba->mchan); + return ret; +} + +static int sba_remove(struct platform_device *pdev) +{ + struct sba_device *sba = platform_get_drvdata(pdev); + + dma_async_device_unregister(&sba->dma_dev); + + debugfs_remove_recursive(sba->root); + + sba_freeup_channel_resources(sba); + + mbox_free_channel(sba->mchan); + + return 0; +} + +static const struct of_device_id sba_of_match[] = { + { .compatible = "brcm,iproc-sba", }, + { .compatible = "brcm,iproc-sba-v2", }, + {}, +}; +MODULE_DEVICE_TABLE(of, sba_of_match); + +static struct platform_driver sba_driver = { + .probe = sba_probe, + .remove = sba_remove, + .driver = { + .name = "bcm-sba-raid", + .of_match_table = sba_of_match, + }, +}; +module_platform_driver(sba_driver); + +MODULE_DESCRIPTION("Broadcom SBA RAID driver"); +MODULE_AUTHOR("Anup Patel "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c new file mode 100644 index 0000000000..0807fb9eb2 --- /dev/null +++ b/drivers/dma/bcm2835-dma.c @@ -0,0 +1,1046 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * BCM2835 DMA engine support + * + * Author: Florian Meier + * Copyright 2013 + * + * Based on + * OMAP DMAengine support by Russell King + * + * BCM2708 DMA Driver + * Copyright (C) 2010 Broadcom + * + * Raspberry Pi PCM I2S ALSA Driver + * Copyright (c) by Phil Poole 2013 + * + * MARVELL MMP Peripheral DMA Driver + * Copyright 2012 Marvell International Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14 +#define BCM2835_DMA_CHAN_NAME_SIZE 8 + +/** + * struct bcm2835_dmadev - BCM2835 DMA controller + * @ddev: DMA device + * @base: base address of register map + * @zero_page: bus address of zero page (to detect transactions copying from + * zero page and avoid accessing memory if so) + */ +struct bcm2835_dmadev { + struct dma_device ddev; + void __iomem *base; + dma_addr_t zero_page; +}; + +struct bcm2835_dma_cb { + uint32_t info; + uint32_t src; + uint32_t dst; + uint32_t length; + uint32_t stride; + uint32_t next; + uint32_t pad[2]; +}; + +struct bcm2835_cb_entry { + struct bcm2835_dma_cb *cb; + dma_addr_t paddr; +}; + +struct bcm2835_chan { + struct virt_dma_chan vc; + + struct dma_slave_config cfg; + unsigned int dreq; + + int ch; + struct bcm2835_desc *desc; + struct dma_pool *cb_pool; + + void __iomem *chan_base; + int irq_number; + unsigned int irq_flags; + + bool is_lite_channel; +}; + +struct bcm2835_desc { + struct bcm2835_chan *c; + struct virt_dma_desc vd; + enum dma_transfer_direction dir; + + unsigned int frames; + size_t size; + + bool cyclic; + + struct bcm2835_cb_entry cb_list[]; +}; + +#define BCM2835_DMA_CS 0x00 +#define BCM2835_DMA_ADDR 0x04 +#define BCM2835_DMA_TI 0x08 +#define BCM2835_DMA_SOURCE_AD 0x0c +#define BCM2835_DMA_DEST_AD 0x10 +#define BCM2835_DMA_LEN 0x14 +#define BCM2835_DMA_STRIDE 0x18 +#define BCM2835_DMA_NEXTCB 0x1c +#define BCM2835_DMA_DEBUG 0x20 + +/* DMA CS Control and Status bits */ +#define BCM2835_DMA_ACTIVE BIT(0) /* activate the DMA */ +#define BCM2835_DMA_END BIT(1) /* current CB has ended */ +#define BCM2835_DMA_INT BIT(2) /* interrupt status */ +#define BCM2835_DMA_DREQ BIT(3) /* DREQ state */ +#define BCM2835_DMA_ISPAUSED BIT(4) /* Pause requested or not active */ +#define BCM2835_DMA_ISHELD BIT(5) /* Is held by DREQ flow control */ +#define BCM2835_DMA_WAITING_FOR_WRITES BIT(6) /* waiting for last + * AXI-write to ack + */ +#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_PRIORITY(x) ((x & 15) << 16) /* AXI priority */ +#define BCM2835_DMA_PANIC_PRIORITY(x) ((x & 15) << 20) /* panic priority */ +/* current value of TI.BCM2835_DMA_WAIT_RESP */ +#define BCM2835_DMA_WAIT_FOR_WRITES BIT(28) +#define BCM2835_DMA_DIS_DEBUG BIT(29) /* disable debug pause signal */ +#define BCM2835_DMA_ABORT BIT(30) /* Stop current CB, go to next, WO */ +#define BCM2835_DMA_RESET BIT(31) /* WO, self clearing */ + +/* Transfer information bits - also bcm2835_cb.info field */ +#define BCM2835_DMA_INT_EN BIT(0) +#define BCM2835_DMA_TDMODE BIT(1) /* 2D-Mode */ +#define BCM2835_DMA_WAIT_RESP BIT(3) /* wait for AXI-write to be acked */ +#define BCM2835_DMA_D_INC BIT(4) +#define BCM2835_DMA_D_WIDTH BIT(5) /* 128bit writes if set */ +#define BCM2835_DMA_D_DREQ BIT(6) /* enable DREQ for destination */ +#define BCM2835_DMA_D_IGNORE BIT(7) /* ignore destination writes */ +#define BCM2835_DMA_S_INC BIT(8) +#define BCM2835_DMA_S_WIDTH BIT(9) /* 128bit writes if set */ +#define BCM2835_DMA_S_DREQ BIT(10) /* enable SREQ for source */ +#define BCM2835_DMA_S_IGNORE BIT(11) /* ignore source reads - read 0 */ +#define BCM2835_DMA_BURST_LENGTH(x) ((x & 15) << 12) +#define BCM2835_DMA_PER_MAP(x) ((x & 31) << 16) /* REQ source */ +#define BCM2835_DMA_WAIT(x) ((x & 31) << 21) /* add DMA-wait cycles */ +#define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */ + +/* debug register bits */ +#define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR BIT(0) +#define BCM2835_DMA_DEBUG_FIFO_ERR BIT(1) +#define BCM2835_DMA_DEBUG_READ_ERR BIT(2) +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_SHIFT 4 +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_BITS 4 +#define BCM2835_DMA_DEBUG_ID_SHIFT 16 +#define BCM2835_DMA_DEBUG_ID_BITS 9 +#define BCM2835_DMA_DEBUG_STATE_SHIFT 16 +#define BCM2835_DMA_DEBUG_STATE_BITS 9 +#define BCM2835_DMA_DEBUG_VERSION_SHIFT 25 +#define BCM2835_DMA_DEBUG_VERSION_BITS 3 +#define BCM2835_DMA_DEBUG_LITE BIT(28) + +/* shared registers for all dma channels */ +#define BCM2835_DMA_INT_STATUS 0xfe0 +#define BCM2835_DMA_ENABLE 0xff0 + +#define BCM2835_DMA_DATA_TYPE_S8 1 +#define BCM2835_DMA_DATA_TYPE_S16 2 +#define BCM2835_DMA_DATA_TYPE_S32 4 +#define BCM2835_DMA_DATA_TYPE_S128 16 + +/* Valid only for channels 0 - 14, 15 has its own base address */ +#define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */ +#define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n)) + +/* the max dma length for different channels */ +#define MAX_DMA_LEN SZ_1G +#define MAX_LITE_DMA_LEN (SZ_64K - 4) + +static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c) +{ + /* lite and normal channels have different max frame length */ + return c->is_lite_channel ? MAX_LITE_DMA_LEN : MAX_DMA_LEN; +} + +/* how many frames of max_len size do we need to transfer len bytes */ +static inline size_t bcm2835_dma_frames_for_length(size_t len, + size_t max_len) +{ + return DIV_ROUND_UP(len, max_len); +} + +static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d) +{ + return container_of(d, struct bcm2835_dmadev, ddev); +} + +static inline struct bcm2835_chan *to_bcm2835_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct bcm2835_chan, vc.chan); +} + +static inline struct bcm2835_desc *to_bcm2835_dma_desc( + struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct bcm2835_desc, vd.tx); +} + +static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc) +{ + size_t i; + + for (i = 0; i < desc->frames; i++) + dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb, + desc->cb_list[i].paddr); + + kfree(desc); +} + +static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +{ + bcm2835_dma_free_cb_chain( + container_of(vd, struct bcm2835_desc, vd)); +} + +static void bcm2835_dma_create_cb_set_length( + struct bcm2835_chan *chan, + struct bcm2835_dma_cb *control_block, + size_t len, + size_t period_len, + size_t *total_len, + u32 finalextrainfo) +{ + size_t max_len = bcm2835_dma_max_frame_length(chan); + + /* set the length taking lite-channel limitations into account */ + control_block->length = min_t(u32, len, max_len); + + /* finished if we have no period_length */ + if (!period_len) + return; + + /* + * period_len means: that we need to generate + * transfers that are terminating at every + * multiple of period_len - this is typically + * used to set the interrupt flag in info + * which is required during cyclic transfers + */ + + /* have we filled in period_length yet? */ + if (*total_len + control_block->length < period_len) { + /* update number of bytes in this period so far */ + *total_len += control_block->length; + return; + } + + /* calculate the length that remains to reach period_length */ + control_block->length = period_len - *total_len; + + /* reset total_length for next period */ + *total_len = 0; + + /* add extrainfo bits in info */ + control_block->info |= finalextrainfo; +} + +static inline size_t bcm2835_dma_count_frames_for_sg( + struct bcm2835_chan *c, + struct scatterlist *sgl, + unsigned int sg_len) +{ + size_t frames = 0; + struct scatterlist *sgent; + unsigned int i; + size_t plength = bcm2835_dma_max_frame_length(c); + + for_each_sg(sgl, sgent, sg_len, i) + frames += bcm2835_dma_frames_for_length( + sg_dma_len(sgent), plength); + + return frames; +} + +/** + * bcm2835_dma_create_cb_chain - create a control block and fills data in + * + * @chan: the @dma_chan for which we run this + * @direction: the direction in which we transfer + * @cyclic: it is a cyclic transfer + * @info: the default info bits to apply per controlblock + * @frames: number of controlblocks to allocate + * @src: the src address to assign (if the S_INC bit is set + * in @info, then it gets incremented) + * @dst: the dst address to assign (if the D_INC bit is set + * in @info, then it gets incremented) + * @buf_len: the full buffer length (may also be 0) + * @period_len: the period length when to apply @finalextrainfo + * in addition to the last transfer + * this will also break some control-blocks early + * @finalextrainfo: additional bits in last controlblock + * (or when period_len is reached in case of cyclic) + * @gfp: the GFP flag to use for allocation + */ +static struct bcm2835_desc *bcm2835_dma_create_cb_chain( + struct dma_chan *chan, enum dma_transfer_direction direction, + bool cyclic, u32 info, u32 finalextrainfo, size_t frames, + dma_addr_t src, dma_addr_t dst, size_t buf_len, + size_t period_len, gfp_t gfp) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t len = buf_len, total_len; + size_t frame; + struct bcm2835_desc *d; + struct bcm2835_cb_entry *cb_entry; + struct bcm2835_dma_cb *control_block; + + if (!frames) + return NULL; + + /* allocate and setup the descriptor. */ + d = kzalloc(struct_size(d, cb_list, frames), gfp); + if (!d) + return NULL; + + d->c = c; + d->dir = direction; + d->cyclic = cyclic; + + /* + * Iterate over all frames, create a control block + * for each frame and link them together. + */ + for (frame = 0, total_len = 0; frame < frames; d->frames++, frame++) { + cb_entry = &d->cb_list[frame]; + cb_entry->cb = dma_pool_alloc(c->cb_pool, gfp, + &cb_entry->paddr); + if (!cb_entry->cb) + goto error_cb; + + /* fill in the control block */ + control_block = cb_entry->cb; + control_block->info = info; + control_block->src = src; + control_block->dst = dst; + control_block->stride = 0; + control_block->next = 0; + /* set up length in control_block if requested */ + if (buf_len) { + /* calculate length honoring period_length */ + bcm2835_dma_create_cb_set_length( + c, control_block, + len, period_len, &total_len, + cyclic ? finalextrainfo : 0); + + /* calculate new remaining length */ + len -= control_block->length; + } + + /* link this the last controlblock */ + if (frame) + d->cb_list[frame - 1].cb->next = cb_entry->paddr; + + /* update src and dst and length */ + if (src && (info & BCM2835_DMA_S_INC)) + src += control_block->length; + if (dst && (info & BCM2835_DMA_D_INC)) + dst += control_block->length; + + /* Length of total transfer */ + d->size += control_block->length; + } + + /* the last frame requires extra flags */ + d->cb_list[d->frames - 1].cb->info |= finalextrainfo; + + /* detect a size missmatch */ + if (buf_len && (d->size != buf_len)) + goto error_cb; + + return d; +error_cb: + bcm2835_dma_free_cb_chain(d); + + return NULL; +} + +static void bcm2835_dma_fill_cb_chain_with_sg( + struct dma_chan *chan, + enum dma_transfer_direction direction, + struct bcm2835_cb_entry *cb, + struct scatterlist *sgl, + unsigned int sg_len) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t len, max_len; + unsigned int i; + dma_addr_t addr; + struct scatterlist *sgent; + + max_len = bcm2835_dma_max_frame_length(c); + for_each_sg(sgl, sgent, sg_len, i) { + for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent); + len > 0; + addr += cb->cb->length, len -= cb->cb->length, cb++) { + if (direction == DMA_DEV_TO_MEM) + cb->cb->dst = addr; + else + cb->cb->src = addr; + cb->cb->length = min(len, max_len); + } + } +} + +static void bcm2835_dma_abort(struct bcm2835_chan *c) +{ + void __iomem *chan_base = c->chan_base; + long int timeout = 10000; + + /* + * A zero control block address means the channel is idle. + * (The ACTIVE flag in the CS register is not a reliable indicator.) + */ + if (!readl(chan_base + BCM2835_DMA_ADDR)) + return; + + /* Write 0 to the active bit - Pause the DMA */ + writel(0, chan_base + BCM2835_DMA_CS); + + /* Wait for any current AXI transfer to complete */ + while ((readl(chan_base + BCM2835_DMA_CS) & + BCM2835_DMA_WAITING_FOR_WRITES) && --timeout) + cpu_relax(); + + /* Peripheral might be stuck and fail to signal AXI write responses */ + if (!timeout) + dev_err(c->vc.chan.device->dev, + "failed to complete outstanding writes\n"); + + writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS); +} + +static void bcm2835_dma_start_desc(struct bcm2835_chan *c) +{ + struct virt_dma_desc *vd = vchan_next_desc(&c->vc); + struct bcm2835_desc *d; + + if (!vd) { + c->desc = NULL; + return; + } + + list_del(&vd->node); + + c->desc = d = to_bcm2835_dma_desc(&vd->tx); + + writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR); + writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); +} + +static irqreturn_t bcm2835_dma_callback(int irq, void *data) +{ + struct bcm2835_chan *c = data; + struct bcm2835_desc *d; + unsigned long flags; + + /* check the shared interrupt */ + if (c->irq_flags & IRQF_SHARED) { + /* check if the interrupt is enabled */ + flags = readl(c->chan_base + BCM2835_DMA_CS); + /* if not set then we are not the reason for the irq */ + if (!(flags & BCM2835_DMA_INT)) + return IRQ_NONE; + } + + spin_lock_irqsave(&c->vc.lock, flags); + + /* + * Clear the INT flag to receive further interrupts. Keep the channel + * active in case the descriptor is cyclic or in case the client has + * already terminated the descriptor and issued a new one. (May happen + * if this IRQ handler is threaded.) If the channel is finished, it + * will remain idle despite the ACTIVE flag being set. + */ + writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); + + d = c->desc; + + if (d) { + if (d->cyclic) { + /* call the cyclic callback */ + vchan_cyclic_callback(&d->vd); + } else if (!readl(c->chan_base + BCM2835_DMA_ADDR)) { + vchan_cookie_complete(&c->desc->vd); + bcm2835_dma_start_desc(c); + } + } + + spin_unlock_irqrestore(&c->vc.lock, flags); + + return IRQ_HANDLED; +} + +static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct device *dev = c->vc.chan.device->dev; + + dev_dbg(dev, "Allocating DMA channel %d\n", c->ch); + + /* + * Control blocks are 256 bit in length and must start at a 256 bit + * (32 byte) aligned address (BCM2835 ARM Peripherals, sec. 4.2.1.1). + */ + c->cb_pool = dma_pool_create(dev_name(dev), dev, + sizeof(struct bcm2835_dma_cb), 32, 0); + if (!c->cb_pool) { + dev_err(dev, "unable to allocate descriptor pool\n"); + return -ENOMEM; + } + + return request_irq(c->irq_number, bcm2835_dma_callback, + c->irq_flags, "DMA IRQ", c); +} + +static void bcm2835_dma_free_chan_resources(struct dma_chan *chan) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + + vchan_free_chan_resources(&c->vc); + free_irq(c->irq_number, c); + dma_pool_destroy(c->cb_pool); + + dev_dbg(c->vc.chan.device->dev, "Freeing DMA channel %u\n", c->ch); +} + +static size_t bcm2835_dma_desc_size(struct bcm2835_desc *d) +{ + return d->size; +} + +static size_t bcm2835_dma_desc_size_pos(struct bcm2835_desc *d, dma_addr_t addr) +{ + unsigned int i; + size_t size; + + for (size = i = 0; i < d->frames; i++) { + struct bcm2835_dma_cb *control_block = d->cb_list[i].cb; + size_t this_size = control_block->length; + dma_addr_t dma; + + if (d->dir == DMA_DEV_TO_MEM) + dma = control_block->dst; + else + dma = control_block->src; + + if (size) + size += this_size; + else if (addr >= dma && addr < dma + this_size) + size += dma + this_size - addr; + } + + return size; +} + +static enum dma_status bcm2835_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&c->vc.lock, flags); + vd = vchan_find_desc(&c->vc, cookie); + if (vd) { + txstate->residue = + bcm2835_dma_desc_size(to_bcm2835_dma_desc(&vd->tx)); + } else if (c->desc && c->desc->vd.tx.cookie == cookie) { + struct bcm2835_desc *d = c->desc; + dma_addr_t pos; + + if (d->dir == DMA_MEM_TO_DEV) + pos = readl(c->chan_base + BCM2835_DMA_SOURCE_AD); + else if (d->dir == DMA_DEV_TO_MEM) + pos = readl(c->chan_base + BCM2835_DMA_DEST_AD); + else + pos = 0; + + txstate->residue = bcm2835_dma_desc_size_pos(d, pos); + } else { + txstate->residue = 0; + } + + spin_unlock_irqrestore(&c->vc.lock, flags); + + return ret; +} + +static void bcm2835_dma_issue_pending(struct dma_chan *chan) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + if (vchan_issue_pending(&c->vc) && !c->desc) + bcm2835_dma_start_desc(c); + + spin_unlock_irqrestore(&c->vc.lock, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC; + u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* if src, dst or len is not given return with an error */ + if (!src || !dst || !len) + return NULL; + + /* calculate number of frames */ + frames = bcm2835_dma_frames_for_length(len, max_len); + + /* allocate the CB chain - this also fills in the pointers */ + d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false, + info, extra, frames, + src, dst, len, 0, GFP_KERNEL); + if (!d) + return NULL; + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( + struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src = 0, dst = 0; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t frames; + + if (!is_slave_direction(direction)) { + dev_err(chan->device->dev, + "%s: bad direction?\n", __func__); + return NULL; + } + + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); + + if (direction == DMA_DEV_TO_MEM) { + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; + } else { + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; + } + + /* count frames in sg list */ + frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len); + + /* allocate the CB chain */ + d = bcm2835_dma_create_cb_chain(chan, direction, false, + info, extra, + frames, src, dst, 0, 0, + GFP_NOWAIT); + if (!d) + return NULL; + + /* fill in frames with scatterlist pointers */ + bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list, + sgl, sg_len); + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct bcm2835_dmadev *od = to_bcm2835_dma_dev(chan->device); + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src, dst; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = 0; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* Grab configuration */ + if (!is_slave_direction(direction)) { + dev_err(chan->device->dev, "%s: bad direction?\n", __func__); + return NULL; + } + + if (!buf_len) { + dev_err(chan->device->dev, + "%s: bad buffer length (= 0)\n", __func__); + return NULL; + } + + if (flags & DMA_PREP_INTERRUPT) + extra |= BCM2835_DMA_INT_EN; + else + period_len = buf_len; + + /* + * warn if buf_len is not a multiple of period_len - this may leed + * to unexpected latencies for interrupts and thus audiable clicks + */ + if (buf_len % period_len) + dev_warn_once(chan->device->dev, + "%s: buffer_length (%zd) is not a multiple of period_len (%zd)\n", + __func__, buf_len, period_len); + + /* Setup DREQ channel */ + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); + + if (direction == DMA_DEV_TO_MEM) { + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + dst = buf_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; + } else { + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + src = buf_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; + + /* non-lite channels can write zeroes w/o accessing memory */ + if (buf_addr == od->zero_page && !c->is_lite_channel) + info |= BCM2835_DMA_S_IGNORE; + } + + /* calculate number of frames */ + frames = /* number of periods */ + DIV_ROUND_UP(buf_len, period_len) * + /* number of frames per period */ + bcm2835_dma_frames_for_length(period_len, max_len); + + /* + * allocate the CB chain + * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine + * implementation calls prep_dma_cyclic with interrupts disabled. + */ + d = bcm2835_dma_create_cb_chain(chan, direction, true, + info, extra, + frames, src, dst, buf_len, + period_len, GFP_NOWAIT); + if (!d) + return NULL; + + /* wrap around into a loop */ + d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr; + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static int bcm2835_dma_slave_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + + c->cfg = *cfg; + + return 0; +} + +static int bcm2835_dma_terminate_all(struct dma_chan *chan) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&c->vc.lock, flags); + + /* stop DMA activity */ + if (c->desc) { + vchan_terminate_vdesc(&c->desc->vd); + c->desc = NULL; + bcm2835_dma_abort(c); + } + + vchan_get_all_descriptors(&c->vc, &head); + spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_dma_desc_free_list(&c->vc, &head); + + return 0; +} + +static void bcm2835_dma_synchronize(struct dma_chan *chan) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + + vchan_synchronize(&c->vc); +} + +static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, + int irq, unsigned int irq_flags) +{ + struct bcm2835_chan *c; + + c = devm_kzalloc(d->ddev.dev, sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + + c->vc.desc_free = bcm2835_dma_desc_free; + vchan_init(&c->vc, &d->ddev); + + c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id); + c->ch = chan_id; + c->irq_number = irq; + c->irq_flags = irq_flags; + + /* check in DEBUG register if this is a LITE channel */ + if (readl(c->chan_base + BCM2835_DMA_DEBUG) & + BCM2835_DMA_DEBUG_LITE) + c->is_lite_channel = true; + + return 0; +} + +static void bcm2835_dma_free(struct bcm2835_dmadev *od) +{ + struct bcm2835_chan *c, *next; + + list_for_each_entry_safe(c, next, &od->ddev.channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } + + dma_unmap_page_attrs(od->ddev.dev, od->zero_page, PAGE_SIZE, + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); +} + +static const struct of_device_id bcm2835_dma_of_match[] = { + { .compatible = "brcm,bcm2835-dma", }, + {}, +}; +MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match); + +static struct dma_chan *bcm2835_dma_xlate(struct of_phandle_args *spec, + struct of_dma *ofdma) +{ + struct bcm2835_dmadev *d = ofdma->of_dma_data; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&d->ddev); + if (!chan) + return NULL; + + /* Set DREQ from param */ + to_bcm2835_dma_chan(chan)->dreq = spec->args[0]; + + return chan; +} + +static int bcm2835_dma_probe(struct platform_device *pdev) +{ + struct bcm2835_dmadev *od; + void __iomem *base; + int rc; + int i, j; + int irq[BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1]; + int irq_flags; + uint32_t chans_available; + char chan_name[BCM2835_DMA_CHAN_NAME_SIZE]; + + if (!pdev->dev.dma_mask) + pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; + + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(&pdev->dev, "Unable to set DMA mask\n"); + return rc; + } + + od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL); + if (!od) + return -ENOMEM; + + dma_set_max_seg_size(&pdev->dev, 0x3FFFFFFF); + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + od->base = base; + + dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); + dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask); + dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); + od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources; + od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources; + od->ddev.device_tx_status = bcm2835_dma_tx_status; + od->ddev.device_issue_pending = bcm2835_dma_issue_pending; + od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic; + od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg; + od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy; + od->ddev.device_config = bcm2835_dma_slave_config; + od->ddev.device_terminate_all = bcm2835_dma_terminate_all; + od->ddev.device_synchronize = bcm2835_dma_synchronize; + od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + od->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | + BIT(DMA_MEM_TO_MEM); + od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + od->ddev.descriptor_reuse = true; + od->ddev.dev = &pdev->dev; + INIT_LIST_HEAD(&od->ddev.channels); + + platform_set_drvdata(pdev, od); + + od->zero_page = dma_map_page_attrs(od->ddev.dev, ZERO_PAGE(0), 0, + PAGE_SIZE, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(od->ddev.dev, od->zero_page)) { + dev_err(&pdev->dev, "Failed to map zero page\n"); + return -ENOMEM; + } + + /* Request DMA channel mask from device tree */ + if (of_property_read_u32(pdev->dev.of_node, + "brcm,dma-channel-mask", + &chans_available)) { + dev_err(&pdev->dev, "Failed to get channel mask\n"); + rc = -EINVAL; + goto err_no_dma; + } + + /* get irqs for each channel that we support */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip masked out channels */ + if (!(chans_available & (1 << i))) { + irq[i] = -1; + continue; + } + + /* get the named irq */ + snprintf(chan_name, sizeof(chan_name), "dma%i", i); + irq[i] = platform_get_irq_byname(pdev, chan_name); + if (irq[i] >= 0) + continue; + + /* legacy device tree case handling */ + dev_warn_once(&pdev->dev, + "missing interrupt-names property in device tree - legacy interpretation is used\n"); + /* + * in case of channel >= 11 + * use the 11th interrupt and that is shared + */ + irq[i] = platform_get_irq(pdev, i < 11 ? i : 11); + } + + /* get irqs for each channel */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip channels without irq */ + if (irq[i] < 0) + continue; + + /* check if there are other channels that also use this irq */ + irq_flags = 0; + for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++) + if ((i != j) && (irq[j] == irq[i])) { + irq_flags = IRQF_SHARED; + break; + } + + /* initialize the channel */ + rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags); + if (rc) + goto err_no_dma; + } + + dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i); + + /* Device-tree DMA controller registration */ + rc = of_dma_controller_register(pdev->dev.of_node, + bcm2835_dma_xlate, od); + if (rc) { + dev_err(&pdev->dev, "Failed to register DMA controller\n"); + goto err_no_dma; + } + + rc = dma_async_device_register(&od->ddev); + if (rc) { + dev_err(&pdev->dev, + "Failed to register slave DMA engine device: %d\n", rc); + goto err_no_dma; + } + + dev_dbg(&pdev->dev, "Load BCM2835 DMA engine driver\n"); + + return 0; + +err_no_dma: + bcm2835_dma_free(od); + return rc; +} + +static int bcm2835_dma_remove(struct platform_device *pdev) +{ + struct bcm2835_dmadev *od = platform_get_drvdata(pdev); + + dma_async_device_unregister(&od->ddev); + bcm2835_dma_free(od); + + return 0; +} + +static struct platform_driver bcm2835_dma_driver = { + .probe = bcm2835_dma_probe, + .remove = bcm2835_dma_remove, + .driver = { + .name = "bcm2835-dma", + .of_match_table = of_match_ptr(bcm2835_dma_of_match), + }, +}; + +module_platform_driver(bcm2835_dma_driver); + +MODULE_ALIAS("platform:bcm2835-dma"); +MODULE_DESCRIPTION("BCM2835 DMA engine driver"); +MODULE_AUTHOR("Florian Meier "); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/bestcomm/Kconfig b/drivers/dma/bestcomm/Kconfig new file mode 100644 index 0000000000..5dd4372959 --- /dev/null +++ b/drivers/dma/bestcomm/Kconfig @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Kconfig options for Bestcomm +# + +config PPC_BESTCOMM + tristate "Bestcomm DMA engine support" + depends on PPC_MPC52xx + default n + select PPC_LIB_RHEAP + help + BestComm is the name of the communication coprocessor found + on the Freescale MPC5200 family of processor. Its usage is + optional for some drivers (like ATA), but required for + others (like FEC). + + If you want to use drivers that require DMA operations, + answer Y or M. Otherwise say N. + +config PPC_BESTCOMM_ATA + tristate + depends on PPC_BESTCOMM + help + This option enables the support for the ATA task. + +config PPC_BESTCOMM_FEC + tristate + depends on PPC_BESTCOMM + help + This option enables the support for the FEC tasks. + +config PPC_BESTCOMM_GEN_BD + tristate + depends on PPC_BESTCOMM + help + This option enables the support for the GenBD tasks. + diff --git a/drivers/dma/bestcomm/Makefile b/drivers/dma/bestcomm/Makefile new file mode 100644 index 0000000000..8d1b33a2f0 --- /dev/null +++ b/drivers/dma/bestcomm/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for BestComm & co +# + +bestcomm-core-objs := bestcomm.o sram.o +bestcomm-ata-objs := ata.o bcom_ata_task.o +bestcomm-fec-objs := fec.o bcom_fec_rx_task.o bcom_fec_tx_task.o +bestcomm-gen-bd-objs := gen_bd.o bcom_gen_bd_rx_task.o bcom_gen_bd_tx_task.o + +obj-$(CONFIG_PPC_BESTCOMM) += bestcomm-core.o +obj-$(CONFIG_PPC_BESTCOMM_ATA) += bestcomm-ata.o +obj-$(CONFIG_PPC_BESTCOMM_FEC) += bestcomm-fec.o +obj-$(CONFIG_PPC_BESTCOMM_GEN_BD) += bestcomm-gen-bd.o + diff --git a/drivers/dma/bestcomm/ata.c b/drivers/dma/bestcomm/ata.c new file mode 100644 index 0000000000..502a45d76a --- /dev/null +++ b/drivers/dma/bestcomm/ata.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Bestcomm ATA task driver + * + * Patterned after bestcomm/fec.c by Dale Farnsworth + * 2003-2004 (c) MontaVista, Software, Inc. + * + * Copyright (C) 2006-2007 Sylvain Munaut + * Copyright (C) 2006 Freescale - John Rigby + */ + +#include +#include +#include +#include + +#include +#include +#include + + +/* ======================================================================== */ +/* Task image/var/inc */ +/* ======================================================================== */ + +/* ata task image */ +extern u32 bcom_ata_task[]; + +/* ata task vars that need to be set before enabling the task */ +struct bcom_ata_var { + u32 enable; /* (u16*) address of task's control register */ + u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */ + u32 bd_last; /* (struct bcom_bd*) end of ring buffer */ + u32 bd_start; /* (struct bcom_bd*) current bd */ + u32 buffer_size; /* size of receive buffer */ +}; + +/* ata task incs that need to be set before enabling the task */ +struct bcom_ata_inc { + u16 pad0; + s16 incr_bytes; + u16 pad1; + s16 incr_dst; + u16 pad2; + s16 incr_src; +}; + + +/* ======================================================================== */ +/* Task support code */ +/* ======================================================================== */ + +struct bcom_task * +bcom_ata_init(int queue_len, int maxbufsize) +{ + struct bcom_task *tsk; + struct bcom_ata_var *var; + struct bcom_ata_inc *inc; + + /* Prefetch breaks ATA DMA. Turn it off for ATA DMA */ + bcom_disable_prefetch(); + + tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_ata_bd), 0); + if (!tsk) + return NULL; + + tsk->flags = BCOM_FLAGS_NONE; + + bcom_ata_reset_bd(tsk); + + var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum); + inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum); + + if (bcom_load_image(tsk->tasknum, bcom_ata_task)) { + bcom_task_free(tsk); + return NULL; + } + + var->enable = bcom_eng->regs_base + + offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]); + var->bd_base = tsk->bd_pa; + var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size); + var->bd_start = tsk->bd_pa; + var->buffer_size = maxbufsize; + + /* Configure some stuff */ + bcom_set_task_pragma(tsk->tasknum, BCOM_ATA_PRAGMA); + bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum); + + out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_RX], BCOM_IPR_ATA_RX); + out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_TX], BCOM_IPR_ATA_TX); + + out_be32(&bcom_eng->regs->IntPend, 1<tasknum); /* Clear ints */ + + return tsk; +} +EXPORT_SYMBOL_GPL(bcom_ata_init); + +void bcom_ata_rx_prepare(struct bcom_task *tsk) +{ + struct bcom_ata_inc *inc; + + inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum); + + inc->incr_bytes = -(s16)sizeof(u32); + inc->incr_src = 0; + inc->incr_dst = sizeof(u32); + + bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_RX); +} +EXPORT_SYMBOL_GPL(bcom_ata_rx_prepare); + +void bcom_ata_tx_prepare(struct bcom_task *tsk) +{ + struct bcom_ata_inc *inc; + + inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum); + + inc->incr_bytes = -(s16)sizeof(u32); + inc->incr_src = sizeof(u32); + inc->incr_dst = 0; + + bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_TX); +} +EXPORT_SYMBOL_GPL(bcom_ata_tx_prepare); + +void bcom_ata_reset_bd(struct bcom_task *tsk) +{ + struct bcom_ata_var *var; + + /* Reset all BD */ + memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size); + + tsk->index = 0; + tsk->outdex = 0; + + var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum); + var->bd_start = var->bd_base; +} +EXPORT_SYMBOL_GPL(bcom_ata_reset_bd); + +void bcom_ata_release(struct bcom_task *tsk) +{ + /* Nothing special for the ATA tasks */ + bcom_task_free(tsk); +} +EXPORT_SYMBOL_GPL(bcom_ata_release); + + +MODULE_DESCRIPTION("BestComm ATA task driver"); +MODULE_AUTHOR("John Rigby"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/bestcomm/bcom_ata_task.c b/drivers/dma/bestcomm/bcom_ata_task.c new file mode 100644 index 0000000000..9a1c349e93 --- /dev/null +++ b/drivers/dma/bestcomm/bcom_ata_task.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Bestcomm ATA task microcode + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * Created based on bestcom/code_dma/image_rtos1/dma_image.hex + */ + +#include + +/* + * The header consists of the following fields: + * u32 magic; + * u8 desc_size; + * u8 var_size; + * u8 inc_size; + * u8 first_var; + * u8 reserved[8]; + * + * The size fields contain the number of 32-bit words. + */ + +u32 bcom_ata_task[] = { + /* header */ + 0x4243544b, + 0x0e060709, + 0x00000000, + 0x00000000, + + /* Task descriptors */ + 0x8198009b, /* LCD: idx0 = var3; idx0 <= var2; idx0 += inc3 */ + 0x13e00c08, /* DRD1A: var3 = var1; FN=0 MORE init=31 WS=0 RS=0 */ + 0xb8000264, /* LCD: idx1 = *idx0, idx2 = var0; idx1 < var9; idx1 += inc4, idx2 += inc4 */ + 0x10000f00, /* DRD1A: var3 = idx0; FN=0 MORE init=0 WS=0 RS=0 */ + 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */ + 0x0c8cfc8a, /* DRD2B1: *idx2 = EU3(); EU3(*idx2,var10) */ + 0xd8988240, /* LCDEXT: idx1 = idx1; idx1 > var9; idx1 += inc0 */ + 0xf845e011, /* LCDEXT: idx2 = *(idx0 + var00000015); ; idx2 += inc2 */ + 0xb845e00a, /* LCD: idx3 = *(idx0 + var00000019); ; idx3 += inc1 */ + 0x0bfecf90, /* DRD1A: *idx3 = *idx2; FN=0 TFD init=31 WS=3 RS=3 */ + 0x9898802d, /* LCD: idx1 = idx1; idx1 once var0; idx1 += inc5 */ + 0x64000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 INT EXT init=0 WS=0 RS=0 */ + 0x0c0cf849, /* DRD2B1: *idx0 = EU3(); EU3(idx1,var9) */ + 0x000001f8, /* NOP */ + + /* VAR[9]-VAR[14] */ + 0x40000000, + 0x7fff7fff, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + /* INC[0]-INC[6] */ + 0x40000000, + 0xe0000000, + 0xe0000000, + 0xa000000c, + 0x20000000, + 0x00000000, + 0x00000000, +}; + diff --git a/drivers/dma/bestcomm/bcom_fec_rx_task.c b/drivers/dma/bestcomm/bcom_fec_rx_task.c new file mode 100644 index 0000000000..c610dc76a8 --- /dev/null +++ b/drivers/dma/bestcomm/bcom_fec_rx_task.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Bestcomm FEC RX task microcode + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex + * on Tue Mar 22 11:19:38 2005 GMT + */ + +#include + +/* + * The header consists of the following fields: + * u32 magic; + * u8 desc_size; + * u8 var_size; + * u8 inc_size; + * u8 first_var; + * u8 reserved[8]; + * + * The size fields contain the number of 32-bit words. + */ + +u32 bcom_fec_rx_task[] = { + /* header */ + 0x4243544b, + 0x18060709, + 0x00000000, + 0x00000000, + + /* Task descriptors */ + 0x808220e3, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */ + 0x10601010, /* DRD1A: var4 = var2; FN=0 MORE init=3 WS=0 RS=0 */ + 0xb8800264, /* LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc4, idx3 += inc4 */ + 0x10001308, /* DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */ + 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */ + 0x0cccfcca, /* DRD2B1: *idx3 = EU3(); EU3(*idx3,var10) */ + 0x80004000, /* LCDEXT: idx2 = 0x00000000; ; */ + 0xb8c58029, /* LCD: idx3 = *(idx1 + var00000015); idx3 once var0; idx3 += inc5 */ + 0x60000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=0 RS=0 */ + 0x088cf8cc, /* DRD2B1: idx2 = EU3(); EU3(idx3,var12) */ + 0x991982f2, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var11; idx2 += inc6, idx3 += inc2 */ + 0x006acf80, /* DRD1A: *idx3 = *idx0; FN=0 init=3 WS=1 RS=1 */ + 0x80004000, /* LCDEXT: idx2 = 0x00000000; ; */ + 0x9999802d, /* LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */ + 0x70000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */ + 0x034cfc4e, /* DRD2B1: var13 = EU3(); EU3(*idx1,var14) */ + 0x00008868, /* DRD1A: idx2 = var13; FN=0 init=0 WS=0 RS=0 */ + 0x99198341, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var13; idx2 += inc0, idx3 += inc1 */ + 0x007ecf80, /* DRD1A: *idx3 = *idx0; FN=0 init=3 WS=3 RS=3 */ + 0x99198272, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc6, idx3 += inc2 */ + 0x046acf80, /* DRD1A: *idx3 = *idx0; FN=0 INT init=3 WS=1 RS=1 */ + 0x9819002d, /* LCD: idx2 = idx0; idx2 once var0; idx2 += inc5 */ + 0x0060c790, /* DRD1A: *idx1 = *idx2; FN=0 init=3 WS=0 RS=0 */ + 0x000001f8, /* NOP */ + + /* VAR[9]-VAR[14] */ + 0x40000000, + 0x7fff7fff, + 0x00000000, + 0x00000003, + 0x40000008, + 0x43ffffff, + + /* INC[0]-INC[6] */ + 0x40000000, + 0xe0000000, + 0xe0000000, + 0xa0000008, + 0x20000000, + 0x00000000, + 0x4000ffff, +}; + diff --git a/drivers/dma/bestcomm/bcom_fec_tx_task.c b/drivers/dma/bestcomm/bcom_fec_tx_task.c new file mode 100644 index 0000000000..410b426956 --- /dev/null +++ b/drivers/dma/bestcomm/bcom_fec_tx_task.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Bestcomm FEC TX task microcode + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex + * on Tue Mar 22 11:19:29 2005 GMT + */ + +#include + +/* + * The header consists of the following fields: + * u32 magic; + * u8 desc_size; + * u8 var_size; + * u8 inc_size; + * u8 first_var; + * u8 reserved[8]; + * + * The size fields contain the number of 32-bit words. + */ + +u32 bcom_fec_tx_task[] = { + /* header */ + 0x4243544b, + 0x2407070d, + 0x00000000, + 0x00000000, + + /* Task descriptors */ + 0x8018001b, /* LCD: idx0 = var0; idx0 <= var0; idx0 += inc3 */ + 0x60000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */ + 0x01ccfc0d, /* DRD2B1: var7 = EU3(); EU3(*idx0,var13) */ + 0x8082a123, /* LCD: idx0 = var1, idx1 = var5; idx1 <= var4; idx0 += inc4, idx1 += inc3 */ + 0x10801418, /* DRD1A: var5 = var3; FN=0 MORE init=4 WS=0 RS=0 */ + 0xf88103a4, /* LCDEXT: idx2 = *idx1, idx3 = var2; idx2 < var14; idx2 += inc4, idx3 += inc4 */ + 0x801a6024, /* LCD: idx4 = var0; ; idx4 += inc4 */ + 0x10001708, /* DRD1A: var5 = idx1; FN=0 MORE init=0 WS=0 RS=0 */ + 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */ + 0x0cccfccf, /* DRD2B1: *idx3 = EU3(); EU3(*idx3,var15) */ + 0x991a002c, /* LCD: idx2 = idx2, idx3 = idx4; idx2 once var0; idx2 += inc5, idx3 += inc4 */ + 0x70000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */ + 0x024cfc4d, /* DRD2B1: var9 = EU3(); EU3(*idx1,var13) */ + 0x60000003, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=3 EXT init=0 WS=0 RS=0 */ + 0x0cccf247, /* DRD2B1: *idx3 = EU3(); EU3(var9,var7) */ + 0x80004000, /* LCDEXT: idx2 = 0x00000000; ; */ + 0xb8c80029, /* LCD: idx3 = *(idx1 + var0000001a); idx3 once var0; idx3 += inc5 */ + 0x70000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */ + 0x088cf8d1, /* DRD2B1: idx2 = EU3(); EU3(idx3,var17) */ + 0x00002f10, /* DRD1A: var11 = idx2; FN=0 init=0 WS=0 RS=0 */ + 0x99198432, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var16; idx2 += inc6, idx3 += inc2 */ + 0x008ac398, /* DRD1A: *idx0 = *idx3; FN=0 init=4 WS=1 RS=1 */ + 0x80004000, /* LCDEXT: idx2 = 0x00000000; ; */ + 0x9999802d, /* LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */ + 0x70000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */ + 0x048cfc53, /* DRD2B1: var18 = EU3(); EU3(*idx1,var19) */ + 0x60000008, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=8 EXT init=0 WS=0 RS=0 */ + 0x088cf48b, /* DRD2B1: idx2 = EU3(); EU3(var18,var11) */ + 0x99198481, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var18; idx2 += inc0, idx3 += inc1 */ + 0x009ec398, /* DRD1A: *idx0 = *idx3; FN=0 init=4 WS=3 RS=3 */ + 0x991983b2, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var14; idx2 += inc6, idx3 += inc2 */ + 0x088ac398, /* DRD1A: *idx0 = *idx3; FN=0 TFD init=4 WS=1 RS=1 */ + 0x9919002d, /* LCD: idx2 = idx2; idx2 once var0; idx2 += inc5 */ + 0x60000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */ + 0x0c4cf88e, /* DRD2B1: *idx1 = EU3(); EU3(idx2,var14) */ + 0x000001f8, /* NOP */ + + /* VAR[13]-VAR[19] */ + 0x0c000000, + 0x40000000, + 0x7fff7fff, + 0x00000000, + 0x00000003, + 0x40000004, + 0x43ffffff, + + /* INC[0]-INC[6] */ + 0x40000000, + 0xe0000000, + 0xe0000000, + 0xa0000008, + 0x20000000, + 0x00000000, + 0x4000ffff, +}; + diff --git a/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c b/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c new file mode 100644 index 0000000000..8dd38ede26 --- /dev/null +++ b/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Bestcomm GenBD RX task microcode + * + * Copyright (C) 2006 AppSpec Computer Technologies Corp. + * Jeff Gibbons + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex + * on Tue Mar 4 10:14:12 2006 GMT + */ + +#include + +/* + * The header consists of the following fields: + * u32 magic; + * u8 desc_size; + * u8 var_size; + * u8 inc_size; + * u8 first_var; + * u8 reserved[8]; + * + * The size fields contain the number of 32-bit words. + */ + +u32 bcom_gen_bd_rx_task[] = { + /* header */ + 0x4243544b, + 0x0d020409, + 0x00000000, + 0x00000000, + + /* Task descriptors */ + 0x808220da, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc3, idx1 += inc2 */ + 0x13e01010, /* DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */ + 0xb880025b, /* LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc3, idx3 += inc3 */ + 0x10001308, /* DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */ + 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */ + 0x0cccfcca, /* DRD2B1: *idx3 = EU3(); EU3(*idx3,var10) */ + 0xd9190240, /* LCDEXT: idx2 = idx2; idx2 > var9; idx2 += inc0 */ + 0xb8c5e009, /* LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */ + 0x07fecf80, /* DRD1A: *idx3 = *idx0; FN=0 INT init=31 WS=3 RS=3 */ + 0x99190024, /* LCD: idx2 = idx2; idx2 once var0; idx2 += inc4 */ + 0x60000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */ + 0x0c4cf889, /* DRD2B1: *idx1 = EU3(); EU3(idx2,var9) */ + 0x000001f8, /* NOP */ + + /* VAR[9]-VAR[10] */ + 0x40000000, + 0x7fff7fff, + + /* INC[0]-INC[3] */ + 0x40000000, + 0xe0000000, + 0xa0000008, + 0x20000000, +}; + diff --git a/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c b/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c new file mode 100644 index 0000000000..844dfe2586 --- /dev/null +++ b/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Bestcomm GenBD TX task microcode + * + * Copyright (C) 2006 AppSpec Computer Technologies Corp. + * Jeff Gibbons + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex + * on Tue Mar 4 10:14:12 2006 GMT + */ + +#include + +/* + * The header consists of the following fields: + * u32 magic; + * u8 desc_size; + * u8 var_size; + * u8 inc_size; + * u8 first_var; + * u8 reserved[8]; + * + * The size fields contain the number of 32-bit words. + */ + +u32 bcom_gen_bd_tx_task[] = { + /* header */ + 0x4243544b, + 0x0f040609, + 0x00000000, + 0x00000000, + + /* Task descriptors */ + 0x800220e3, /* LCD: idx0 = var0, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */ + 0x13e01010, /* DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */ + 0xb8808264, /* LCD: idx2 = *idx1, idx3 = var1; idx2 < var9; idx2 += inc4, idx3 += inc4 */ + 0x10001308, /* DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */ + 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */ + 0x0cccfcca, /* DRD2B1: *idx3 = EU3(); EU3(*idx3,var10) */ + 0xd9190300, /* LCDEXT: idx2 = idx2; idx2 > var12; idx2 += inc0 */ + 0xb8c5e009, /* LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */ + 0x03fec398, /* DRD1A: *idx0 = *idx3; FN=0 init=31 WS=3 RS=3 */ + 0x9919826a, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc5, idx3 += inc2 */ + 0x0feac398, /* DRD1A: *idx0 = *idx3; FN=0 TFD INT init=31 WS=1 RS=1 */ + 0x99190036, /* LCD: idx2 = idx2; idx2 once var0; idx2 += inc6 */ + 0x60000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */ + 0x0c4cf889, /* DRD2B1: *idx1 = EU3(); EU3(idx2,var9) */ + 0x000001f8, /* NOP */ + + /* VAR[9]-VAR[12] */ + 0x40000000, + 0x7fff7fff, + 0x00000000, + 0x40000004, + + /* INC[0]-INC[5] */ + 0x40000000, + 0xe0000000, + 0xe0000000, + 0xa0000008, + 0x20000000, + 0x4000ffff, +}; + diff --git a/drivers/dma/bestcomm/bestcomm.c b/drivers/dma/bestcomm/bestcomm.c new file mode 100644 index 0000000000..80096f9403 --- /dev/null +++ b/drivers/dma/bestcomm/bestcomm.c @@ -0,0 +1,525 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for MPC52xx processor BestComm peripheral controller + * + * Copyright (C) 2006-2007 Sylvain Munaut + * Copyright (C) 2005 Varma Electronics Oy, + * ( by Andrey Volkov ) + * Copyright (C) 2003-2004 MontaVista, Software, Inc. + * ( by Dale Farnsworth ) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "linux/fsl/bestcomm/bestcomm.h" + +#define DRIVER_NAME "bestcomm-core" + +/* MPC5200 device tree match tables */ +static const struct of_device_id mpc52xx_sram_ids[] = { + { .compatible = "fsl,mpc5200-sram", }, + { .compatible = "mpc5200-sram", }, + {} +}; + + +struct bcom_engine *bcom_eng = NULL; +EXPORT_SYMBOL_GPL(bcom_eng); /* needed for inline functions */ + +/* ======================================================================== */ +/* Public and private API */ +/* ======================================================================== */ + +/* Private API */ + +struct bcom_task * +bcom_task_alloc(int bd_count, int bd_size, int priv_size) +{ + int i, tasknum = -1; + struct bcom_task *tsk; + + /* Don't try to do anything if bestcomm init failed */ + if (!bcom_eng) + return NULL; + + /* Get and reserve a task num */ + spin_lock(&bcom_eng->lock); + + for (i=0; itdt[i].stop) { /* we use stop as a marker */ + bcom_eng->tdt[i].stop = 0xfffffffful; /* dummy addr */ + tasknum = i; + break; + } + + spin_unlock(&bcom_eng->lock); + + if (tasknum < 0) + return NULL; + + /* Allocate our structure */ + tsk = kzalloc(sizeof(struct bcom_task) + priv_size, GFP_KERNEL); + if (!tsk) + goto error; + + tsk->tasknum = tasknum; + if (priv_size) + tsk->priv = (void*)tsk + sizeof(struct bcom_task); + + /* Get IRQ of that task */ + tsk->irq = irq_of_parse_and_map(bcom_eng->ofnode, tsk->tasknum); + if (!tsk->irq) + goto error; + + /* Init the BDs, if needed */ + if (bd_count) { + tsk->cookie = kmalloc_array(bd_count, sizeof(void *), + GFP_KERNEL); + if (!tsk->cookie) + goto error; + + tsk->bd = bcom_sram_alloc(bd_count * bd_size, 4, &tsk->bd_pa); + if (!tsk->bd) + goto error; + memset_io(tsk->bd, 0x00, bd_count * bd_size); + + tsk->num_bd = bd_count; + tsk->bd_size = bd_size; + } + + return tsk; + +error: + if (tsk) { + if (tsk->irq) + irq_dispose_mapping(tsk->irq); + bcom_sram_free(tsk->bd); + kfree(tsk->cookie); + kfree(tsk); + } + + bcom_eng->tdt[tasknum].stop = 0; + + return NULL; +} +EXPORT_SYMBOL_GPL(bcom_task_alloc); + +void +bcom_task_free(struct bcom_task *tsk) +{ + /* Stop the task */ + bcom_disable_task(tsk->tasknum); + + /* Clear TDT */ + bcom_eng->tdt[tsk->tasknum].start = 0; + bcom_eng->tdt[tsk->tasknum].stop = 0; + + /* Free everything */ + irq_dispose_mapping(tsk->irq); + bcom_sram_free(tsk->bd); + kfree(tsk->cookie); + kfree(tsk); +} +EXPORT_SYMBOL_GPL(bcom_task_free); + +int +bcom_load_image(int task, u32 *task_image) +{ + struct bcom_task_header *hdr = (struct bcom_task_header *)task_image; + struct bcom_tdt *tdt; + u32 *desc, *var, *inc; + u32 *desc_src, *var_src, *inc_src; + + /* Safety checks */ + if (hdr->magic != BCOM_TASK_MAGIC) { + printk(KERN_ERR DRIVER_NAME + ": Trying to load invalid microcode\n"); + return -EINVAL; + } + + if ((task < 0) || (task >= BCOM_MAX_TASKS)) { + printk(KERN_ERR DRIVER_NAME + ": Trying to load invalid task %d\n", task); + return -EINVAL; + } + + /* Initial load or reload */ + tdt = &bcom_eng->tdt[task]; + + if (tdt->start) { + desc = bcom_task_desc(task); + if (hdr->desc_size != bcom_task_num_descs(task)) { + printk(KERN_ERR DRIVER_NAME + ": Trying to reload wrong task image " + "(%d size %d/%d)!\n", + task, + hdr->desc_size, + bcom_task_num_descs(task)); + return -EINVAL; + } + } else { + phys_addr_t start_pa; + + desc = bcom_sram_alloc(hdr->desc_size * sizeof(u32), 4, &start_pa); + if (!desc) + return -ENOMEM; + + tdt->start = start_pa; + tdt->stop = start_pa + ((hdr->desc_size-1) * sizeof(u32)); + } + + var = bcom_task_var(task); + inc = bcom_task_inc(task); + + /* Clear & copy */ + memset_io(var, 0x00, BCOM_VAR_SIZE); + memset_io(inc, 0x00, BCOM_INC_SIZE); + + desc_src = (u32 *)(hdr + 1); + var_src = desc_src + hdr->desc_size; + inc_src = var_src + hdr->var_size; + + memcpy_toio(desc, desc_src, hdr->desc_size * sizeof(u32)); + memcpy_toio(var + hdr->first_var, var_src, hdr->var_size * sizeof(u32)); + memcpy_toio(inc, inc_src, hdr->inc_size * sizeof(u32)); + + return 0; +} +EXPORT_SYMBOL_GPL(bcom_load_image); + +void +bcom_set_initiator(int task, int initiator) +{ + int i; + int num_descs; + u32 *desc; + int next_drd_has_initiator; + + bcom_set_tcr_initiator(task, initiator); + + /* Just setting tcr is apparently not enough due to some problem */ + /* with it. So we just go thru all the microcode and replace in */ + /* the DRD directly */ + + desc = bcom_task_desc(task); + next_drd_has_initiator = 1; + num_descs = bcom_task_num_descs(task); + + for (i=0; itasknum); +} +EXPORT_SYMBOL_GPL(bcom_enable); + +void +bcom_disable(struct bcom_task *tsk) +{ + bcom_disable_task(tsk->tasknum); +} +EXPORT_SYMBOL_GPL(bcom_disable); + + +/* ======================================================================== */ +/* Engine init/cleanup */ +/* ======================================================================== */ + +/* Function Descriptor table */ +/* this will need to be updated if Freescale changes their task code FDT */ +static u32 fdt_ops[] = { + 0xa0045670, /* FDT[48] - load_acc() */ + 0x80045670, /* FDT[49] - unload_acc() */ + 0x21800000, /* FDT[50] - and() */ + 0x21e00000, /* FDT[51] - or() */ + 0x21500000, /* FDT[52] - xor() */ + 0x21400000, /* FDT[53] - andn() */ + 0x21500000, /* FDT[54] - not() */ + 0x20400000, /* FDT[55] - add() */ + 0x20500000, /* FDT[56] - sub() */ + 0x20800000, /* FDT[57] - lsh() */ + 0x20a00000, /* FDT[58] - rsh() */ + 0xc0170000, /* FDT[59] - crc8() */ + 0xc0145670, /* FDT[60] - crc16() */ + 0xc0345670, /* FDT[61] - crc32() */ + 0xa0076540, /* FDT[62] - endian32() */ + 0xa0000760, /* FDT[63] - endian16() */ +}; + + +static int bcom_engine_init(void) +{ + int task; + phys_addr_t tdt_pa, ctx_pa, var_pa, fdt_pa; + unsigned int tdt_size, ctx_size, var_size, fdt_size; + + /* Allocate & clear SRAM zones for FDT, TDTs, contexts and vars/incs */ + tdt_size = BCOM_MAX_TASKS * sizeof(struct bcom_tdt); + ctx_size = BCOM_MAX_TASKS * BCOM_CTX_SIZE; + var_size = BCOM_MAX_TASKS * (BCOM_VAR_SIZE + BCOM_INC_SIZE); + fdt_size = BCOM_FDT_SIZE; + + bcom_eng->tdt = bcom_sram_alloc(tdt_size, sizeof(u32), &tdt_pa); + bcom_eng->ctx = bcom_sram_alloc(ctx_size, BCOM_CTX_ALIGN, &ctx_pa); + bcom_eng->var = bcom_sram_alloc(var_size, BCOM_VAR_ALIGN, &var_pa); + bcom_eng->fdt = bcom_sram_alloc(fdt_size, BCOM_FDT_ALIGN, &fdt_pa); + + if (!bcom_eng->tdt || !bcom_eng->ctx || !bcom_eng->var || !bcom_eng->fdt) { + printk(KERN_ERR "DMA: SRAM alloc failed in engine init !\n"); + + bcom_sram_free(bcom_eng->tdt); + bcom_sram_free(bcom_eng->ctx); + bcom_sram_free(bcom_eng->var); + bcom_sram_free(bcom_eng->fdt); + + return -ENOMEM; + } + + memset_io(bcom_eng->tdt, 0x00, tdt_size); + memset_io(bcom_eng->ctx, 0x00, ctx_size); + memset_io(bcom_eng->var, 0x00, var_size); + memset_io(bcom_eng->fdt, 0x00, fdt_size); + + /* Copy the FDT for the EU#3 */ + memcpy_toio(&bcom_eng->fdt[48], fdt_ops, sizeof(fdt_ops)); + + /* Initialize Task base structure */ + for (task=0; taskregs->tcr[task], 0); + out_8(&bcom_eng->regs->ipr[task], 0); + + bcom_eng->tdt[task].context = ctx_pa; + bcom_eng->tdt[task].var = var_pa; + bcom_eng->tdt[task].fdt = fdt_pa; + + var_pa += BCOM_VAR_SIZE + BCOM_INC_SIZE; + ctx_pa += BCOM_CTX_SIZE; + } + + out_be32(&bcom_eng->regs->taskBar, tdt_pa); + + /* Init 'always' initiator */ + out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ALWAYS], BCOM_IPR_ALWAYS); + + /* Disable COMM Bus Prefetch on the original 5200; it's broken */ + if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR) + bcom_disable_prefetch(); + + /* Init lock */ + spin_lock_init(&bcom_eng->lock); + + return 0; +} + +static void +bcom_engine_cleanup(void) +{ + int task; + + /* Stop all tasks */ + for (task=0; taskregs->tcr[task], 0); + out_8(&bcom_eng->regs->ipr[task], 0); + } + + out_be32(&bcom_eng->regs->taskBar, 0ul); + + /* Release the SRAM zones */ + bcom_sram_free(bcom_eng->tdt); + bcom_sram_free(bcom_eng->ctx); + bcom_sram_free(bcom_eng->var); + bcom_sram_free(bcom_eng->fdt); +} + + +/* ======================================================================== */ +/* OF platform driver */ +/* ======================================================================== */ + +static int mpc52xx_bcom_probe(struct platform_device *op) +{ + struct device_node *ofn_sram; + struct resource res_bcom; + + int rv; + + /* Inform user we're ok so far */ + printk(KERN_INFO "DMA: MPC52xx BestComm driver\n"); + + /* Get the bestcomm node */ + of_node_get(op->dev.of_node); + + /* Prepare SRAM */ + ofn_sram = of_find_matching_node(NULL, mpc52xx_sram_ids); + if (!ofn_sram) { + printk(KERN_ERR DRIVER_NAME ": " + "No SRAM found in device tree\n"); + rv = -ENODEV; + goto error_ofput; + } + rv = bcom_sram_init(ofn_sram, DRIVER_NAME); + of_node_put(ofn_sram); + + if (rv) { + printk(KERN_ERR DRIVER_NAME ": " + "Error in SRAM init\n"); + goto error_ofput; + } + + /* Get a clean struct */ + bcom_eng = kzalloc(sizeof(struct bcom_engine), GFP_KERNEL); + if (!bcom_eng) { + rv = -ENOMEM; + goto error_sramclean; + } + + /* Save the node */ + bcom_eng->ofnode = op->dev.of_node; + + /* Get, reserve & map io */ + if (of_address_to_resource(op->dev.of_node, 0, &res_bcom)) { + printk(KERN_ERR DRIVER_NAME ": " + "Can't get resource\n"); + rv = -EINVAL; + goto error_sramclean; + } + + if (!request_mem_region(res_bcom.start, resource_size(&res_bcom), + DRIVER_NAME)) { + printk(KERN_ERR DRIVER_NAME ": " + "Can't request registers region\n"); + rv = -EBUSY; + goto error_sramclean; + } + + bcom_eng->regs_base = res_bcom.start; + bcom_eng->regs = ioremap(res_bcom.start, sizeof(struct mpc52xx_sdma)); + if (!bcom_eng->regs) { + printk(KERN_ERR DRIVER_NAME ": " + "Can't map registers\n"); + rv = -ENOMEM; + goto error_release; + } + + /* Now, do the real init */ + rv = bcom_engine_init(); + if (rv) + goto error_unmap; + + /* Done ! */ + printk(KERN_INFO "DMA: MPC52xx BestComm engine @%08lx ok !\n", + (long)bcom_eng->regs_base); + + return 0; + + /* Error path */ +error_unmap: + iounmap(bcom_eng->regs); +error_release: + release_mem_region(res_bcom.start, sizeof(struct mpc52xx_sdma)); +error_sramclean: + kfree(bcom_eng); + bcom_sram_cleanup(); +error_ofput: + of_node_put(op->dev.of_node); + + printk(KERN_ERR "DMA: MPC52xx BestComm init failed !\n"); + + return rv; +} + + +static int mpc52xx_bcom_remove(struct platform_device *op) +{ + /* Clean up the engine */ + bcom_engine_cleanup(); + + /* Cleanup SRAM */ + bcom_sram_cleanup(); + + /* Release regs */ + iounmap(bcom_eng->regs); + release_mem_region(bcom_eng->regs_base, sizeof(struct mpc52xx_sdma)); + + /* Release the node */ + of_node_put(bcom_eng->ofnode); + + /* Release memory */ + kfree(bcom_eng); + bcom_eng = NULL; + + return 0; +} + +static const struct of_device_id mpc52xx_bcom_of_match[] = { + { .compatible = "fsl,mpc5200-bestcomm", }, + { .compatible = "mpc5200-bestcomm", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, mpc52xx_bcom_of_match); + + +static struct platform_driver mpc52xx_bcom_of_platform_driver = { + .probe = mpc52xx_bcom_probe, + .remove = mpc52xx_bcom_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = mpc52xx_bcom_of_match, + }, +}; + + +/* ======================================================================== */ +/* Module */ +/* ======================================================================== */ + +static int __init +mpc52xx_bcom_init(void) +{ + return platform_driver_register(&mpc52xx_bcom_of_platform_driver); +} + +static void __exit +mpc52xx_bcom_exit(void) +{ + platform_driver_unregister(&mpc52xx_bcom_of_platform_driver); +} + +/* If we're not a module, we must make sure everything is setup before */ +/* anyone tries to use us ... that's why we use subsys_initcall instead */ +/* of module_init. */ +subsys_initcall(mpc52xx_bcom_init); +module_exit(mpc52xx_bcom_exit); + +MODULE_DESCRIPTION("Freescale MPC52xx BestComm DMA"); +MODULE_AUTHOR("Sylvain Munaut "); +MODULE_AUTHOR("Andrey Volkov "); +MODULE_AUTHOR("Dale Farnsworth "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/bestcomm/fec.c b/drivers/dma/bestcomm/fec.c new file mode 100644 index 0000000000..3a4a2f7910 --- /dev/null +++ b/drivers/dma/bestcomm/fec.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Bestcomm FEC tasks driver + * + * Copyright (C) 2006-2007 Sylvain Munaut + * Copyright (C) 2003-2004 MontaVista, Software, Inc. + * ( by Dale Farnsworth ) + */ + +#include +#include +#include +#include + +#include +#include +#include + + +/* ======================================================================== */ +/* Task image/var/inc */ +/* ======================================================================== */ + +/* fec tasks images */ +extern u32 bcom_fec_rx_task[]; +extern u32 bcom_fec_tx_task[]; + +/* rx task vars that need to be set before enabling the task */ +struct bcom_fec_rx_var { + u32 enable; /* (u16*) address of task's control register */ + u32 fifo; /* (u32*) address of fec's fifo */ + u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */ + u32 bd_last; /* (struct bcom_bd*) end of ring buffer */ + u32 bd_start; /* (struct bcom_bd*) current bd */ + u32 buffer_size; /* size of receive buffer */ +}; + +/* rx task incs that need to be set before enabling the task */ +struct bcom_fec_rx_inc { + u16 pad0; + s16 incr_bytes; + u16 pad1; + s16 incr_dst; + u16 pad2; + s16 incr_dst_ma; +}; + +/* tx task vars that need to be set before enabling the task */ +struct bcom_fec_tx_var { + u32 DRD; /* (u32*) address of self-modified DRD */ + u32 fifo; /* (u32*) address of fec's fifo */ + u32 enable; /* (u16*) address of task's control register */ + u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */ + u32 bd_last; /* (struct bcom_bd*) end of ring buffer */ + u32 bd_start; /* (struct bcom_bd*) current bd */ + u32 buffer_size; /* set by uCode for each packet */ +}; + +/* tx task incs that need to be set before enabling the task */ +struct bcom_fec_tx_inc { + u16 pad0; + s16 incr_bytes; + u16 pad1; + s16 incr_src; + u16 pad2; + s16 incr_src_ma; +}; + +/* private structure in the task */ +struct bcom_fec_priv { + phys_addr_t fifo; + int maxbufsize; +}; + + +/* ======================================================================== */ +/* Task support code */ +/* ======================================================================== */ + +struct bcom_task * +bcom_fec_rx_init(int queue_len, phys_addr_t fifo, int maxbufsize) +{ + struct bcom_task *tsk; + struct bcom_fec_priv *priv; + + tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd), + sizeof(struct bcom_fec_priv)); + if (!tsk) + return NULL; + + tsk->flags = BCOM_FLAGS_NONE; + + priv = tsk->priv; + priv->fifo = fifo; + priv->maxbufsize = maxbufsize; + + if (bcom_fec_rx_reset(tsk)) { + bcom_task_free(tsk); + return NULL; + } + + return tsk; +} +EXPORT_SYMBOL_GPL(bcom_fec_rx_init); + +int +bcom_fec_rx_reset(struct bcom_task *tsk) +{ + struct bcom_fec_priv *priv = tsk->priv; + struct bcom_fec_rx_var *var; + struct bcom_fec_rx_inc *inc; + + /* Shutdown the task */ + bcom_disable_task(tsk->tasknum); + + /* Reset the microcode */ + var = (struct bcom_fec_rx_var *) bcom_task_var(tsk->tasknum); + inc = (struct bcom_fec_rx_inc *) bcom_task_inc(tsk->tasknum); + + if (bcom_load_image(tsk->tasknum, bcom_fec_rx_task)) + return -1; + + var->enable = bcom_eng->regs_base + + offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]); + var->fifo = (u32) priv->fifo; + var->bd_base = tsk->bd_pa; + var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size); + var->bd_start = tsk->bd_pa; + var->buffer_size = priv->maxbufsize; + + inc->incr_bytes = -(s16)sizeof(u32); /* These should be in the */ + inc->incr_dst = sizeof(u32); /* task image, but we stick */ + inc->incr_dst_ma= sizeof(u8); /* to the official ones */ + + /* Reset the BDs */ + tsk->index = 0; + tsk->outdex = 0; + + memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size); + + /* Configure some stuff */ + bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_RX_BD_PRAGMA); + bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum); + + out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_RX], BCOM_IPR_FEC_RX); + + out_be32(&bcom_eng->regs->IntPend, 1<tasknum); /* Clear ints */ + + return 0; +} +EXPORT_SYMBOL_GPL(bcom_fec_rx_reset); + +void +bcom_fec_rx_release(struct bcom_task *tsk) +{ + /* Nothing special for the FEC tasks */ + bcom_task_free(tsk); +} +EXPORT_SYMBOL_GPL(bcom_fec_rx_release); + + + + /* Return 2nd to last DRD */ + /* This is an ugly hack, but at least it's only done + once at initialization */ +static u32 *self_modified_drd(int tasknum) +{ + u32 *desc; + int num_descs; + int drd_count; + int i; + + num_descs = bcom_task_num_descs(tasknum); + desc = bcom_task_desc(tasknum) + num_descs - 1; + drd_count = 0; + for (i=0; iflags = BCOM_FLAGS_ENABLE_TASK; + + priv = tsk->priv; + priv->fifo = fifo; + + if (bcom_fec_tx_reset(tsk)) { + bcom_task_free(tsk); + return NULL; + } + + return tsk; +} +EXPORT_SYMBOL_GPL(bcom_fec_tx_init); + +int +bcom_fec_tx_reset(struct bcom_task *tsk) +{ + struct bcom_fec_priv *priv = tsk->priv; + struct bcom_fec_tx_var *var; + struct bcom_fec_tx_inc *inc; + + /* Shutdown the task */ + bcom_disable_task(tsk->tasknum); + + /* Reset the microcode */ + var = (struct bcom_fec_tx_var *) bcom_task_var(tsk->tasknum); + inc = (struct bcom_fec_tx_inc *) bcom_task_inc(tsk->tasknum); + + if (bcom_load_image(tsk->tasknum, bcom_fec_tx_task)) + return -1; + + var->enable = bcom_eng->regs_base + + offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]); + var->fifo = (u32) priv->fifo; + var->DRD = bcom_sram_va2pa(self_modified_drd(tsk->tasknum)); + var->bd_base = tsk->bd_pa; + var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size); + var->bd_start = tsk->bd_pa; + + inc->incr_bytes = -(s16)sizeof(u32); /* These should be in the */ + inc->incr_src = sizeof(u32); /* task image, but we stick */ + inc->incr_src_ma= sizeof(u8); /* to the official ones */ + + /* Reset the BDs */ + tsk->index = 0; + tsk->outdex = 0; + + memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size); + + /* Configure some stuff */ + bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_TX_BD_PRAGMA); + bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum); + + out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_TX], BCOM_IPR_FEC_TX); + + out_be32(&bcom_eng->regs->IntPend, 1<tasknum); /* Clear ints */ + + return 0; +} +EXPORT_SYMBOL_GPL(bcom_fec_tx_reset); + +void +bcom_fec_tx_release(struct bcom_task *tsk) +{ + /* Nothing special for the FEC tasks */ + bcom_task_free(tsk); +} +EXPORT_SYMBOL_GPL(bcom_fec_tx_release); + + +MODULE_DESCRIPTION("BestComm FEC tasks driver"); +MODULE_AUTHOR("Dale Farnsworth "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/bestcomm/gen_bd.c b/drivers/dma/bestcomm/gen_bd.c new file mode 100644 index 0000000000..8a24a5cbc2 --- /dev/null +++ b/drivers/dma/bestcomm/gen_bd.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for MPC52xx processor BestComm General Buffer Descriptor + * + * Copyright (C) 2007 Sylvain Munaut + * Copyright (C) 2006 AppSpec Computer Technologies Corp. + * Jeff Gibbons + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + + +/* ======================================================================== */ +/* Task image/var/inc */ +/* ======================================================================== */ + +/* gen_bd tasks images */ +extern u32 bcom_gen_bd_rx_task[]; +extern u32 bcom_gen_bd_tx_task[]; + +/* rx task vars that need to be set before enabling the task */ +struct bcom_gen_bd_rx_var { + u32 enable; /* (u16*) address of task's control register */ + u32 fifo; /* (u32*) address of gen_bd's fifo */ + u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */ + u32 bd_last; /* (struct bcom_bd*) end of ring buffer */ + u32 bd_start; /* (struct bcom_bd*) current bd */ + u32 buffer_size; /* size of receive buffer */ +}; + +/* rx task incs that need to be set before enabling the task */ +struct bcom_gen_bd_rx_inc { + u16 pad0; + s16 incr_bytes; + u16 pad1; + s16 incr_dst; +}; + +/* tx task vars that need to be set before enabling the task */ +struct bcom_gen_bd_tx_var { + u32 fifo; /* (u32*) address of gen_bd's fifo */ + u32 enable; /* (u16*) address of task's control register */ + u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */ + u32 bd_last; /* (struct bcom_bd*) end of ring buffer */ + u32 bd_start; /* (struct bcom_bd*) current bd */ + u32 buffer_size; /* set by uCode for each packet */ +}; + +/* tx task incs that need to be set before enabling the task */ +struct bcom_gen_bd_tx_inc { + u16 pad0; + s16 incr_bytes; + u16 pad1; + s16 incr_src; + u16 pad2; + s16 incr_src_ma; +}; + +/* private structure */ +struct bcom_gen_bd_priv { + phys_addr_t fifo; + int initiator; + int ipr; + int maxbufsize; +}; + + +/* ======================================================================== */ +/* Task support code */ +/* ======================================================================== */ + +struct bcom_task * +bcom_gen_bd_rx_init(int queue_len, phys_addr_t fifo, + int initiator, int ipr, int maxbufsize) +{ + struct bcom_task *tsk; + struct bcom_gen_bd_priv *priv; + + tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd), + sizeof(struct bcom_gen_bd_priv)); + if (!tsk) + return NULL; + + tsk->flags = BCOM_FLAGS_NONE; + + priv = tsk->priv; + priv->fifo = fifo; + priv->initiator = initiator; + priv->ipr = ipr; + priv->maxbufsize = maxbufsize; + + if (bcom_gen_bd_rx_reset(tsk)) { + bcom_task_free(tsk); + return NULL; + } + + return tsk; +} +EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_init); + +int +bcom_gen_bd_rx_reset(struct bcom_task *tsk) +{ + struct bcom_gen_bd_priv *priv = tsk->priv; + struct bcom_gen_bd_rx_var *var; + struct bcom_gen_bd_rx_inc *inc; + + /* Shutdown the task */ + bcom_disable_task(tsk->tasknum); + + /* Reset the microcode */ + var = (struct bcom_gen_bd_rx_var *) bcom_task_var(tsk->tasknum); + inc = (struct bcom_gen_bd_rx_inc *) bcom_task_inc(tsk->tasknum); + + if (bcom_load_image(tsk->tasknum, bcom_gen_bd_rx_task)) + return -1; + + var->enable = bcom_eng->regs_base + + offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]); + var->fifo = (u32) priv->fifo; + var->bd_base = tsk->bd_pa; + var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size); + var->bd_start = tsk->bd_pa; + var->buffer_size = priv->maxbufsize; + + inc->incr_bytes = -(s16)sizeof(u32); + inc->incr_dst = sizeof(u32); + + /* Reset the BDs */ + tsk->index = 0; + tsk->outdex = 0; + + memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size); + + /* Configure some stuff */ + bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_RX_BD_PRAGMA); + bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum); + + out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr); + bcom_set_initiator(tsk->tasknum, priv->initiator); + + out_be32(&bcom_eng->regs->IntPend, 1<tasknum); /* Clear ints */ + + return 0; +} +EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_reset); + +void +bcom_gen_bd_rx_release(struct bcom_task *tsk) +{ + /* Nothing special for the GenBD tasks */ + bcom_task_free(tsk); +} +EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_release); + + +extern struct bcom_task * +bcom_gen_bd_tx_init(int queue_len, phys_addr_t fifo, + int initiator, int ipr) +{ + struct bcom_task *tsk; + struct bcom_gen_bd_priv *priv; + + tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd), + sizeof(struct bcom_gen_bd_priv)); + if (!tsk) + return NULL; + + tsk->flags = BCOM_FLAGS_NONE; + + priv = tsk->priv; + priv->fifo = fifo; + priv->initiator = initiator; + priv->ipr = ipr; + + if (bcom_gen_bd_tx_reset(tsk)) { + bcom_task_free(tsk); + return NULL; + } + + return tsk; +} +EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_init); + +int +bcom_gen_bd_tx_reset(struct bcom_task *tsk) +{ + struct bcom_gen_bd_priv *priv = tsk->priv; + struct bcom_gen_bd_tx_var *var; + struct bcom_gen_bd_tx_inc *inc; + + /* Shutdown the task */ + bcom_disable_task(tsk->tasknum); + + /* Reset the microcode */ + var = (struct bcom_gen_bd_tx_var *) bcom_task_var(tsk->tasknum); + inc = (struct bcom_gen_bd_tx_inc *) bcom_task_inc(tsk->tasknum); + + if (bcom_load_image(tsk->tasknum, bcom_gen_bd_tx_task)) + return -1; + + var->enable = bcom_eng->regs_base + + offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]); + var->fifo = (u32) priv->fifo; + var->bd_base = tsk->bd_pa; + var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size); + var->bd_start = tsk->bd_pa; + + inc->incr_bytes = -(s16)sizeof(u32); + inc->incr_src = sizeof(u32); + inc->incr_src_ma = sizeof(u8); + + /* Reset the BDs */ + tsk->index = 0; + tsk->outdex = 0; + + memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size); + + /* Configure some stuff */ + bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_TX_BD_PRAGMA); + bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum); + + out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr); + bcom_set_initiator(tsk->tasknum, priv->initiator); + + out_be32(&bcom_eng->regs->IntPend, 1<tasknum); /* Clear ints */ + + return 0; +} +EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_reset); + +void +bcom_gen_bd_tx_release(struct bcom_task *tsk) +{ + /* Nothing special for the GenBD tasks */ + bcom_task_free(tsk); +} +EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_release); + +/* --------------------------------------------------------------------- + * PSC support code + */ + +/** + * bcom_psc_parameters - Bestcomm initialization value table for PSC devices + * + * This structure is only used internally. It is a lookup table for PSC + * specific parameters to bestcomm tasks. + */ +static struct bcom_psc_params { + int rx_initiator; + int rx_ipr; + int tx_initiator; + int tx_ipr; +} bcom_psc_params[] = { + [0] = { + .rx_initiator = BCOM_INITIATOR_PSC1_RX, + .rx_ipr = BCOM_IPR_PSC1_RX, + .tx_initiator = BCOM_INITIATOR_PSC1_TX, + .tx_ipr = BCOM_IPR_PSC1_TX, + }, + [1] = { + .rx_initiator = BCOM_INITIATOR_PSC2_RX, + .rx_ipr = BCOM_IPR_PSC2_RX, + .tx_initiator = BCOM_INITIATOR_PSC2_TX, + .tx_ipr = BCOM_IPR_PSC2_TX, + }, + [2] = { + .rx_initiator = BCOM_INITIATOR_PSC3_RX, + .rx_ipr = BCOM_IPR_PSC3_RX, + .tx_initiator = BCOM_INITIATOR_PSC3_TX, + .tx_ipr = BCOM_IPR_PSC3_TX, + }, + [3] = { + .rx_initiator = BCOM_INITIATOR_PSC4_RX, + .rx_ipr = BCOM_IPR_PSC4_RX, + .tx_initiator = BCOM_INITIATOR_PSC4_TX, + .tx_ipr = BCOM_IPR_PSC4_TX, + }, + [4] = { + .rx_initiator = BCOM_INITIATOR_PSC5_RX, + .rx_ipr = BCOM_IPR_PSC5_RX, + .tx_initiator = BCOM_INITIATOR_PSC5_TX, + .tx_ipr = BCOM_IPR_PSC5_TX, + }, + [5] = { + .rx_initiator = BCOM_INITIATOR_PSC6_RX, + .rx_ipr = BCOM_IPR_PSC6_RX, + .tx_initiator = BCOM_INITIATOR_PSC6_TX, + .tx_ipr = BCOM_IPR_PSC6_TX, + }, +}; + +/** + * bcom_psc_gen_bd_rx_init - Allocate a receive bcom_task for a PSC port + * @psc_num: Number of the PSC to allocate a task for + * @queue_len: number of buffer descriptors to allocate for the task + * @fifo: physical address of FIFO register + * @maxbufsize: Maximum receive data size in bytes. + * + * Allocate a bestcomm task structure for receiving data from a PSC. + */ +struct bcom_task * bcom_psc_gen_bd_rx_init(unsigned psc_num, int queue_len, + phys_addr_t fifo, int maxbufsize) +{ + if (psc_num >= MPC52xx_PSC_MAXNUM) + return NULL; + + return bcom_gen_bd_rx_init(queue_len, fifo, + bcom_psc_params[psc_num].rx_initiator, + bcom_psc_params[psc_num].rx_ipr, + maxbufsize); +} +EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_rx_init); + +/** + * bcom_psc_gen_bd_tx_init - Allocate a transmit bcom_task for a PSC port + * @psc_num: Number of the PSC to allocate a task for + * @queue_len: number of buffer descriptors to allocate for the task + * @fifo: physical address of FIFO register + * + * Allocate a bestcomm task structure for transmitting data to a PSC. + */ +struct bcom_task * +bcom_psc_gen_bd_tx_init(unsigned psc_num, int queue_len, phys_addr_t fifo) +{ + struct psc; + return bcom_gen_bd_tx_init(queue_len, fifo, + bcom_psc_params[psc_num].tx_initiator, + bcom_psc_params[psc_num].tx_ipr); +} +EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_tx_init); + + +MODULE_DESCRIPTION("BestComm General Buffer Descriptor tasks driver"); +MODULE_AUTHOR("Jeff Gibbons "); +MODULE_LICENSE("GPL v2"); + diff --git a/drivers/dma/bestcomm/sram.c b/drivers/dma/bestcomm/sram.c new file mode 100644 index 0000000000..0553956f74 --- /dev/null +++ b/drivers/dma/bestcomm/sram.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Simple memory allocator for on-board SRAM + * + * Maintainer : Sylvain Munaut + * + * Copyright (C) 2005 Sylvain Munaut + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + + +/* Struct keeping our 'state' */ +struct bcom_sram *bcom_sram = NULL; +EXPORT_SYMBOL_GPL(bcom_sram); /* needed for inline functions */ + + +/* ======================================================================== */ +/* Public API */ +/* ======================================================================== */ +/* DO NOT USE in interrupts, if needed in irq handler, we should use the + _irqsave version of the spin_locks */ + +int bcom_sram_init(struct device_node *sram_node, char *owner) +{ + int rv; + const u32 *regaddr_p; + struct resource res; + unsigned int psize; + + /* Create our state struct */ + if (bcom_sram) { + printk(KERN_ERR "%s: bcom_sram_init: " + "Already initialized !\n", owner); + return -EBUSY; + } + + bcom_sram = kmalloc(sizeof(struct bcom_sram), GFP_KERNEL); + if (!bcom_sram) { + printk(KERN_ERR "%s: bcom_sram_init: " + "Couldn't allocate internal state !\n", owner); + return -ENOMEM; + } + + /* Get address and size of the sram */ + rv = of_address_to_resource(sram_node, 0, &res); + if (rv) { + printk(KERN_ERR "%s: bcom_sram_init: " + "Invalid device node !\n", owner); + goto error_free; + } + + bcom_sram->base_phys = res.start; + bcom_sram->size = resource_size(&res); + + /* Request region */ + if (!request_mem_region(res.start, resource_size(&res), owner)) { + printk(KERN_ERR "%s: bcom_sram_init: " + "Couldn't request region !\n", owner); + rv = -EBUSY; + goto error_free; + } + + /* Map SRAM */ + /* sram is not really __iomem */ + bcom_sram->base_virt = (void *)ioremap(res.start, resource_size(&res)); + + if (!bcom_sram->base_virt) { + printk(KERN_ERR "%s: bcom_sram_init: " + "Map error SRAM zone 0x%08lx (0x%0x)!\n", + owner, (long)bcom_sram->base_phys, bcom_sram->size ); + rv = -ENOMEM; + goto error_release; + } + + /* Create an rheap (defaults to 32 bits word alignment) */ + bcom_sram->rh = rh_create(4); + + /* Attach the free zones */ +#if 0 + /* Currently disabled ... for future use only */ + reg_addr_p = of_get_property(sram_node, "available", &psize); +#else + regaddr_p = NULL; + psize = 0; +#endif + + if (!regaddr_p || !psize) { + /* Attach the whole zone */ + rh_attach_region(bcom_sram->rh, 0, bcom_sram->size); + } else { + /* Attach each zone independently */ + while (psize >= 2 * sizeof(u32)) { + phys_addr_t zbase = of_translate_address(sram_node, regaddr_p); + rh_attach_region(bcom_sram->rh, zbase - bcom_sram->base_phys, regaddr_p[1]); + regaddr_p += 2; + psize -= 2 * sizeof(u32); + } + } + + /* Init our spinlock */ + spin_lock_init(&bcom_sram->lock); + + return 0; + +error_release: + release_mem_region(res.start, resource_size(&res)); +error_free: + kfree(bcom_sram); + bcom_sram = NULL; + + return rv; +} +EXPORT_SYMBOL_GPL(bcom_sram_init); + +void bcom_sram_cleanup(void) +{ + /* Free resources */ + if (bcom_sram) { + rh_destroy(bcom_sram->rh); + iounmap((void __iomem *)bcom_sram->base_virt); + release_mem_region(bcom_sram->base_phys, bcom_sram->size); + kfree(bcom_sram); + bcom_sram = NULL; + } +} +EXPORT_SYMBOL_GPL(bcom_sram_cleanup); + +void* bcom_sram_alloc(int size, int align, phys_addr_t *phys) +{ + unsigned long offset; + + spin_lock(&bcom_sram->lock); + offset = rh_alloc_align(bcom_sram->rh, size, align, NULL); + spin_unlock(&bcom_sram->lock); + + if (IS_ERR_VALUE(offset)) + return NULL; + + *phys = bcom_sram->base_phys + offset; + return bcom_sram->base_virt + offset; +} +EXPORT_SYMBOL_GPL(bcom_sram_alloc); + +void bcom_sram_free(void *ptr) +{ + unsigned long offset; + + if (!ptr) + return; + + offset = ptr - bcom_sram->base_virt; + + spin_lock(&bcom_sram->lock); + rh_free(bcom_sram->rh, offset); + spin_unlock(&bcom_sram->lock); +} +EXPORT_SYMBOL_GPL(bcom_sram_free); diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c new file mode 100644 index 0000000000..fc7cdad371 --- /dev/null +++ b/drivers/dma/dma-axi-dmac.c @@ -0,0 +1,1063 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the Analog Devices AXI-DMAC core + * + * Copyright 2013-2019 Analog Devices Inc. + * Author: Lars-Peter Clausen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +/* + * The AXI-DMAC is a soft IP core that is used in FPGA designs. The core has + * various instantiation parameters which decided the exact feature set support + * by the core. + * + * Each channel of the core has a source interface and a destination interface. + * The number of channels and the type of the channel interfaces is selected at + * configuration time. A interface can either be a connected to a central memory + * interconnect, which allows access to system memory, or it can be connected to + * a dedicated bus which is directly connected to a data port on a peripheral. + * Given that those are configuration options of the core that are selected when + * it is instantiated this means that they can not be changed by software at + * runtime. By extension this means that each channel is uni-directional. It can + * either be device to memory or memory to device, but not both. Also since the + * device side is a dedicated data bus only connected to a single peripheral + * there is no address than can or needs to be configured for the device side. + */ + +#define AXI_DMAC_REG_INTERFACE_DESC 0x10 +#define AXI_DMAC_DMA_SRC_TYPE_MSK GENMASK(13, 12) +#define AXI_DMAC_DMA_SRC_TYPE_GET(x) FIELD_GET(AXI_DMAC_DMA_SRC_TYPE_MSK, x) +#define AXI_DMAC_DMA_SRC_WIDTH_MSK GENMASK(11, 8) +#define AXI_DMAC_DMA_SRC_WIDTH_GET(x) FIELD_GET(AXI_DMAC_DMA_SRC_WIDTH_MSK, x) +#define AXI_DMAC_DMA_DST_TYPE_MSK GENMASK(5, 4) +#define AXI_DMAC_DMA_DST_TYPE_GET(x) FIELD_GET(AXI_DMAC_DMA_DST_TYPE_MSK, x) +#define AXI_DMAC_DMA_DST_WIDTH_MSK GENMASK(3, 0) +#define AXI_DMAC_DMA_DST_WIDTH_GET(x) FIELD_GET(AXI_DMAC_DMA_DST_WIDTH_MSK, x) +#define AXI_DMAC_REG_COHERENCY_DESC 0x14 +#define AXI_DMAC_DST_COHERENT_MSK BIT(0) +#define AXI_DMAC_DST_COHERENT_GET(x) FIELD_GET(AXI_DMAC_DST_COHERENT_MSK, x) + +#define AXI_DMAC_REG_IRQ_MASK 0x80 +#define AXI_DMAC_REG_IRQ_PENDING 0x84 +#define AXI_DMAC_REG_IRQ_SOURCE 0x88 + +#define AXI_DMAC_REG_CTRL 0x400 +#define AXI_DMAC_REG_TRANSFER_ID 0x404 +#define AXI_DMAC_REG_START_TRANSFER 0x408 +#define AXI_DMAC_REG_FLAGS 0x40c +#define AXI_DMAC_REG_DEST_ADDRESS 0x410 +#define AXI_DMAC_REG_SRC_ADDRESS 0x414 +#define AXI_DMAC_REG_X_LENGTH 0x418 +#define AXI_DMAC_REG_Y_LENGTH 0x41c +#define AXI_DMAC_REG_DEST_STRIDE 0x420 +#define AXI_DMAC_REG_SRC_STRIDE 0x424 +#define AXI_DMAC_REG_TRANSFER_DONE 0x428 +#define AXI_DMAC_REG_ACTIVE_TRANSFER_ID 0x42c +#define AXI_DMAC_REG_STATUS 0x430 +#define AXI_DMAC_REG_CURRENT_SRC_ADDR 0x434 +#define AXI_DMAC_REG_CURRENT_DEST_ADDR 0x438 +#define AXI_DMAC_REG_PARTIAL_XFER_LEN 0x44c +#define AXI_DMAC_REG_PARTIAL_XFER_ID 0x450 + +#define AXI_DMAC_CTRL_ENABLE BIT(0) +#define AXI_DMAC_CTRL_PAUSE BIT(1) + +#define AXI_DMAC_IRQ_SOT BIT(0) +#define AXI_DMAC_IRQ_EOT BIT(1) + +#define AXI_DMAC_FLAG_CYCLIC BIT(0) +#define AXI_DMAC_FLAG_LAST BIT(1) +#define AXI_DMAC_FLAG_PARTIAL_REPORT BIT(2) + +#define AXI_DMAC_FLAG_PARTIAL_XFER_DONE BIT(31) + +/* The maximum ID allocated by the hardware is 31 */ +#define AXI_DMAC_SG_UNUSED 32U + +struct axi_dmac_sg { + dma_addr_t src_addr; + dma_addr_t dest_addr; + unsigned int x_len; + unsigned int y_len; + unsigned int dest_stride; + unsigned int src_stride; + unsigned int id; + unsigned int partial_len; + bool schedule_when_free; +}; + +struct axi_dmac_desc { + struct virt_dma_desc vdesc; + bool cyclic; + bool have_partial_xfer; + + unsigned int num_submitted; + unsigned int num_completed; + unsigned int num_sgs; + struct axi_dmac_sg sg[]; +}; + +struct axi_dmac_chan { + struct virt_dma_chan vchan; + + struct axi_dmac_desc *next_desc; + struct list_head active_descs; + enum dma_transfer_direction direction; + + unsigned int src_width; + unsigned int dest_width; + unsigned int src_type; + unsigned int dest_type; + + unsigned int max_length; + unsigned int address_align_mask; + unsigned int length_align_mask; + + bool hw_partial_xfer; + bool hw_cyclic; + bool hw_2d; +}; + +struct axi_dmac { + void __iomem *base; + int irq; + + struct clk *clk; + + struct dma_device dma_dev; + struct axi_dmac_chan chan; +}; + +static struct axi_dmac *chan_to_axi_dmac(struct axi_dmac_chan *chan) +{ + return container_of(chan->vchan.chan.device, struct axi_dmac, + dma_dev); +} + +static struct axi_dmac_chan *to_axi_dmac_chan(struct dma_chan *c) +{ + return container_of(c, struct axi_dmac_chan, vchan.chan); +} + +static struct axi_dmac_desc *to_axi_dmac_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct axi_dmac_desc, vdesc); +} + +static void axi_dmac_write(struct axi_dmac *axi_dmac, unsigned int reg, + unsigned int val) +{ + writel(val, axi_dmac->base + reg); +} + +static int axi_dmac_read(struct axi_dmac *axi_dmac, unsigned int reg) +{ + return readl(axi_dmac->base + reg); +} + +static int axi_dmac_src_is_mem(struct axi_dmac_chan *chan) +{ + return chan->src_type == AXI_DMAC_BUS_TYPE_AXI_MM; +} + +static int axi_dmac_dest_is_mem(struct axi_dmac_chan *chan) +{ + return chan->dest_type == AXI_DMAC_BUS_TYPE_AXI_MM; +} + +static bool axi_dmac_check_len(struct axi_dmac_chan *chan, unsigned int len) +{ + if (len == 0) + return false; + if ((len & chan->length_align_mask) != 0) /* Not aligned */ + return false; + return true; +} + +static bool axi_dmac_check_addr(struct axi_dmac_chan *chan, dma_addr_t addr) +{ + if ((addr & chan->address_align_mask) != 0) /* Not aligned */ + return false; + return true; +} + +static void axi_dmac_start_transfer(struct axi_dmac_chan *chan) +{ + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + struct virt_dma_desc *vdesc; + struct axi_dmac_desc *desc; + struct axi_dmac_sg *sg; + unsigned int flags = 0; + unsigned int val; + + val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER); + if (val) /* Queue is full, wait for the next SOT IRQ */ + return; + + desc = chan->next_desc; + + if (!desc) { + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) + return; + list_move_tail(&vdesc->node, &chan->active_descs); + desc = to_axi_dmac_desc(vdesc); + } + sg = &desc->sg[desc->num_submitted]; + + /* Already queued in cyclic mode. Wait for it to finish */ + if (sg->id != AXI_DMAC_SG_UNUSED) { + sg->schedule_when_free = true; + return; + } + + desc->num_submitted++; + if (desc->num_submitted == desc->num_sgs || + desc->have_partial_xfer) { + if (desc->cyclic) + desc->num_submitted = 0; /* Start again */ + else + chan->next_desc = NULL; + flags |= AXI_DMAC_FLAG_LAST; + } else { + chan->next_desc = desc; + } + + sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); + + if (axi_dmac_dest_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride); + } + + if (axi_dmac_src_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride); + } + + /* + * If the hardware supports cyclic transfers and there is no callback to + * call and only a single segment, enable hw cyclic mode to avoid + * unnecessary interrupts. + */ + if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback && + desc->num_sgs == 1) + flags |= AXI_DMAC_FLAG_CYCLIC; + + if (chan->hw_partial_xfer) + flags |= AXI_DMAC_FLAG_PARTIAL_REPORT; + + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags); + axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1); +} + +static struct axi_dmac_desc *axi_dmac_active_desc(struct axi_dmac_chan *chan) +{ + return list_first_entry_or_null(&chan->active_descs, + struct axi_dmac_desc, vdesc.node); +} + +static inline unsigned int axi_dmac_total_sg_bytes(struct axi_dmac_chan *chan, + struct axi_dmac_sg *sg) +{ + if (chan->hw_2d) + return sg->x_len * sg->y_len; + else + return sg->x_len; +} + +static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan) +{ + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + struct axi_dmac_desc *desc; + struct axi_dmac_sg *sg; + u32 xfer_done, len, id, i; + bool found_sg; + + do { + len = axi_dmac_read(dmac, AXI_DMAC_REG_PARTIAL_XFER_LEN); + id = axi_dmac_read(dmac, AXI_DMAC_REG_PARTIAL_XFER_ID); + + found_sg = false; + list_for_each_entry(desc, &chan->active_descs, vdesc.node) { + for (i = 0; i < desc->num_sgs; i++) { + sg = &desc->sg[i]; + if (sg->id == AXI_DMAC_SG_UNUSED) + continue; + if (sg->id == id) { + desc->have_partial_xfer = true; + sg->partial_len = len; + found_sg = true; + break; + } + } + if (found_sg) + break; + } + + if (found_sg) { + dev_dbg(dmac->dma_dev.dev, + "Found partial segment id=%u, len=%u\n", + id, len); + } else { + dev_warn(dmac->dma_dev.dev, + "Not found partial segment id=%u, len=%u\n", + id, len); + } + + /* Check if we have any more partial transfers */ + xfer_done = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_DONE); + xfer_done = !(xfer_done & AXI_DMAC_FLAG_PARTIAL_XFER_DONE); + + } while (!xfer_done); +} + +static void axi_dmac_compute_residue(struct axi_dmac_chan *chan, + struct axi_dmac_desc *active) +{ + struct dmaengine_result *rslt = &active->vdesc.tx_result; + unsigned int start = active->num_completed - 1; + struct axi_dmac_sg *sg; + unsigned int i, total; + + rslt->result = DMA_TRANS_NOERROR; + rslt->residue = 0; + + /* + * We get here if the last completed segment is partial, which + * means we can compute the residue from that segment onwards + */ + for (i = start; i < active->num_sgs; i++) { + sg = &active->sg[i]; + total = axi_dmac_total_sg_bytes(chan, sg); + rslt->residue += (total - sg->partial_len); + } +} + +static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan, + unsigned int completed_transfers) +{ + struct axi_dmac_desc *active; + struct axi_dmac_sg *sg; + bool start_next = false; + + active = axi_dmac_active_desc(chan); + if (!active) + return false; + + if (chan->hw_partial_xfer && + (completed_transfers & AXI_DMAC_FLAG_PARTIAL_XFER_DONE)) + axi_dmac_dequeue_partial_xfers(chan); + + do { + sg = &active->sg[active->num_completed]; + if (sg->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */ + break; + if (!(BIT(sg->id) & completed_transfers)) + break; + active->num_completed++; + sg->id = AXI_DMAC_SG_UNUSED; + if (sg->schedule_when_free) { + sg->schedule_when_free = false; + start_next = true; + } + + if (sg->partial_len) + axi_dmac_compute_residue(chan, active); + + if (active->cyclic) + vchan_cyclic_callback(&active->vdesc); + + if (active->num_completed == active->num_sgs || + sg->partial_len) { + if (active->cyclic) { + active->num_completed = 0; /* wrap around */ + } else { + list_del(&active->vdesc.node); + vchan_cookie_complete(&active->vdesc); + active = axi_dmac_active_desc(chan); + } + } + } while (active); + + return start_next; +} + +static irqreturn_t axi_dmac_interrupt_handler(int irq, void *devid) +{ + struct axi_dmac *dmac = devid; + unsigned int pending; + bool start_next = false; + + pending = axi_dmac_read(dmac, AXI_DMAC_REG_IRQ_PENDING); + if (!pending) + return IRQ_NONE; + + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_PENDING, pending); + + spin_lock(&dmac->chan.vchan.lock); + /* One or more transfers have finished */ + if (pending & AXI_DMAC_IRQ_EOT) { + unsigned int completed; + + completed = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_DONE); + start_next = axi_dmac_transfer_done(&dmac->chan, completed); + } + /* Space has become available in the descriptor queue */ + if ((pending & AXI_DMAC_IRQ_SOT) || start_next) + axi_dmac_start_transfer(&dmac->chan); + spin_unlock(&dmac->chan.vchan.lock); + + return IRQ_HANDLED; +} + +static int axi_dmac_terminate_all(struct dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, 0); + chan->next_desc = NULL; + vchan_get_all_descriptors(&chan->vchan, &head); + list_splice_tail_init(&chan->active_descs, &head); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&chan->vchan, &head); + + return 0; +} + +static void axi_dmac_synchronize(struct dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + + vchan_synchronize(&chan->vchan); +} + +static void axi_dmac_issue_pending(struct dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + unsigned long flags; + + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE); + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (vchan_issue_pending(&chan->vchan)) + axi_dmac_start_transfer(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs) +{ + struct axi_dmac_desc *desc; + unsigned int i; + + desc = kzalloc(struct_size(desc, sg, num_sgs), GFP_NOWAIT); + if (!desc) + return NULL; + + for (i = 0; i < num_sgs; i++) + desc->sg[i].id = AXI_DMAC_SG_UNUSED; + + desc->num_sgs = num_sgs; + + return desc; +} + +static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan, + enum dma_transfer_direction direction, dma_addr_t addr, + unsigned int num_periods, unsigned int period_len, + struct axi_dmac_sg *sg) +{ + unsigned int num_segments, i; + unsigned int segment_size; + unsigned int len; + + /* Split into multiple equally sized segments if necessary */ + num_segments = DIV_ROUND_UP(period_len, chan->max_length); + segment_size = DIV_ROUND_UP(period_len, num_segments); + /* Take care of alignment */ + segment_size = ((segment_size - 1) | chan->length_align_mask) + 1; + + for (i = 0; i < num_periods; i++) { + len = period_len; + + while (len > segment_size) { + if (direction == DMA_DEV_TO_MEM) + sg->dest_addr = addr; + else + sg->src_addr = addr; + sg->x_len = segment_size; + sg->y_len = 1; + sg++; + addr += segment_size; + len -= segment_size; + } + + if (direction == DMA_DEV_TO_MEM) + sg->dest_addr = addr; + else + sg->src_addr = addr; + sg->x_len = len; + sg->y_len = 1; + sg++; + addr += len; + } + + return sg; +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg( + struct dma_chan *c, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + struct axi_dmac_sg *dsg; + struct scatterlist *sg; + unsigned int num_sgs; + unsigned int i; + + if (direction != chan->direction) + return NULL; + + num_sgs = 0; + for_each_sg(sgl, sg, sg_len, i) + num_sgs += DIV_ROUND_UP(sg_dma_len(sg), chan->max_length); + + desc = axi_dmac_alloc_desc(num_sgs); + if (!desc) + return NULL; + + dsg = desc->sg; + + for_each_sg(sgl, sg, sg_len, i) { + if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) || + !axi_dmac_check_len(chan, sg_dma_len(sg))) { + kfree(desc); + return NULL; + } + + dsg = axi_dmac_fill_linear_sg(chan, direction, sg_dma_address(sg), 1, + sg_dma_len(sg), dsg); + } + + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic( + struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + unsigned int num_periods, num_segments; + + if (direction != chan->direction) + return NULL; + + if (!axi_dmac_check_len(chan, buf_len) || + !axi_dmac_check_addr(chan, buf_addr)) + return NULL; + + if (period_len == 0 || buf_len % period_len) + return NULL; + + num_periods = buf_len / period_len; + num_segments = DIV_ROUND_UP(period_len, chan->max_length); + + desc = axi_dmac_alloc_desc(num_periods * num_segments); + if (!desc) + return NULL; + + axi_dmac_fill_linear_sg(chan, direction, buf_addr, num_periods, + period_len, desc->sg); + + desc->cyclic = true; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved( + struct dma_chan *c, struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + size_t dst_icg, src_icg; + + if (xt->frame_size != 1) + return NULL; + + if (xt->dir != chan->direction) + return NULL; + + if (axi_dmac_src_is_mem(chan)) { + if (!xt->src_inc || !axi_dmac_check_addr(chan, xt->src_start)) + return NULL; + } + + if (axi_dmac_dest_is_mem(chan)) { + if (!xt->dst_inc || !axi_dmac_check_addr(chan, xt->dst_start)) + return NULL; + } + + dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]); + src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]); + + if (chan->hw_2d) { + if (!axi_dmac_check_len(chan, xt->sgl[0].size) || + xt->numf == 0) + return NULL; + if (xt->sgl[0].size + dst_icg > chan->max_length || + xt->sgl[0].size + src_icg > chan->max_length) + return NULL; + } else { + if (dst_icg != 0 || src_icg != 0) + return NULL; + if (chan->max_length / xt->sgl[0].size < xt->numf) + return NULL; + if (!axi_dmac_check_len(chan, xt->sgl[0].size * xt->numf)) + return NULL; + } + + desc = axi_dmac_alloc_desc(1); + if (!desc) + return NULL; + + if (axi_dmac_src_is_mem(chan)) { + desc->sg[0].src_addr = xt->src_start; + desc->sg[0].src_stride = xt->sgl[0].size + src_icg; + } + + if (axi_dmac_dest_is_mem(chan)) { + desc->sg[0].dest_addr = xt->dst_start; + desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg; + } + + if (chan->hw_2d) { + desc->sg[0].x_len = xt->sgl[0].size; + desc->sg[0].y_len = xt->numf; + } else { + desc->sg[0].x_len = xt->sgl[0].size * xt->numf; + desc->sg[0].y_len = 1; + } + + if (flags & DMA_CYCLIC) + desc->cyclic = true; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static void axi_dmac_free_chan_resources(struct dma_chan *c) +{ + vchan_free_chan_resources(to_virt_chan(c)); +} + +static void axi_dmac_desc_free(struct virt_dma_desc *vdesc) +{ + kfree(container_of(vdesc, struct axi_dmac_desc, vdesc)); +} + +static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg) +{ + switch (reg) { + case AXI_DMAC_REG_IRQ_MASK: + case AXI_DMAC_REG_IRQ_SOURCE: + case AXI_DMAC_REG_IRQ_PENDING: + case AXI_DMAC_REG_CTRL: + case AXI_DMAC_REG_TRANSFER_ID: + case AXI_DMAC_REG_START_TRANSFER: + case AXI_DMAC_REG_FLAGS: + case AXI_DMAC_REG_DEST_ADDRESS: + case AXI_DMAC_REG_SRC_ADDRESS: + case AXI_DMAC_REG_X_LENGTH: + case AXI_DMAC_REG_Y_LENGTH: + case AXI_DMAC_REG_DEST_STRIDE: + case AXI_DMAC_REG_SRC_STRIDE: + case AXI_DMAC_REG_TRANSFER_DONE: + case AXI_DMAC_REG_ACTIVE_TRANSFER_ID: + case AXI_DMAC_REG_STATUS: + case AXI_DMAC_REG_CURRENT_SRC_ADDR: + case AXI_DMAC_REG_CURRENT_DEST_ADDR: + case AXI_DMAC_REG_PARTIAL_XFER_LEN: + case AXI_DMAC_REG_PARTIAL_XFER_ID: + return true; + default: + return false; + } +} + +static const struct regmap_config axi_dmac_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = AXI_DMAC_REG_PARTIAL_XFER_ID, + .readable_reg = axi_dmac_regmap_rdwr, + .writeable_reg = axi_dmac_regmap_rdwr, +}; + +static void axi_dmac_adjust_chan_params(struct axi_dmac_chan *chan) +{ + chan->address_align_mask = max(chan->dest_width, chan->src_width) - 1; + + if (axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan)) + chan->direction = DMA_MEM_TO_MEM; + else if (!axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan)) + chan->direction = DMA_MEM_TO_DEV; + else if (axi_dmac_dest_is_mem(chan) && !axi_dmac_src_is_mem(chan)) + chan->direction = DMA_DEV_TO_MEM; + else + chan->direction = DMA_DEV_TO_DEV; +} + +/* + * The configuration stored in the devicetree matches the configuration + * parameters of the peripheral instance and allows the driver to know which + * features are implemented and how it should behave. + */ +static int axi_dmac_parse_chan_dt(struct device_node *of_chan, + struct axi_dmac_chan *chan) +{ + u32 val; + int ret; + + ret = of_property_read_u32(of_chan, "reg", &val); + if (ret) + return ret; + + /* We only support 1 channel for now */ + if (val != 0) + return -EINVAL; + + ret = of_property_read_u32(of_chan, "adi,source-bus-type", &val); + if (ret) + return ret; + if (val > AXI_DMAC_BUS_TYPE_FIFO) + return -EINVAL; + chan->src_type = val; + + ret = of_property_read_u32(of_chan, "adi,destination-bus-type", &val); + if (ret) + return ret; + if (val > AXI_DMAC_BUS_TYPE_FIFO) + return -EINVAL; + chan->dest_type = val; + + ret = of_property_read_u32(of_chan, "adi,source-bus-width", &val); + if (ret) + return ret; + chan->src_width = val / 8; + + ret = of_property_read_u32(of_chan, "adi,destination-bus-width", &val); + if (ret) + return ret; + chan->dest_width = val / 8; + + axi_dmac_adjust_chan_params(chan); + + return 0; +} + +static int axi_dmac_parse_dt(struct device *dev, struct axi_dmac *dmac) +{ + struct device_node *of_channels, *of_chan; + int ret; + + of_channels = of_get_child_by_name(dev->of_node, "adi,channels"); + if (of_channels == NULL) + return -ENODEV; + + for_each_child_of_node(of_channels, of_chan) { + ret = axi_dmac_parse_chan_dt(of_chan, &dmac->chan); + if (ret) { + of_node_put(of_chan); + of_node_put(of_channels); + return -EINVAL; + } + } + of_node_put(of_channels); + + return 0; +} + +static int axi_dmac_read_chan_config(struct device *dev, struct axi_dmac *dmac) +{ + struct axi_dmac_chan *chan = &dmac->chan; + unsigned int val, desc; + + desc = axi_dmac_read(dmac, AXI_DMAC_REG_INTERFACE_DESC); + if (desc == 0) { + dev_err(dev, "DMA interface register reads zero\n"); + return -EFAULT; + } + + val = AXI_DMAC_DMA_SRC_TYPE_GET(desc); + if (val > AXI_DMAC_BUS_TYPE_FIFO) { + dev_err(dev, "Invalid source bus type read: %d\n", val); + return -EINVAL; + } + chan->src_type = val; + + val = AXI_DMAC_DMA_DST_TYPE_GET(desc); + if (val > AXI_DMAC_BUS_TYPE_FIFO) { + dev_err(dev, "Invalid destination bus type read: %d\n", val); + return -EINVAL; + } + chan->dest_type = val; + + val = AXI_DMAC_DMA_SRC_WIDTH_GET(desc); + if (val == 0) { + dev_err(dev, "Source bus width is zero\n"); + return -EINVAL; + } + /* widths are stored in log2 */ + chan->src_width = 1 << val; + + val = AXI_DMAC_DMA_DST_WIDTH_GET(desc); + if (val == 0) { + dev_err(dev, "Destination bus width is zero\n"); + return -EINVAL; + } + chan->dest_width = 1 << val; + + axi_dmac_adjust_chan_params(chan); + + return 0; +} + +static int axi_dmac_detect_caps(struct axi_dmac *dmac, unsigned int version) +{ + struct axi_dmac_chan *chan = &dmac->chan; + + axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, AXI_DMAC_FLAG_CYCLIC); + if (axi_dmac_read(dmac, AXI_DMAC_REG_FLAGS) == AXI_DMAC_FLAG_CYCLIC) + chan->hw_cyclic = true; + + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, 1); + if (axi_dmac_read(dmac, AXI_DMAC_REG_Y_LENGTH) == 1) + chan->hw_2d = true; + + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, 0xffffffff); + chan->max_length = axi_dmac_read(dmac, AXI_DMAC_REG_X_LENGTH); + if (chan->max_length != UINT_MAX) + chan->max_length++; + + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, 0xffffffff); + if (axi_dmac_read(dmac, AXI_DMAC_REG_DEST_ADDRESS) == 0 && + chan->dest_type == AXI_DMAC_BUS_TYPE_AXI_MM) { + dev_err(dmac->dma_dev.dev, + "Destination memory-mapped interface not supported."); + return -ENODEV; + } + + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, 0xffffffff); + if (axi_dmac_read(dmac, AXI_DMAC_REG_SRC_ADDRESS) == 0 && + chan->src_type == AXI_DMAC_BUS_TYPE_AXI_MM) { + dev_err(dmac->dma_dev.dev, + "Source memory-mapped interface not supported."); + return -ENODEV; + } + + if (version >= ADI_AXI_PCORE_VER(4, 2, 'a')) + chan->hw_partial_xfer = true; + + if (version >= ADI_AXI_PCORE_VER(4, 1, 'a')) { + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, 0x00); + chan->length_align_mask = + axi_dmac_read(dmac, AXI_DMAC_REG_X_LENGTH); + } else { + chan->length_align_mask = chan->address_align_mask; + } + + return 0; +} + +static int axi_dmac_probe(struct platform_device *pdev) +{ + struct dma_device *dma_dev; + struct axi_dmac *dmac; + struct regmap *regmap; + unsigned int version; + int ret; + + dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); + if (!dmac) + return -ENOMEM; + + dmac->irq = platform_get_irq(pdev, 0); + if (dmac->irq < 0) + return dmac->irq; + if (dmac->irq == 0) + return -EINVAL; + + dmac->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dmac->base)) + return PTR_ERR(dmac->base); + + dmac->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dmac->clk)) + return PTR_ERR(dmac->clk); + + ret = clk_prepare_enable(dmac->clk); + if (ret < 0) + return ret; + + version = axi_dmac_read(dmac, ADI_AXI_REG_VERSION); + + if (version >= ADI_AXI_PCORE_VER(4, 3, 'a')) + ret = axi_dmac_read_chan_config(&pdev->dev, dmac); + else + ret = axi_dmac_parse_dt(&pdev->dev, dmac); + + if (ret < 0) + goto err_clk_disable; + + INIT_LIST_HEAD(&dmac->chan.active_descs); + + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + + dma_dev = &dmac->dma_dev; + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + dma_cap_set(DMA_INTERLEAVE, dma_dev->cap_mask); + dma_dev->device_free_chan_resources = axi_dmac_free_chan_resources; + dma_dev->device_tx_status = dma_cookie_status; + dma_dev->device_issue_pending = axi_dmac_issue_pending; + dma_dev->device_prep_slave_sg = axi_dmac_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = axi_dmac_prep_dma_cyclic; + dma_dev->device_prep_interleaved_dma = axi_dmac_prep_interleaved; + dma_dev->device_terminate_all = axi_dmac_terminate_all; + dma_dev->device_synchronize = axi_dmac_synchronize; + dma_dev->dev = &pdev->dev; + dma_dev->src_addr_widths = BIT(dmac->chan.src_width); + dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width); + dma_dev->directions = BIT(dmac->chan.direction); + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + INIT_LIST_HEAD(&dma_dev->channels); + + dmac->chan.vchan.desc_free = axi_dmac_desc_free; + vchan_init(&dmac->chan.vchan, dma_dev); + + ret = axi_dmac_detect_caps(dmac, version); + if (ret) + goto err_clk_disable; + + dma_dev->copy_align = (dmac->chan.address_align_mask + 1); + + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00); + + if (of_dma_is_coherent(pdev->dev.of_node)) { + ret = axi_dmac_read(dmac, AXI_DMAC_REG_COHERENCY_DESC); + + if (version < ADI_AXI_PCORE_VER(4, 4, 'a') || + !AXI_DMAC_DST_COHERENT_GET(ret)) { + dev_err(dmac->dma_dev.dev, + "Coherent DMA not supported in hardware"); + ret = -EINVAL; + goto err_clk_disable; + } + } + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_clk_disable; + + ret = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, dma_dev); + if (ret) + goto err_unregister_device; + + ret = request_irq(dmac->irq, axi_dmac_interrupt_handler, IRQF_SHARED, + dev_name(&pdev->dev), dmac); + if (ret) + goto err_unregister_of; + + platform_set_drvdata(pdev, dmac); + + regmap = devm_regmap_init_mmio(&pdev->dev, dmac->base, + &axi_dmac_regmap_config); + if (IS_ERR(regmap)) { + ret = PTR_ERR(regmap); + goto err_free_irq; + } + + return 0; + +err_free_irq: + free_irq(dmac->irq, dmac); +err_unregister_of: + of_dma_controller_free(pdev->dev.of_node); +err_unregister_device: + dma_async_device_unregister(&dmac->dma_dev); +err_clk_disable: + clk_disable_unprepare(dmac->clk); + + return ret; +} + +static int axi_dmac_remove(struct platform_device *pdev) +{ + struct axi_dmac *dmac = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + free_irq(dmac->irq, dmac); + tasklet_kill(&dmac->chan.vchan.task); + dma_async_device_unregister(&dmac->dma_dev); + clk_disable_unprepare(dmac->clk); + + return 0; +} + +static const struct of_device_id axi_dmac_of_match_table[] = { + { .compatible = "adi,axi-dmac-1.00.a" }, + { }, +}; +MODULE_DEVICE_TABLE(of, axi_dmac_of_match_table); + +static struct platform_driver axi_dmac_driver = { + .driver = { + .name = "dma-axi-dmac", + .of_match_table = axi_dmac_of_match_table, + }, + .probe = axi_dmac_probe, + .remove = axi_dmac_remove, +}; +module_platform_driver(axi_dmac_driver); + +MODULE_AUTHOR("Lars-Peter Clausen "); +MODULE_DESCRIPTION("DMA controller driver for the AXI-DMAC controller"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c new file mode 100644 index 0000000000..adbd47bd6a --- /dev/null +++ b/drivers/dma/dma-jz4780.c @@ -0,0 +1,1148 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Ingenic JZ4780 DMA controller + * + * Copyright (c) 2015 Imagination Technologies + * Author: Alex Smith + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +/* Global registers. */ +#define JZ_DMA_REG_DMAC 0x00 +#define JZ_DMA_REG_DIRQP 0x04 +#define JZ_DMA_REG_DDR 0x08 +#define JZ_DMA_REG_DDRS 0x0c +#define JZ_DMA_REG_DCKE 0x10 +#define JZ_DMA_REG_DCKES 0x14 +#define JZ_DMA_REG_DCKEC 0x18 +#define JZ_DMA_REG_DMACP 0x1c +#define JZ_DMA_REG_DSIRQP 0x20 +#define JZ_DMA_REG_DSIRQM 0x24 +#define JZ_DMA_REG_DCIRQP 0x28 +#define JZ_DMA_REG_DCIRQM 0x2c + +/* Per-channel registers. */ +#define JZ_DMA_REG_CHAN(n) (n * 0x20) +#define JZ_DMA_REG_DSA 0x00 +#define JZ_DMA_REG_DTA 0x04 +#define JZ_DMA_REG_DTC 0x08 +#define JZ_DMA_REG_DRT 0x0c +#define JZ_DMA_REG_DCS 0x10 +#define JZ_DMA_REG_DCM 0x14 +#define JZ_DMA_REG_DDA 0x18 +#define JZ_DMA_REG_DSD 0x1c + +#define JZ_DMA_DMAC_DMAE BIT(0) +#define JZ_DMA_DMAC_AR BIT(2) +#define JZ_DMA_DMAC_HLT BIT(3) +#define JZ_DMA_DMAC_FAIC BIT(27) +#define JZ_DMA_DMAC_FMSC BIT(31) + +#define JZ_DMA_DRT_AUTO 0x8 + +#define JZ_DMA_DCS_CTE BIT(0) +#define JZ_DMA_DCS_HLT BIT(2) +#define JZ_DMA_DCS_TT BIT(3) +#define JZ_DMA_DCS_AR BIT(4) +#define JZ_DMA_DCS_DES8 BIT(30) + +#define JZ_DMA_DCM_LINK BIT(0) +#define JZ_DMA_DCM_TIE BIT(1) +#define JZ_DMA_DCM_STDE BIT(2) +#define JZ_DMA_DCM_TSZ_SHIFT 8 +#define JZ_DMA_DCM_TSZ_MASK (0x7 << JZ_DMA_DCM_TSZ_SHIFT) +#define JZ_DMA_DCM_DP_SHIFT 12 +#define JZ_DMA_DCM_SP_SHIFT 14 +#define JZ_DMA_DCM_DAI BIT(22) +#define JZ_DMA_DCM_SAI BIT(23) + +#define JZ_DMA_SIZE_4_BYTE 0x0 +#define JZ_DMA_SIZE_1_BYTE 0x1 +#define JZ_DMA_SIZE_2_BYTE 0x2 +#define JZ_DMA_SIZE_16_BYTE 0x3 +#define JZ_DMA_SIZE_32_BYTE 0x4 +#define JZ_DMA_SIZE_64_BYTE 0x5 +#define JZ_DMA_SIZE_128_BYTE 0x6 + +#define JZ_DMA_WIDTH_32_BIT 0x0 +#define JZ_DMA_WIDTH_8_BIT 0x1 +#define JZ_DMA_WIDTH_16_BIT 0x2 + +#define JZ_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +#define JZ4780_DMA_CTRL_OFFSET 0x1000 + +/* macros for use with jz4780_dma_soc_data.flags */ +#define JZ_SOC_DATA_ALLOW_LEGACY_DT BIT(0) +#define JZ_SOC_DATA_PROGRAMMABLE_DMA BIT(1) +#define JZ_SOC_DATA_PER_CHAN_PM BIT(2) +#define JZ_SOC_DATA_NO_DCKES_DCKEC BIT(3) +#define JZ_SOC_DATA_BREAK_LINKS BIT(4) + +/** + * struct jz4780_dma_hwdesc - descriptor structure read by the DMA controller. + * @dcm: value for the DCM (channel command) register + * @dsa: source address + * @dta: target address + * @dtc: transfer count (number of blocks of the transfer size specified in DCM + * to transfer) in the low 24 bits, offset of the next descriptor from the + * descriptor base address in the upper 8 bits. + */ +struct jz4780_dma_hwdesc { + u32 dcm; + u32 dsa; + u32 dta; + u32 dtc; +}; + +/* Size of allocations for hardware descriptor blocks. */ +#define JZ_DMA_DESC_BLOCK_SIZE PAGE_SIZE +#define JZ_DMA_MAX_DESC \ + (JZ_DMA_DESC_BLOCK_SIZE / sizeof(struct jz4780_dma_hwdesc)) + +struct jz4780_dma_desc { + struct virt_dma_desc vdesc; + + struct jz4780_dma_hwdesc *desc; + dma_addr_t desc_phys; + unsigned int count; + enum dma_transaction_type type; + u32 transfer_type; + u32 status; +}; + +struct jz4780_dma_chan { + struct virt_dma_chan vchan; + unsigned int id; + struct dma_pool *desc_pool; + + u32 transfer_type_tx, transfer_type_rx; + u32 transfer_shift; + struct dma_slave_config config; + + struct jz4780_dma_desc *desc; + unsigned int curr_hwdesc; +}; + +struct jz4780_dma_soc_data { + unsigned int nb_channels; + unsigned int transfer_ord_max; + unsigned long flags; +}; + +struct jz4780_dma_dev { + struct dma_device dma_device; + void __iomem *chn_base; + void __iomem *ctrl_base; + struct clk *clk; + unsigned int irq; + const struct jz4780_dma_soc_data *soc_data; + + u32 chan_reserved; + struct jz4780_dma_chan chan[]; +}; + +struct jz4780_dma_filter_data { + u32 transfer_type_tx, transfer_type_rx; + int channel; +}; + +static inline struct jz4780_dma_chan *to_jz4780_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct jz4780_dma_chan, vchan.chan); +} + +static inline struct jz4780_dma_desc *to_jz4780_dma_desc( + struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct jz4780_dma_desc, vdesc); +} + +static inline struct jz4780_dma_dev *jz4780_dma_chan_parent( + struct jz4780_dma_chan *jzchan) +{ + return container_of(jzchan->vchan.chan.device, struct jz4780_dma_dev, + dma_device); +} + +static inline u32 jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma, + unsigned int chn, unsigned int reg) +{ + return readl(jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn)); +} + +static inline void jz4780_dma_chn_writel(struct jz4780_dma_dev *jzdma, + unsigned int chn, unsigned int reg, u32 val) +{ + writel(val, jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn)); +} + +static inline u32 jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma, + unsigned int reg) +{ + return readl(jzdma->ctrl_base + reg); +} + +static inline void jz4780_dma_ctrl_writel(struct jz4780_dma_dev *jzdma, + unsigned int reg, u32 val) +{ + writel(val, jzdma->ctrl_base + reg); +} + +static inline void jz4780_dma_chan_enable(struct jz4780_dma_dev *jzdma, + unsigned int chn) +{ + if (jzdma->soc_data->flags & JZ_SOC_DATA_PER_CHAN_PM) { + unsigned int reg; + + if (jzdma->soc_data->flags & JZ_SOC_DATA_NO_DCKES_DCKEC) + reg = JZ_DMA_REG_DCKE; + else + reg = JZ_DMA_REG_DCKES; + + jz4780_dma_ctrl_writel(jzdma, reg, BIT(chn)); + } +} + +static inline void jz4780_dma_chan_disable(struct jz4780_dma_dev *jzdma, + unsigned int chn) +{ + if ((jzdma->soc_data->flags & JZ_SOC_DATA_PER_CHAN_PM) && + !(jzdma->soc_data->flags & JZ_SOC_DATA_NO_DCKES_DCKEC)) + jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DCKEC, BIT(chn)); +} + +static struct jz4780_dma_desc * +jz4780_dma_desc_alloc(struct jz4780_dma_chan *jzchan, unsigned int count, + enum dma_transaction_type type, + enum dma_transfer_direction direction) +{ + struct jz4780_dma_desc *desc; + + if (count > JZ_DMA_MAX_DESC) + return NULL; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->desc = dma_pool_alloc(jzchan->desc_pool, GFP_NOWAIT, + &desc->desc_phys); + if (!desc->desc) { + kfree(desc); + return NULL; + } + + desc->count = count; + desc->type = type; + + if (direction == DMA_DEV_TO_MEM) + desc->transfer_type = jzchan->transfer_type_rx; + else + desc->transfer_type = jzchan->transfer_type_tx; + + return desc; +} + +static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc) +{ + struct jz4780_dma_desc *desc = to_jz4780_dma_desc(vdesc); + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(vdesc->tx.chan); + + dma_pool_free(jzchan->desc_pool, desc->desc, desc->desc_phys); + kfree(desc); +} + +static u32 jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan, + unsigned long val, u32 *shift) +{ + struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); + int ord = ffs(val) - 1; + + /* + * 8 byte transfer sizes unsupported so fall back on 4. If it's larger + * than the maximum, just limit it. It is perfectly safe to fall back + * in this way since we won't exceed the maximum burst size supported + * by the device, the only effect is reduced efficiency. This is better + * than refusing to perform the request at all. + */ + if (ord == 3) + ord = 2; + else if (ord > jzdma->soc_data->transfer_ord_max) + ord = jzdma->soc_data->transfer_ord_max; + + *shift = ord; + + switch (ord) { + case 0: + return JZ_DMA_SIZE_1_BYTE; + case 1: + return JZ_DMA_SIZE_2_BYTE; + case 2: + return JZ_DMA_SIZE_4_BYTE; + case 4: + return JZ_DMA_SIZE_16_BYTE; + case 5: + return JZ_DMA_SIZE_32_BYTE; + case 6: + return JZ_DMA_SIZE_64_BYTE; + default: + return JZ_DMA_SIZE_128_BYTE; + } +} + +static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, + struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len, + enum dma_transfer_direction direction) +{ + struct dma_slave_config *config = &jzchan->config; + u32 width, maxburst, tsz; + + if (direction == DMA_MEM_TO_DEV) { + desc->dcm = JZ_DMA_DCM_SAI; + desc->dsa = addr; + desc->dta = config->dst_addr; + + width = config->dst_addr_width; + maxburst = config->dst_maxburst; + } else { + desc->dcm = JZ_DMA_DCM_DAI; + desc->dsa = config->src_addr; + desc->dta = addr; + + width = config->src_addr_width; + maxburst = config->src_maxburst; + } + + /* + * This calculates the maximum transfer size that can be used with the + * given address, length, width and maximum burst size. The address + * must be aligned to the transfer size, the total length must be + * divisible by the transfer size, and we must not use more than the + * maximum burst specified by the user. + */ + tsz = jz4780_dma_transfer_size(jzchan, addr | len | (width * maxburst), + &jzchan->transfer_shift); + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + case DMA_SLAVE_BUSWIDTH_2_BYTES: + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + width = JZ_DMA_WIDTH_32_BIT; + break; + default: + return -EINVAL; + } + + desc->dcm |= tsz << JZ_DMA_DCM_TSZ_SHIFT; + desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT; + desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT; + + desc->dtc = len >> jzchan->transfer_shift; + return 0; +} + +static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); + struct jz4780_dma_desc *desc; + unsigned int i; + int err; + + desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE, direction); + if (!desc) + return NULL; + + for (i = 0; i < sg_len; i++) { + err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], + sg_dma_address(&sgl[i]), + sg_dma_len(&sgl[i]), + direction); + if (err < 0) { + jz4780_dma_desc_free(&jzchan->desc->vdesc); + return NULL; + } + + desc->desc[i].dcm |= JZ_DMA_DCM_TIE; + + if (i != (sg_len - 1) && + !(jzdma->soc_data->flags & JZ_SOC_DATA_BREAK_LINKS)) { + /* Automatically proceed to the next descriptor. */ + desc->desc[i].dcm |= JZ_DMA_DCM_LINK; + + /* + * The upper 8 bits of the DTC field in the descriptor + * must be set to (offset from descriptor base of next + * descriptor >> 4). + */ + desc->desc[i].dtc |= + (((i + 1) * sizeof(*desc->desc)) >> 4) << 24; + } + } + + return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + struct jz4780_dma_desc *desc; + unsigned int periods, i; + int err; + + if (buf_len % period_len) + return NULL; + + periods = buf_len / period_len; + + desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC, direction); + if (!desc) + return NULL; + + for (i = 0; i < periods; i++) { + err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr, + period_len, direction); + if (err < 0) { + jz4780_dma_desc_free(&jzchan->desc->vdesc); + return NULL; + } + + buf_addr += period_len; + + /* + * Set the link bit to indicate that the controller should + * automatically proceed to the next descriptor. In + * jz4780_dma_begin(), this will be cleared if we need to issue + * an interrupt after each period. + */ + desc->desc[i].dcm |= JZ_DMA_DCM_TIE | JZ_DMA_DCM_LINK; + + /* + * The upper 8 bits of the DTC field in the descriptor must be + * set to (offset from descriptor base of next descriptor >> 4). + * If this is the last descriptor, link it back to the first, + * i.e. leave offset set to 0, otherwise point to the next one. + */ + if (i != (periods - 1)) { + desc->desc[i].dtc |= + (((i + 1) * sizeof(*desc->desc)) >> 4) << 24; + } + } + + return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + struct jz4780_dma_desc *desc; + u32 tsz; + + desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY, 0); + if (!desc) + return NULL; + + tsz = jz4780_dma_transfer_size(jzchan, dest | src | len, + &jzchan->transfer_shift); + + desc->transfer_type = JZ_DMA_DRT_AUTO; + + desc->desc[0].dsa = src; + desc->desc[0].dta = dest; + desc->desc[0].dcm = JZ_DMA_DCM_TIE | JZ_DMA_DCM_SAI | JZ_DMA_DCM_DAI | + tsz << JZ_DMA_DCM_TSZ_SHIFT | + JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT | + JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT; + desc->desc[0].dtc = len >> jzchan->transfer_shift; + + return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags); +} + +static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan) +{ + struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); + struct virt_dma_desc *vdesc; + unsigned int i; + dma_addr_t desc_phys; + + if (!jzchan->desc) { + vdesc = vchan_next_desc(&jzchan->vchan); + if (!vdesc) + return; + + list_del(&vdesc->node); + + jzchan->desc = to_jz4780_dma_desc(vdesc); + jzchan->curr_hwdesc = 0; + + if (jzchan->desc->type == DMA_CYCLIC && vdesc->tx.callback) { + /* + * The DMA controller doesn't support triggering an + * interrupt after processing each descriptor, only + * after processing an entire terminated list of + * descriptors. For a cyclic DMA setup the list of + * descriptors is not terminated so we can never get an + * interrupt. + * + * If the user requested a callback for a cyclic DMA + * setup then we workaround this hardware limitation + * here by degrading to a set of unlinked descriptors + * which we will submit in sequence in response to the + * completion of processing the previous descriptor. + */ + for (i = 0; i < jzchan->desc->count; i++) + jzchan->desc->desc[i].dcm &= ~JZ_DMA_DCM_LINK; + } + } else { + /* + * There is an existing transfer, therefore this must be one + * for which we unlinked the descriptors above. Advance to the + * next one in the list. + */ + jzchan->curr_hwdesc = + (jzchan->curr_hwdesc + 1) % jzchan->desc->count; + } + + /* Enable the channel's clock. */ + jz4780_dma_chan_enable(jzdma, jzchan->id); + + /* Use 4-word descriptors. */ + jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DCS, 0); + + /* Set transfer type. */ + jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DRT, + jzchan->desc->transfer_type); + + /* + * Set the transfer count. This is redundant for a descriptor-driven + * transfer. However, there can be a delay between the transfer start + * time and when DTCn reg contains the new transfer count. Setting + * it explicitly ensures residue is computed correctly at all times. + */ + jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DTC, + jzchan->desc->desc[jzchan->curr_hwdesc].dtc); + + /* Write descriptor address and initiate descriptor fetch. */ + desc_phys = jzchan->desc->desc_phys + + (jzchan->curr_hwdesc * sizeof(*jzchan->desc->desc)); + jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DDA, desc_phys); + jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DDRS, BIT(jzchan->id)); + + /* Enable the channel. */ + jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DCS, + JZ_DMA_DCS_CTE); +} + +static void jz4780_dma_issue_pending(struct dma_chan *chan) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&jzchan->vchan.lock, flags); + + if (vchan_issue_pending(&jzchan->vchan) && !jzchan->desc) + jz4780_dma_begin(jzchan); + + spin_unlock_irqrestore(&jzchan->vchan.lock, flags); +} + +static int jz4780_dma_terminate_all(struct dma_chan *chan) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&jzchan->vchan.lock, flags); + + /* Clear the DMA status and stop the transfer. */ + jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DCS, 0); + if (jzchan->desc) { + vchan_terminate_vdesc(&jzchan->desc->vdesc); + jzchan->desc = NULL; + } + + jz4780_dma_chan_disable(jzdma, jzchan->id); + + vchan_get_all_descriptors(&jzchan->vchan, &head); + + spin_unlock_irqrestore(&jzchan->vchan.lock, flags); + + vchan_dma_desc_free_list(&jzchan->vchan, &head); + return 0; +} + +static void jz4780_dma_synchronize(struct dma_chan *chan) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); + + vchan_synchronize(&jzchan->vchan); + jz4780_dma_chan_disable(jzdma, jzchan->id); +} + +static int jz4780_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + + if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) + || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)) + return -EINVAL; + + /* Copy the reset of the slave configuration, it is used later. */ + memcpy(&jzchan->config, config, sizeof(jzchan->config)); + + return 0; +} + +static size_t jz4780_dma_desc_residue(struct jz4780_dma_chan *jzchan, + struct jz4780_dma_desc *desc, unsigned int next_sg) +{ + struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); + unsigned int count = 0; + unsigned int i; + + for (i = next_sg; i < desc->count; i++) + count += desc->desc[i].dtc & GENMASK(23, 0); + + if (next_sg != 0) + count += jz4780_dma_chn_readl(jzdma, jzchan->id, + JZ_DMA_REG_DTC); + + return count << jzchan->transfer_shift; +} + +static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + struct virt_dma_desc *vdesc; + enum dma_status status; + unsigned long flags; + unsigned long residue = 0; + + spin_lock_irqsave(&jzchan->vchan.lock, flags); + + status = dma_cookie_status(chan, cookie, txstate); + if ((status == DMA_COMPLETE) || (txstate == NULL)) + goto out_unlock_irqrestore; + + vdesc = vchan_find_desc(&jzchan->vchan, cookie); + if (vdesc) { + /* On the issued list, so hasn't been processed yet */ + residue = jz4780_dma_desc_residue(jzchan, + to_jz4780_dma_desc(vdesc), 0); + } else if (cookie == jzchan->desc->vdesc.tx.cookie) { + residue = jz4780_dma_desc_residue(jzchan, jzchan->desc, + jzchan->curr_hwdesc + 1); + } + dma_set_residue(txstate, residue); + + if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc + && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) + status = DMA_ERROR; + +out_unlock_irqrestore: + spin_unlock_irqrestore(&jzchan->vchan.lock, flags); + return status; +} + +static bool jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma, + struct jz4780_dma_chan *jzchan) +{ + const unsigned int soc_flags = jzdma->soc_data->flags; + struct jz4780_dma_desc *desc = jzchan->desc; + u32 dcs; + bool ack = true; + + spin_lock(&jzchan->vchan.lock); + + dcs = jz4780_dma_chn_readl(jzdma, jzchan->id, JZ_DMA_REG_DCS); + jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DCS, 0); + + if (dcs & JZ_DMA_DCS_AR) { + dev_warn(&jzchan->vchan.chan.dev->device, + "address error (DCS=0x%x)\n", dcs); + } + + if (dcs & JZ_DMA_DCS_HLT) { + dev_warn(&jzchan->vchan.chan.dev->device, + "channel halt (DCS=0x%x)\n", dcs); + } + + if (jzchan->desc) { + jzchan->desc->status = dcs; + + if ((dcs & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) == 0) { + if (jzchan->desc->type == DMA_CYCLIC) { + vchan_cyclic_callback(&jzchan->desc->vdesc); + + jz4780_dma_begin(jzchan); + } else if (dcs & JZ_DMA_DCS_TT) { + if (!(soc_flags & JZ_SOC_DATA_BREAK_LINKS) || + (jzchan->curr_hwdesc + 1 == desc->count)) { + vchan_cookie_complete(&desc->vdesc); + jzchan->desc = NULL; + } + + jz4780_dma_begin(jzchan); + } else { + /* False positive - continue the transfer */ + ack = false; + jz4780_dma_chn_writel(jzdma, jzchan->id, + JZ_DMA_REG_DCS, + JZ_DMA_DCS_CTE); + } + } + } else { + dev_err(&jzchan->vchan.chan.dev->device, + "channel IRQ with no active transfer\n"); + } + + spin_unlock(&jzchan->vchan.lock); + + return ack; +} + +static irqreturn_t jz4780_dma_irq_handler(int irq, void *data) +{ + struct jz4780_dma_dev *jzdma = data; + unsigned int nb_channels = jzdma->soc_data->nb_channels; + unsigned long pending; + u32 dmac; + int i; + + pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP); + + for_each_set_bit(i, &pending, nb_channels) { + if (jz4780_dma_chan_irq(jzdma, &jzdma->chan[i])) + pending &= ~BIT(i); + } + + /* Clear halt and address error status of all channels. */ + dmac = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DMAC); + dmac &= ~(JZ_DMA_DMAC_HLT | JZ_DMA_DMAC_AR); + jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DMAC, dmac); + + /* Clear interrupt pending status. */ + jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DIRQP, pending); + + return IRQ_HANDLED; +} + +static int jz4780_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + + jzchan->desc_pool = dma_pool_create(dev_name(&chan->dev->device), + chan->device->dev, + JZ_DMA_DESC_BLOCK_SIZE, + PAGE_SIZE, 0); + if (!jzchan->desc_pool) { + dev_err(&chan->dev->device, + "failed to allocate descriptor pool\n"); + return -ENOMEM; + } + + return 0; +} + +static void jz4780_dma_free_chan_resources(struct dma_chan *chan) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + + vchan_free_chan_resources(&jzchan->vchan); + dma_pool_destroy(jzchan->desc_pool); + jzchan->desc_pool = NULL; +} + +static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param) +{ + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); + struct jz4780_dma_filter_data *data = param; + + + if (data->channel > -1) { + if (data->channel != jzchan->id) + return false; + } else if (jzdma->chan_reserved & BIT(jzchan->id)) { + return false; + } + + jzchan->transfer_type_tx = data->transfer_type_tx; + jzchan->transfer_type_rx = data->transfer_type_rx; + + return true; +} + +static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct jz4780_dma_dev *jzdma = ofdma->of_dma_data; + dma_cap_mask_t mask = jzdma->dma_device.cap_mask; + struct jz4780_dma_filter_data data; + + if (dma_spec->args_count == 2) { + data.transfer_type_tx = dma_spec->args[0]; + data.transfer_type_rx = dma_spec->args[0]; + data.channel = dma_spec->args[1]; + } else if (dma_spec->args_count == 3) { + data.transfer_type_tx = dma_spec->args[0]; + data.transfer_type_rx = dma_spec->args[1]; + data.channel = dma_spec->args[2]; + } else { + return NULL; + } + + if (data.channel > -1) { + if (data.channel >= jzdma->soc_data->nb_channels) { + dev_err(jzdma->dma_device.dev, + "device requested non-existent channel %u\n", + data.channel); + return NULL; + } + + /* Can only select a channel marked as reserved. */ + if (!(jzdma->chan_reserved & BIT(data.channel))) { + dev_err(jzdma->dma_device.dev, + "device requested unreserved channel %u\n", + data.channel); + return NULL; + } + + jzdma->chan[data.channel].transfer_type_tx = data.transfer_type_tx; + jzdma->chan[data.channel].transfer_type_rx = data.transfer_type_rx; + + return dma_get_slave_channel( + &jzdma->chan[data.channel].vchan.chan); + } else { + return __dma_request_channel(&mask, jz4780_dma_filter_fn, &data, + ofdma->of_node); + } +} + +static int jz4780_dma_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct jz4780_dma_soc_data *soc_data; + struct jz4780_dma_dev *jzdma; + struct jz4780_dma_chan *jzchan; + struct dma_device *dd; + struct resource *res; + int i, ret; + + if (!dev->of_node) { + dev_err(dev, "This driver must be probed from devicetree\n"); + return -EINVAL; + } + + soc_data = device_get_match_data(dev); + if (!soc_data) + return -EINVAL; + + jzdma = devm_kzalloc(dev, struct_size(jzdma, chan, + soc_data->nb_channels), GFP_KERNEL); + if (!jzdma) + return -ENOMEM; + + jzdma->soc_data = soc_data; + platform_set_drvdata(pdev, jzdma); + + jzdma->chn_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(jzdma->chn_base)) + return PTR_ERR(jzdma->chn_base); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (res) { + jzdma->ctrl_base = devm_ioremap_resource(dev, res); + if (IS_ERR(jzdma->ctrl_base)) + return PTR_ERR(jzdma->ctrl_base); + } else if (soc_data->flags & JZ_SOC_DATA_ALLOW_LEGACY_DT) { + /* + * On JZ4780, if the second memory resource was not supplied, + * assume we're using an old devicetree, and calculate the + * offset to the control registers. + */ + jzdma->ctrl_base = jzdma->chn_base + JZ4780_DMA_CTRL_OFFSET; + } else { + dev_err(dev, "failed to get I/O memory\n"); + return -EINVAL; + } + + jzdma->clk = devm_clk_get(dev, NULL); + if (IS_ERR(jzdma->clk)) { + dev_err(dev, "failed to get clock\n"); + ret = PTR_ERR(jzdma->clk); + return ret; + } + + clk_prepare_enable(jzdma->clk); + + /* Property is optional, if it doesn't exist the value will remain 0. */ + of_property_read_u32_index(dev->of_node, "ingenic,reserved-channels", + 0, &jzdma->chan_reserved); + + dd = &jzdma->dma_device; + + /* + * The real segment size limit is dependent on the size unit selected + * for the transfer. Because the size unit is selected automatically + * and may be as small as 1 byte, use a safe limit of 2^24-1 bytes to + * ensure the 24-bit transfer count in the descriptor cannot overflow. + */ + dma_set_max_seg_size(dev, 0xffffff); + + dma_cap_set(DMA_MEMCPY, dd->cap_mask); + dma_cap_set(DMA_SLAVE, dd->cap_mask); + dma_cap_set(DMA_CYCLIC, dd->cap_mask); + + dd->dev = dev; + dd->copy_align = DMAENGINE_ALIGN_4_BYTES; + dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources; + dd->device_free_chan_resources = jz4780_dma_free_chan_resources; + dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg; + dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic; + dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy; + dd->device_config = jz4780_dma_config; + dd->device_terminate_all = jz4780_dma_terminate_all; + dd->device_synchronize = jz4780_dma_synchronize; + dd->device_tx_status = jz4780_dma_tx_status; + dd->device_issue_pending = jz4780_dma_issue_pending; + dd->src_addr_widths = JZ_DMA_BUSWIDTHS; + dd->dst_addr_widths = JZ_DMA_BUSWIDTHS; + dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + dd->max_sg_burst = JZ_DMA_MAX_DESC; + + /* + * Enable DMA controller, mark all channels as not programmable. + * Also set the FMSC bit - it increases MSC performance, so it makes + * little sense not to enable it. + */ + jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DMAC, JZ_DMA_DMAC_DMAE | + JZ_DMA_DMAC_FAIC | JZ_DMA_DMAC_FMSC); + + if (soc_data->flags & JZ_SOC_DATA_PROGRAMMABLE_DMA) + jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DMACP, 0); + + INIT_LIST_HEAD(&dd->channels); + + for (i = 0; i < soc_data->nb_channels; i++) { + jzchan = &jzdma->chan[i]; + jzchan->id = i; + + vchan_init(&jzchan->vchan, dd); + jzchan->vchan.desc_free = jz4780_dma_desc_free; + } + + /* + * On JZ4760, chan0 won't enable properly the first time. + * Enabling then disabling chan1 will magically make chan0 work + * correctly. + */ + jz4780_dma_chan_enable(jzdma, 1); + jz4780_dma_chan_disable(jzdma, 1); + + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto err_disable_clk; + + jzdma->irq = ret; + + ret = request_irq(jzdma->irq, jz4780_dma_irq_handler, 0, dev_name(dev), + jzdma); + if (ret) { + dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq); + goto err_disable_clk; + } + + ret = dmaenginem_async_device_register(dd); + if (ret) { + dev_err(dev, "failed to register device\n"); + goto err_free_irq; + } + + /* Register with OF DMA helpers. */ + ret = of_dma_controller_register(dev->of_node, jz4780_of_dma_xlate, + jzdma); + if (ret) { + dev_err(dev, "failed to register OF DMA controller\n"); + goto err_free_irq; + } + + dev_info(dev, "JZ4780 DMA controller initialised\n"); + return 0; + +err_free_irq: + free_irq(jzdma->irq, jzdma); + +err_disable_clk: + clk_disable_unprepare(jzdma->clk); + return ret; +} + +static int jz4780_dma_remove(struct platform_device *pdev) +{ + struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev); + int i; + + of_dma_controller_free(pdev->dev.of_node); + + clk_disable_unprepare(jzdma->clk); + free_irq(jzdma->irq, jzdma); + + for (i = 0; i < jzdma->soc_data->nb_channels; i++) + tasklet_kill(&jzdma->chan[i].vchan.task); + + return 0; +} + +static const struct jz4780_dma_soc_data jz4740_dma_soc_data = { + .nb_channels = 6, + .transfer_ord_max = 5, + .flags = JZ_SOC_DATA_BREAK_LINKS, +}; + +static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = { + .nb_channels = 6, + .transfer_ord_max = 5, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC | + JZ_SOC_DATA_BREAK_LINKS, +}; + +static const struct jz4780_dma_soc_data jz4755_dma_soc_data = { + .nb_channels = 4, + .transfer_ord_max = 5, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC | + JZ_SOC_DATA_BREAK_LINKS, +}; + +static const struct jz4780_dma_soc_data jz4760_dma_soc_data = { + .nb_channels = 5, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, +}; + +static const struct jz4780_dma_soc_data jz4760_mdma_soc_data = { + .nb_channels = 2, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, +}; + +static const struct jz4780_dma_soc_data jz4760_bdma_soc_data = { + .nb_channels = 3, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, +}; + +static const struct jz4780_dma_soc_data jz4760b_dma_soc_data = { + .nb_channels = 5, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM, +}; + +static const struct jz4780_dma_soc_data jz4760b_mdma_soc_data = { + .nb_channels = 2, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM, +}; + +static const struct jz4780_dma_soc_data jz4760b_bdma_soc_data = { + .nb_channels = 3, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM, +}; + +static const struct jz4780_dma_soc_data jz4770_dma_soc_data = { + .nb_channels = 6, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM, +}; + +static const struct jz4780_dma_soc_data jz4780_dma_soc_data = { + .nb_channels = 32, + .transfer_ord_max = 7, + .flags = JZ_SOC_DATA_ALLOW_LEGACY_DT | JZ_SOC_DATA_PROGRAMMABLE_DMA, +}; + +static const struct jz4780_dma_soc_data x1000_dma_soc_data = { + .nb_channels = 8, + .transfer_ord_max = 7, + .flags = JZ_SOC_DATA_PROGRAMMABLE_DMA, +}; + +static const struct jz4780_dma_soc_data x1830_dma_soc_data = { + .nb_channels = 32, + .transfer_ord_max = 7, + .flags = JZ_SOC_DATA_PROGRAMMABLE_DMA, +}; + +static const struct of_device_id jz4780_dma_dt_match[] = { + { .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data }, + { .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data }, + { .compatible = "ingenic,jz4755-dma", .data = &jz4755_dma_soc_data }, + { .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data }, + { .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data }, + { .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data }, + { .compatible = "ingenic,jz4760b-dma", .data = &jz4760b_dma_soc_data }, + { .compatible = "ingenic,jz4760b-mdma", .data = &jz4760b_mdma_soc_data }, + { .compatible = "ingenic,jz4760b-bdma", .data = &jz4760b_bdma_soc_data }, + { .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data }, + { .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data }, + { .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data }, + { .compatible = "ingenic,x1830-dma", .data = &x1830_dma_soc_data }, + {}, +}; +MODULE_DEVICE_TABLE(of, jz4780_dma_dt_match); + +static struct platform_driver jz4780_dma_driver = { + .probe = jz4780_dma_probe, + .remove = jz4780_dma_remove, + .driver = { + .name = "jz4780-dma", + .of_match_table = jz4780_dma_dt_match, + }, +}; + +static int __init jz4780_dma_init(void) +{ + return platform_driver_register(&jz4780_dma_driver); +} +subsys_initcall(jz4780_dma_init); + +static void __exit jz4780_dma_exit(void) +{ + platform_driver_unregister(&jz4780_dma_driver); +} +module_exit(jz4780_dma_exit); + +MODULE_AUTHOR("Alex Smith "); +MODULE_DESCRIPTION("Ingenic JZ4780 DMA controller driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c new file mode 100644 index 0000000000..491b222402 --- /dev/null +++ b/drivers/dma/dmaengine.c @@ -0,0 +1,1601 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + */ + +/* + * This code implements the DMA subsystem. It provides a HW-neutral interface + * for other kernel code to use asynchronous memory copy capabilities, + * if present, and allows different HW DMA drivers to register as providing + * this capability. + * + * Due to the fact we are accelerating what is already a relatively fast + * operation, the code goes to great lengths to avoid additional overhead, + * such as locking. + * + * LOCKING: + * + * The subsystem keeps a global list of dma_device structs it is protected by a + * mutex, dma_list_mutex. + * + * A subsystem can get access to a channel by calling dmaengine_get() followed + * by dma_find_channel(), or if it has need for an exclusive channel it can call + * dma_request_channel(). Once a channel is allocated a reference is taken + * against its corresponding driver to disable removal. + * + * Each device has a channels list, which runs unlocked but is never modified + * once the device is registered, it's just setup by the driver. + * + * See Documentation/driver-api/dmaengine for more details + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +static DEFINE_MUTEX(dma_list_mutex); +static DEFINE_IDA(dma_ida); +static LIST_HEAD(dma_device_list); +static long dmaengine_ref_count; + +/* --- debugfs implementation --- */ +#ifdef CONFIG_DEBUG_FS +#include + +static struct dentry *rootdir; + +static void dmaengine_debug_register(struct dma_device *dma_dev) +{ + dma_dev->dbg_dev_root = debugfs_create_dir(dev_name(dma_dev->dev), + rootdir); + if (IS_ERR(dma_dev->dbg_dev_root)) + dma_dev->dbg_dev_root = NULL; +} + +static void dmaengine_debug_unregister(struct dma_device *dma_dev) +{ + debugfs_remove_recursive(dma_dev->dbg_dev_root); + dma_dev->dbg_dev_root = NULL; +} + +static void dmaengine_dbg_summary_show(struct seq_file *s, + struct dma_device *dma_dev) +{ + struct dma_chan *chan; + + list_for_each_entry(chan, &dma_dev->channels, device_node) { + if (chan->client_count) { + seq_printf(s, " %-13s| %s", dma_chan_name(chan), + chan->dbg_client_name ?: "in-use"); + + if (chan->router) + seq_printf(s, " (via router: %s)\n", + dev_name(chan->router->dev)); + else + seq_puts(s, "\n"); + } + } +} + +static int dmaengine_summary_show(struct seq_file *s, void *data) +{ + struct dma_device *dma_dev = NULL; + + mutex_lock(&dma_list_mutex); + list_for_each_entry(dma_dev, &dma_device_list, global_node) { + seq_printf(s, "dma%d (%s): number of channels: %u\n", + dma_dev->dev_id, dev_name(dma_dev->dev), + dma_dev->chancnt); + + if (dma_dev->dbg_summary_show) + dma_dev->dbg_summary_show(s, dma_dev); + else + dmaengine_dbg_summary_show(s, dma_dev); + + if (!list_is_last(&dma_dev->global_node, &dma_device_list)) + seq_puts(s, "\n"); + } + mutex_unlock(&dma_list_mutex); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(dmaengine_summary); + +static void __init dmaengine_debugfs_init(void) +{ + rootdir = debugfs_create_dir("dmaengine", NULL); + + /* /sys/kernel/debug/dmaengine/summary */ + debugfs_create_file("summary", 0444, rootdir, NULL, + &dmaengine_summary_fops); +} +#else +static inline void dmaengine_debugfs_init(void) { } +static inline int dmaengine_debug_register(struct dma_device *dma_dev) +{ + return 0; +} + +static inline void dmaengine_debug_unregister(struct dma_device *dma_dev) { } +#endif /* DEBUG_FS */ + +/* --- sysfs implementation --- */ + +#define DMA_SLAVE_NAME "slave" + +/** + * dev_to_dma_chan - convert a device pointer to its sysfs container object + * @dev: device node + * + * Must be called under dma_list_mutex. + */ +static struct dma_chan *dev_to_dma_chan(struct device *dev) +{ + struct dma_chan_dev *chan_dev; + + chan_dev = container_of(dev, typeof(*chan_dev), device); + return chan_dev->chan; +} + +static ssize_t memcpy_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dma_chan *chan; + unsigned long count = 0; + int i; + int err; + + mutex_lock(&dma_list_mutex); + chan = dev_to_dma_chan(dev); + if (chan) { + for_each_possible_cpu(i) + count += per_cpu_ptr(chan->local, i)->memcpy_count; + err = sysfs_emit(buf, "%lu\n", count); + } else + err = -ENODEV; + mutex_unlock(&dma_list_mutex); + + return err; +} +static DEVICE_ATTR_RO(memcpy_count); + +static ssize_t bytes_transferred_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dma_chan *chan; + unsigned long count = 0; + int i; + int err; + + mutex_lock(&dma_list_mutex); + chan = dev_to_dma_chan(dev); + if (chan) { + for_each_possible_cpu(i) + count += per_cpu_ptr(chan->local, i)->bytes_transferred; + err = sysfs_emit(buf, "%lu\n", count); + } else + err = -ENODEV; + mutex_unlock(&dma_list_mutex); + + return err; +} +static DEVICE_ATTR_RO(bytes_transferred); + +static ssize_t in_use_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dma_chan *chan; + int err; + + mutex_lock(&dma_list_mutex); + chan = dev_to_dma_chan(dev); + if (chan) + err = sysfs_emit(buf, "%d\n", chan->client_count); + else + err = -ENODEV; + mutex_unlock(&dma_list_mutex); + + return err; +} +static DEVICE_ATTR_RO(in_use); + +static struct attribute *dma_dev_attrs[] = { + &dev_attr_memcpy_count.attr, + &dev_attr_bytes_transferred.attr, + &dev_attr_in_use.attr, + NULL, +}; +ATTRIBUTE_GROUPS(dma_dev); + +static void chan_dev_release(struct device *dev) +{ + struct dma_chan_dev *chan_dev; + + chan_dev = container_of(dev, typeof(*chan_dev), device); + kfree(chan_dev); +} + +static struct class dma_devclass = { + .name = "dma", + .dev_groups = dma_dev_groups, + .dev_release = chan_dev_release, +}; + +/* --- client and device registration --- */ + +/* enable iteration over all operation types */ +static dma_cap_mask_t dma_cap_mask_all; + +/** + * struct dma_chan_tbl_ent - tracks channel allocations per core/operation + * @chan: associated channel for this entry + */ +struct dma_chan_tbl_ent { + struct dma_chan *chan; +}; + +/* percpu lookup table for memory-to-memory offload providers */ +static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END]; + +static int __init dma_channel_table_init(void) +{ + enum dma_transaction_type cap; + int err = 0; + + bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END); + + /* 'interrupt', 'private', and 'slave' are channel capabilities, + * but are not associated with an operation so they do not need + * an entry in the channel_table + */ + clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits); + clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits); + clear_bit(DMA_SLAVE, dma_cap_mask_all.bits); + + for_each_dma_cap_mask(cap, dma_cap_mask_all) { + channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent); + if (!channel_table[cap]) { + err = -ENOMEM; + break; + } + } + + if (err) { + pr_err("dmaengine dma_channel_table_init failure: %d\n", err); + for_each_dma_cap_mask(cap, dma_cap_mask_all) + free_percpu(channel_table[cap]); + } + + return err; +} +arch_initcall(dma_channel_table_init); + +/** + * dma_chan_is_local - checks if the channel is in the same NUMA-node as the CPU + * @chan: DMA channel to test + * @cpu: CPU index which the channel should be close to + * + * Returns true if the channel is in the same NUMA-node as the CPU. + */ +static bool dma_chan_is_local(struct dma_chan *chan, int cpu) +{ + int node = dev_to_node(chan->device->dev); + return node == NUMA_NO_NODE || + cpumask_test_cpu(cpu, cpumask_of_node(node)); +} + +/** + * min_chan - finds the channel with min count and in the same NUMA-node as the CPU + * @cap: capability to match + * @cpu: CPU index which the channel should be close to + * + * If some channels are close to the given CPU, the one with the lowest + * reference count is returned. Otherwise, CPU is ignored and only the + * reference count is taken into account. + * + * Must be called under dma_list_mutex. + */ +static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu) +{ + struct dma_device *device; + struct dma_chan *chan; + struct dma_chan *min = NULL; + struct dma_chan *localmin = NULL; + + list_for_each_entry(device, &dma_device_list, global_node) { + if (!dma_has_cap(cap, device->cap_mask) || + dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) { + if (!chan->client_count) + continue; + if (!min || chan->table_count < min->table_count) + min = chan; + + if (dma_chan_is_local(chan, cpu)) + if (!localmin || + chan->table_count < localmin->table_count) + localmin = chan; + } + } + + chan = localmin ? localmin : min; + + if (chan) + chan->table_count++; + + return chan; +} + +/** + * dma_channel_rebalance - redistribute the available channels + * + * Optimize for CPU isolation (each CPU gets a dedicated channel for an + * operation type) in the SMP case, and operation isolation (avoid + * multi-tasking channels) in the non-SMP case. + * + * Must be called under dma_list_mutex. + */ +static void dma_channel_rebalance(void) +{ + struct dma_chan *chan; + struct dma_device *device; + int cpu; + int cap; + + /* undo the last distribution */ + for_each_dma_cap_mask(cap, dma_cap_mask_all) + for_each_possible_cpu(cpu) + per_cpu_ptr(channel_table[cap], cpu)->chan = NULL; + + list_for_each_entry(device, &dma_device_list, global_node) { + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) + chan->table_count = 0; + } + + /* don't populate the channel_table if no clients are available */ + if (!dmaengine_ref_count) + return; + + /* redistribute available channels */ + for_each_dma_cap_mask(cap, dma_cap_mask_all) + for_each_online_cpu(cpu) { + chan = min_chan(cap, cpu); + per_cpu_ptr(channel_table[cap], cpu)->chan = chan; + } +} + +static int dma_device_satisfies_mask(struct dma_device *device, + const dma_cap_mask_t *want) +{ + dma_cap_mask_t has; + + bitmap_and(has.bits, want->bits, device->cap_mask.bits, + DMA_TX_TYPE_END); + return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END); +} + +static struct module *dma_chan_to_owner(struct dma_chan *chan) +{ + return chan->device->owner; +} + +/** + * balance_ref_count - catch up the channel reference count + * @chan: channel to balance ->client_count versus dmaengine_ref_count + * + * Must be called under dma_list_mutex. + */ +static void balance_ref_count(struct dma_chan *chan) +{ + struct module *owner = dma_chan_to_owner(chan); + + while (chan->client_count < dmaengine_ref_count) { + __module_get(owner); + chan->client_count++; + } +} + +static void dma_device_release(struct kref *ref) +{ + struct dma_device *device = container_of(ref, struct dma_device, ref); + + list_del_rcu(&device->global_node); + dma_channel_rebalance(); + + if (device->device_release) + device->device_release(device); +} + +static void dma_device_put(struct dma_device *device) +{ + lockdep_assert_held(&dma_list_mutex); + kref_put(&device->ref, dma_device_release); +} + +/** + * dma_chan_get - try to grab a DMA channel's parent driver module + * @chan: channel to grab + * + * Must be called under dma_list_mutex. + */ +static int dma_chan_get(struct dma_chan *chan) +{ + struct module *owner = dma_chan_to_owner(chan); + int ret; + + /* The channel is already in use, update client count */ + if (chan->client_count) { + __module_get(owner); + chan->client_count++; + return 0; + } + + if (!try_module_get(owner)) + return -ENODEV; + + ret = kref_get_unless_zero(&chan->device->ref); + if (!ret) { + ret = -ENODEV; + goto module_put_out; + } + + /* allocate upon first client reference */ + if (chan->device->device_alloc_chan_resources) { + ret = chan->device->device_alloc_chan_resources(chan); + if (ret < 0) + goto err_out; + } + + chan->client_count++; + + if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask)) + balance_ref_count(chan); + + return 0; + +err_out: + dma_device_put(chan->device); +module_put_out: + module_put(owner); + return ret; +} + +/** + * dma_chan_put - drop a reference to a DMA channel's parent driver module + * @chan: channel to release + * + * Must be called under dma_list_mutex. + */ +static void dma_chan_put(struct dma_chan *chan) +{ + /* This channel is not in use, bail out */ + if (!chan->client_count) + return; + + chan->client_count--; + + /* This channel is not in use anymore, free it */ + if (!chan->client_count && chan->device->device_free_chan_resources) { + /* Make sure all operations have completed */ + dmaengine_synchronize(chan); + chan->device->device_free_chan_resources(chan); + } + + /* If the channel is used via a DMA request router, free the mapping */ + if (chan->router && chan->router->route_free) { + chan->router->route_free(chan->router->dev, chan->route_data); + chan->router = NULL; + chan->route_data = NULL; + } + + dma_device_put(chan->device); + module_put(dma_chan_to_owner(chan)); +} + +enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) +{ + enum dma_status status; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); + + dma_async_issue_pending(chan); + do { + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + dev_err(chan->device->dev, "%s: timeout!\n", __func__); + return DMA_ERROR; + } + if (status != DMA_IN_PROGRESS) + break; + cpu_relax(); + } while (1); + + return status; +} +EXPORT_SYMBOL(dma_sync_wait); + +/** + * dma_find_channel - find a channel to carry out the operation + * @tx_type: transaction type + */ +struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type) +{ + return this_cpu_read(channel_table[tx_type]->chan); +} +EXPORT_SYMBOL(dma_find_channel); + +/** + * dma_issue_pending_all - flush all pending operations across all channels + */ +void dma_issue_pending_all(void) +{ + struct dma_device *device; + struct dma_chan *chan; + + rcu_read_lock(); + list_for_each_entry_rcu(device, &dma_device_list, global_node) { + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) + if (chan->client_count) + device->device_issue_pending(chan); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(dma_issue_pending_all); + +int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps) +{ + struct dma_device *device; + + if (!chan || !caps) + return -EINVAL; + + device = chan->device; + + /* check if the channel supports slave transactions */ + if (!(test_bit(DMA_SLAVE, device->cap_mask.bits) || + test_bit(DMA_CYCLIC, device->cap_mask.bits))) + return -ENXIO; + + /* + * Check whether it reports it uses the generic slave + * capabilities, if not, that means it doesn't support any + * kind of slave capabilities reporting. + */ + if (!device->directions) + return -ENXIO; + + caps->src_addr_widths = device->src_addr_widths; + caps->dst_addr_widths = device->dst_addr_widths; + caps->directions = device->directions; + caps->min_burst = device->min_burst; + caps->max_burst = device->max_burst; + caps->max_sg_burst = device->max_sg_burst; + caps->residue_granularity = device->residue_granularity; + caps->descriptor_reuse = device->descriptor_reuse; + caps->cmd_pause = !!device->device_pause; + caps->cmd_resume = !!device->device_resume; + caps->cmd_terminate = !!device->device_terminate_all; + + /* + * DMA engine device might be configured with non-uniformly + * distributed slave capabilities per device channels. In this + * case the corresponding driver may provide the device_caps + * callback to override the generic capabilities with + * channel-specific ones. + */ + if (device->device_caps) + device->device_caps(chan, caps); + + return 0; +} +EXPORT_SYMBOL_GPL(dma_get_slave_caps); + +static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, + struct dma_device *dev, + dma_filter_fn fn, void *fn_param) +{ + struct dma_chan *chan; + + if (mask && !dma_device_satisfies_mask(dev, mask)) { + dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__); + return NULL; + } + /* devices with multiple channels need special handling as we need to + * ensure that all channels are either private or public. + */ + if (dev->chancnt > 1 && !dma_has_cap(DMA_PRIVATE, dev->cap_mask)) + list_for_each_entry(chan, &dev->channels, device_node) { + /* some channels are already publicly allocated */ + if (chan->client_count) + return NULL; + } + + list_for_each_entry(chan, &dev->channels, device_node) { + if (chan->client_count) { + dev_dbg(dev->dev, "%s: %s busy\n", + __func__, dma_chan_name(chan)); + continue; + } + if (fn && !fn(chan, fn_param)) { + dev_dbg(dev->dev, "%s: %s filter said false\n", + __func__, dma_chan_name(chan)); + continue; + } + return chan; + } + + return NULL; +} + +static struct dma_chan *find_candidate(struct dma_device *device, + const dma_cap_mask_t *mask, + dma_filter_fn fn, void *fn_param) +{ + struct dma_chan *chan = private_candidate(mask, device, fn, fn_param); + int err; + + if (chan) { + /* Found a suitable channel, try to grab, prep, and return it. + * We first set DMA_PRIVATE to disable balance_ref_count as this + * channel will not be published in the general-purpose + * allocator + */ + dma_cap_set(DMA_PRIVATE, device->cap_mask); + device->privatecnt++; + err = dma_chan_get(chan); + + if (err) { + if (err == -ENODEV) { + dev_dbg(device->dev, "%s: %s module removed\n", + __func__, dma_chan_name(chan)); + list_del_rcu(&device->global_node); + } else + dev_dbg(device->dev, + "%s: failed to get %s: (%d)\n", + __func__, dma_chan_name(chan), err); + + if (--device->privatecnt == 0) + dma_cap_clear(DMA_PRIVATE, device->cap_mask); + + chan = ERR_PTR(err); + } + } + + return chan ? chan : ERR_PTR(-EPROBE_DEFER); +} + +/** + * dma_get_slave_channel - try to get specific channel exclusively + * @chan: target channel + */ +struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) +{ + /* lock against __dma_request_channel */ + mutex_lock(&dma_list_mutex); + + if (chan->client_count == 0) { + struct dma_device *device = chan->device; + int err; + + dma_cap_set(DMA_PRIVATE, device->cap_mask); + device->privatecnt++; + err = dma_chan_get(chan); + if (err) { + dev_dbg(chan->device->dev, + "%s: failed to get %s: (%d)\n", + __func__, dma_chan_name(chan), err); + chan = NULL; + if (--device->privatecnt == 0) + dma_cap_clear(DMA_PRIVATE, device->cap_mask); + } + } else + chan = NULL; + + mutex_unlock(&dma_list_mutex); + + + return chan; +} +EXPORT_SYMBOL_GPL(dma_get_slave_channel); + +struct dma_chan *dma_get_any_slave_channel(struct dma_device *device) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + /* lock against __dma_request_channel */ + mutex_lock(&dma_list_mutex); + + chan = find_candidate(device, &mask, NULL, NULL); + + mutex_unlock(&dma_list_mutex); + + return IS_ERR(chan) ? NULL : chan; +} +EXPORT_SYMBOL_GPL(dma_get_any_slave_channel); + +/** + * __dma_request_channel - try to allocate an exclusive channel + * @mask: capabilities that the channel must satisfy + * @fn: optional callback to disposition available channels + * @fn_param: opaque parameter to pass to dma_filter_fn() + * @np: device node to look for DMA channels + * + * Returns pointer to appropriate DMA channel on success or NULL. + */ +struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, + dma_filter_fn fn, void *fn_param, + struct device_node *np) +{ + struct dma_device *device, *_d; + struct dma_chan *chan = NULL; + + /* Find a channel */ + mutex_lock(&dma_list_mutex); + list_for_each_entry_safe(device, _d, &dma_device_list, global_node) { + /* Finds a DMA controller with matching device node */ + if (np && device->dev->of_node && np != device->dev->of_node) + continue; + + chan = find_candidate(device, mask, fn, fn_param); + if (!IS_ERR(chan)) + break; + + chan = NULL; + } + mutex_unlock(&dma_list_mutex); + + pr_debug("%s: %s (%s)\n", + __func__, + chan ? "success" : "fail", + chan ? dma_chan_name(chan) : NULL); + + return chan; +} +EXPORT_SYMBOL_GPL(__dma_request_channel); + +static const struct dma_slave_map *dma_filter_match(struct dma_device *device, + const char *name, + struct device *dev) +{ + int i; + + if (!device->filter.mapcnt) + return NULL; + + for (i = 0; i < device->filter.mapcnt; i++) { + const struct dma_slave_map *map = &device->filter.map[i]; + + if (!strcmp(map->devname, dev_name(dev)) && + !strcmp(map->slave, name)) + return map; + } + + return NULL; +} + +/** + * dma_request_chan - try to allocate an exclusive slave channel + * @dev: pointer to client device structure + * @name: slave channel name + * + * Returns pointer to appropriate DMA channel on success or an error pointer. + */ +struct dma_chan *dma_request_chan(struct device *dev, const char *name) +{ + struct dma_device *d, *_d; + struct dma_chan *chan = NULL; + + /* If device-tree is present get slave info from here */ + if (dev->of_node) + chan = of_dma_request_slave_channel(dev->of_node, name); + + /* If device was enumerated by ACPI get slave info from here */ + if (has_acpi_companion(dev) && !chan) + chan = acpi_dma_request_slave_chan_by_name(dev, name); + + if (PTR_ERR(chan) == -EPROBE_DEFER) + return chan; + + if (!IS_ERR_OR_NULL(chan)) + goto found; + + /* Try to find the channel via the DMA filter map(s) */ + mutex_lock(&dma_list_mutex); + list_for_each_entry_safe(d, _d, &dma_device_list, global_node) { + dma_cap_mask_t mask; + const struct dma_slave_map *map = dma_filter_match(d, name, dev); + + if (!map) + continue; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + chan = find_candidate(d, &mask, d->filter.fn, map->param); + if (!IS_ERR(chan)) + break; + } + mutex_unlock(&dma_list_mutex); + + if (IS_ERR(chan)) + return chan; + if (!chan) + return ERR_PTR(-EPROBE_DEFER); + +found: +#ifdef CONFIG_DEBUG_FS + chan->dbg_client_name = kasprintf(GFP_KERNEL, "%s:%s", dev_name(dev), + name); +#endif + + chan->name = kasprintf(GFP_KERNEL, "dma:%s", name); + if (!chan->name) + return chan; + chan->slave = dev; + + if (sysfs_create_link(&chan->dev->device.kobj, &dev->kobj, + DMA_SLAVE_NAME)) + dev_warn(dev, "Cannot create DMA %s symlink\n", DMA_SLAVE_NAME); + if (sysfs_create_link(&dev->kobj, &chan->dev->device.kobj, chan->name)) + dev_warn(dev, "Cannot create DMA %s symlink\n", chan->name); + + return chan; +} +EXPORT_SYMBOL_GPL(dma_request_chan); + +/** + * dma_request_chan_by_mask - allocate a channel satisfying certain capabilities + * @mask: capabilities that the channel must satisfy + * + * Returns pointer to appropriate DMA channel on success or an error pointer. + */ +struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask) +{ + struct dma_chan *chan; + + if (!mask) + return ERR_PTR(-ENODEV); + + chan = __dma_request_channel(mask, NULL, NULL, NULL); + if (!chan) { + mutex_lock(&dma_list_mutex); + if (list_empty(&dma_device_list)) + chan = ERR_PTR(-EPROBE_DEFER); + else + chan = ERR_PTR(-ENODEV); + mutex_unlock(&dma_list_mutex); + } + + return chan; +} +EXPORT_SYMBOL_GPL(dma_request_chan_by_mask); + +void dma_release_channel(struct dma_chan *chan) +{ + mutex_lock(&dma_list_mutex); + WARN_ONCE(chan->client_count != 1, + "chan reference count %d != 1\n", chan->client_count); + dma_chan_put(chan); + /* drop PRIVATE cap enabled by __dma_request_channel() */ + if (--chan->device->privatecnt == 0) + dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask); + + if (chan->slave) { + sysfs_remove_link(&chan->dev->device.kobj, DMA_SLAVE_NAME); + sysfs_remove_link(&chan->slave->kobj, chan->name); + kfree(chan->name); + chan->name = NULL; + chan->slave = NULL; + } + +#ifdef CONFIG_DEBUG_FS + kfree(chan->dbg_client_name); + chan->dbg_client_name = NULL; +#endif + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL_GPL(dma_release_channel); + +/** + * dmaengine_get - register interest in dma_channels + */ +void dmaengine_get(void) +{ + struct dma_device *device, *_d; + struct dma_chan *chan; + int err; + + mutex_lock(&dma_list_mutex); + dmaengine_ref_count++; + + /* try to grab channels */ + list_for_each_entry_safe(device, _d, &dma_device_list, global_node) { + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) { + err = dma_chan_get(chan); + if (err == -ENODEV) { + /* module removed before we could use it */ + list_del_rcu(&device->global_node); + break; + } else if (err) + dev_dbg(chan->device->dev, + "%s: failed to get %s: (%d)\n", + __func__, dma_chan_name(chan), err); + } + } + + /* if this is the first reference and there were channels + * waiting we need to rebalance to get those channels + * incorporated into the channel table + */ + if (dmaengine_ref_count == 1) + dma_channel_rebalance(); + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL(dmaengine_get); + +/** + * dmaengine_put - let DMA drivers be removed when ref_count == 0 + */ +void dmaengine_put(void) +{ + struct dma_device *device, *_d; + struct dma_chan *chan; + + mutex_lock(&dma_list_mutex); + dmaengine_ref_count--; + BUG_ON(dmaengine_ref_count < 0); + /* drop channel references */ + list_for_each_entry_safe(device, _d, &dma_device_list, global_node) { + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) + dma_chan_put(chan); + } + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL(dmaengine_put); + +static bool device_has_all_tx_types(struct dma_device *device) +{ + /* A device that satisfies this test has channels that will never cause + * an async_tx channel switch event as all possible operation types can + * be handled. + */ + #ifdef CONFIG_ASYNC_TX_DMA + if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask)) + return false; + #endif + + #if IS_ENABLED(CONFIG_ASYNC_MEMCPY) + if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) + return false; + #endif + + #if IS_ENABLED(CONFIG_ASYNC_XOR) + if (!dma_has_cap(DMA_XOR, device->cap_mask)) + return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask)) + return false; + #endif + #endif + + #if IS_ENABLED(CONFIG_ASYNC_PQ) + if (!dma_has_cap(DMA_PQ, device->cap_mask)) + return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask)) + return false; + #endif + #endif + + return true; +} + +static int get_dma_id(struct dma_device *device) +{ + int rc = ida_alloc(&dma_ida, GFP_KERNEL); + + if (rc < 0) + return rc; + device->dev_id = rc; + return 0; +} + +static int __dma_async_device_channel_register(struct dma_device *device, + struct dma_chan *chan) +{ + int rc; + + chan->local = alloc_percpu(typeof(*chan->local)); + if (!chan->local) + return -ENOMEM; + chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL); + if (!chan->dev) { + rc = -ENOMEM; + goto err_free_local; + } + + /* + * When the chan_id is a negative value, we are dynamically adding + * the channel. Otherwise we are static enumerating. + */ + chan->chan_id = ida_alloc(&device->chan_ida, GFP_KERNEL); + if (chan->chan_id < 0) { + pr_err("%s: unable to alloc ida for chan: %d\n", + __func__, chan->chan_id); + rc = chan->chan_id; + goto err_free_dev; + } + + chan->dev->device.class = &dma_devclass; + chan->dev->device.parent = device->dev; + chan->dev->chan = chan; + chan->dev->dev_id = device->dev_id; + dev_set_name(&chan->dev->device, "dma%dchan%d", + device->dev_id, chan->chan_id); + rc = device_register(&chan->dev->device); + if (rc) + goto err_out_ida; + chan->client_count = 0; + device->chancnt++; + + return 0; + + err_out_ida: + ida_free(&device->chan_ida, chan->chan_id); + err_free_dev: + kfree(chan->dev); + err_free_local: + free_percpu(chan->local); + chan->local = NULL; + return rc; +} + +int dma_async_device_channel_register(struct dma_device *device, + struct dma_chan *chan) +{ + int rc; + + rc = __dma_async_device_channel_register(device, chan); + if (rc < 0) + return rc; + + dma_channel_rebalance(); + return 0; +} +EXPORT_SYMBOL_GPL(dma_async_device_channel_register); + +static void __dma_async_device_channel_unregister(struct dma_device *device, + struct dma_chan *chan) +{ + if (chan->local == NULL) + return; + + WARN_ONCE(!device->device_release && chan->client_count, + "%s called while %d clients hold a reference\n", + __func__, chan->client_count); + mutex_lock(&dma_list_mutex); + device->chancnt--; + chan->dev->chan = NULL; + mutex_unlock(&dma_list_mutex); + ida_free(&device->chan_ida, chan->chan_id); + device_unregister(&chan->dev->device); + free_percpu(chan->local); +} + +void dma_async_device_channel_unregister(struct dma_device *device, + struct dma_chan *chan) +{ + __dma_async_device_channel_unregister(device, chan); + dma_channel_rebalance(); +} +EXPORT_SYMBOL_GPL(dma_async_device_channel_unregister); + +/** + * dma_async_device_register - registers DMA devices found + * @device: pointer to &struct dma_device + * + * After calling this routine the structure should not be freed except in the + * device_release() callback which will be called after + * dma_async_device_unregister() is called and no further references are taken. + */ +int dma_async_device_register(struct dma_device *device) +{ + int rc; + struct dma_chan* chan; + + if (!device) + return -ENODEV; + + /* validate device routines */ + if (!device->dev) { + pr_err("DMAdevice must have dev\n"); + return -EIO; + } + + device->owner = device->dev->driver->owner; + +#define CHECK_CAP(_name, _type) \ +{ \ + if (dma_has_cap(_type, device->cap_mask) && !device->device_prep_##_name) { \ + dev_err(device->dev, \ + "Device claims capability %s, but op is not defined\n", \ + __stringify(_type)); \ + return -EIO; \ + } \ +} + + CHECK_CAP(dma_memcpy, DMA_MEMCPY); + CHECK_CAP(dma_xor, DMA_XOR); + CHECK_CAP(dma_xor_val, DMA_XOR_VAL); + CHECK_CAP(dma_pq, DMA_PQ); + CHECK_CAP(dma_pq_val, DMA_PQ_VAL); + CHECK_CAP(dma_memset, DMA_MEMSET); + CHECK_CAP(dma_interrupt, DMA_INTERRUPT); + CHECK_CAP(dma_cyclic, DMA_CYCLIC); + CHECK_CAP(interleaved_dma, DMA_INTERLEAVE); + +#undef CHECK_CAP + + if (!device->device_tx_status) { + dev_err(device->dev, "Device tx_status is not defined\n"); + return -EIO; + } + + + if (!device->device_issue_pending) { + dev_err(device->dev, "Device issue_pending is not defined\n"); + return -EIO; + } + + if (!device->device_release) + dev_dbg(device->dev, + "WARN: Device release is not defined so it is not safe to unbind this driver while in use\n"); + + kref_init(&device->ref); + + /* note: this only matters in the + * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case + */ + if (device_has_all_tx_types(device)) + dma_cap_set(DMA_ASYNC_TX, device->cap_mask); + + rc = get_dma_id(device); + if (rc != 0) + return rc; + + ida_init(&device->chan_ida); + + /* represent channels in sysfs. Probably want devs too */ + list_for_each_entry(chan, &device->channels, device_node) { + rc = __dma_async_device_channel_register(device, chan); + if (rc < 0) + goto err_out; + } + + mutex_lock(&dma_list_mutex); + /* take references on public channels */ + if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask)) + list_for_each_entry(chan, &device->channels, device_node) { + /* if clients are already waiting for channels we need + * to take references on their behalf + */ + if (dma_chan_get(chan) == -ENODEV) { + /* note we can only get here for the first + * channel as the remaining channels are + * guaranteed to get a reference + */ + rc = -ENODEV; + mutex_unlock(&dma_list_mutex); + goto err_out; + } + } + list_add_tail_rcu(&device->global_node, &dma_device_list); + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + device->privatecnt++; /* Always private */ + dma_channel_rebalance(); + mutex_unlock(&dma_list_mutex); + + dmaengine_debug_register(device); + + return 0; + +err_out: + /* if we never registered a channel just release the idr */ + if (!device->chancnt) { + ida_free(&dma_ida, device->dev_id); + return rc; + } + + list_for_each_entry(chan, &device->channels, device_node) { + if (chan->local == NULL) + continue; + mutex_lock(&dma_list_mutex); + chan->dev->chan = NULL; + mutex_unlock(&dma_list_mutex); + device_unregister(&chan->dev->device); + free_percpu(chan->local); + } + return rc; +} +EXPORT_SYMBOL(dma_async_device_register); + +/** + * dma_async_device_unregister - unregister a DMA device + * @device: pointer to &struct dma_device + * + * This routine is called by dma driver exit routines, dmaengine holds module + * references to prevent it being called while channels are in use. + */ +void dma_async_device_unregister(struct dma_device *device) +{ + struct dma_chan *chan, *n; + + dmaengine_debug_unregister(device); + + list_for_each_entry_safe(chan, n, &device->channels, device_node) + __dma_async_device_channel_unregister(device, chan); + + mutex_lock(&dma_list_mutex); + /* + * setting DMA_PRIVATE ensures the device being torn down will not + * be used in the channel_table + */ + dma_cap_set(DMA_PRIVATE, device->cap_mask); + dma_channel_rebalance(); + ida_free(&dma_ida, device->dev_id); + dma_device_put(device); + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL(dma_async_device_unregister); + +static void dmaenginem_async_device_unregister(void *device) +{ + dma_async_device_unregister(device); +} + +/** + * dmaenginem_async_device_register - registers DMA devices found + * @device: pointer to &struct dma_device + * + * The operation is managed and will be undone on driver detach. + */ +int dmaenginem_async_device_register(struct dma_device *device) +{ + int ret; + + ret = dma_async_device_register(device); + if (ret) + return ret; + + return devm_add_action_or_reset(device->dev, dmaenginem_async_device_unregister, device); +} +EXPORT_SYMBOL(dmaenginem_async_device_register); + +struct dmaengine_unmap_pool { + struct kmem_cache *cache; + const char *name; + mempool_t *pool; + size_t size; +}; + +#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) } +static struct dmaengine_unmap_pool unmap_pool[] = { + __UNMAP_POOL(2), + #if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) + __UNMAP_POOL(16), + __UNMAP_POOL(128), + __UNMAP_POOL(256), + #endif +}; + +static struct dmaengine_unmap_pool *__get_unmap_pool(int nr) +{ + int order = get_count_order(nr); + + switch (order) { + case 0 ... 1: + return &unmap_pool[0]; +#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) + case 2 ... 4: + return &unmap_pool[1]; + case 5 ... 7: + return &unmap_pool[2]; + case 8: + return &unmap_pool[3]; +#endif + default: + BUG(); + return NULL; + } +} + +static void dmaengine_unmap(struct kref *kref) +{ + struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref); + struct device *dev = unmap->dev; + int cnt, i; + + cnt = unmap->to_cnt; + for (i = 0; i < cnt; i++) + dma_unmap_page(dev, unmap->addr[i], unmap->len, + DMA_TO_DEVICE); + cnt += unmap->from_cnt; + for (; i < cnt; i++) + dma_unmap_page(dev, unmap->addr[i], unmap->len, + DMA_FROM_DEVICE); + cnt += unmap->bidi_cnt; + for (; i < cnt; i++) { + if (unmap->addr[i] == 0) + continue; + dma_unmap_page(dev, unmap->addr[i], unmap->len, + DMA_BIDIRECTIONAL); + } + cnt = unmap->map_cnt; + mempool_free(unmap, __get_unmap_pool(cnt)->pool); +} + +void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap) +{ + if (unmap) + kref_put(&unmap->kref, dmaengine_unmap); +} +EXPORT_SYMBOL_GPL(dmaengine_unmap_put); + +static void dmaengine_destroy_unmap_pool(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) { + struct dmaengine_unmap_pool *p = &unmap_pool[i]; + + mempool_destroy(p->pool); + p->pool = NULL; + kmem_cache_destroy(p->cache); + p->cache = NULL; + } +} + +static int __init dmaengine_init_unmap_pool(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) { + struct dmaengine_unmap_pool *p = &unmap_pool[i]; + size_t size; + + size = sizeof(struct dmaengine_unmap_data) + + sizeof(dma_addr_t) * p->size; + + p->cache = kmem_cache_create(p->name, size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!p->cache) + break; + p->pool = mempool_create_slab_pool(1, p->cache); + if (!p->pool) + break; + } + + if (i == ARRAY_SIZE(unmap_pool)) + return 0; + + dmaengine_destroy_unmap_pool(); + return -ENOMEM; +} + +struct dmaengine_unmap_data * +dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags) +{ + struct dmaengine_unmap_data *unmap; + + unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags); + if (!unmap) + return NULL; + + memset(unmap, 0, sizeof(*unmap)); + kref_init(&unmap->kref); + unmap->dev = dev; + unmap->map_cnt = nr; + + return unmap; +} +EXPORT_SYMBOL(dmaengine_get_unmap_data); + +void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, + struct dma_chan *chan) +{ + tx->chan = chan; + #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH + spin_lock_init(&tx->lock); + #endif +} +EXPORT_SYMBOL(dma_async_tx_descriptor_init); + +static inline int desc_check_and_set_metadata_mode( + struct dma_async_tx_descriptor *desc, enum dma_desc_metadata_mode mode) +{ + /* Make sure that the metadata mode is not mixed */ + if (!desc->desc_metadata_mode) { + if (dmaengine_is_metadata_mode_supported(desc->chan, mode)) + desc->desc_metadata_mode = mode; + else + return -ENOTSUPP; + } else if (desc->desc_metadata_mode != mode) { + return -EINVAL; + } + + return 0; +} + +int dmaengine_desc_attach_metadata(struct dma_async_tx_descriptor *desc, + void *data, size_t len) +{ + int ret; + + if (!desc) + return -EINVAL; + + ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_CLIENT); + if (ret) + return ret; + + if (!desc->metadata_ops || !desc->metadata_ops->attach) + return -ENOTSUPP; + + return desc->metadata_ops->attach(desc, data, len); +} +EXPORT_SYMBOL_GPL(dmaengine_desc_attach_metadata); + +void *dmaengine_desc_get_metadata_ptr(struct dma_async_tx_descriptor *desc, + size_t *payload_len, size_t *max_len) +{ + int ret; + + if (!desc) + return ERR_PTR(-EINVAL); + + ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_ENGINE); + if (ret) + return ERR_PTR(ret); + + if (!desc->metadata_ops || !desc->metadata_ops->get_ptr) + return ERR_PTR(-ENOTSUPP); + + return desc->metadata_ops->get_ptr(desc, payload_len, max_len); +} +EXPORT_SYMBOL_GPL(dmaengine_desc_get_metadata_ptr); + +int dmaengine_desc_set_metadata_len(struct dma_async_tx_descriptor *desc, + size_t payload_len) +{ + int ret; + + if (!desc) + return -EINVAL; + + ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_ENGINE); + if (ret) + return ret; + + if (!desc->metadata_ops || !desc->metadata_ops->set_len) + return -ENOTSUPP; + + return desc->metadata_ops->set_len(desc, payload_len); +} +EXPORT_SYMBOL_GPL(dmaengine_desc_set_metadata_len); + +/** + * dma_wait_for_async_tx - spin wait for a transaction to complete + * @tx: in-flight transaction to wait on + */ +enum dma_status +dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) +{ + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); + + if (!tx) + return DMA_COMPLETE; + + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + dev_err(tx->chan->device->dev, + "%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); +} +EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); + +/** + * dma_run_dependencies - process dependent operations on the target channel + * @tx: transaction with dependencies + * + * Helper routine for DMA drivers to process (start) dependent operations + * on their target channel. + */ +void dma_run_dependencies(struct dma_async_tx_descriptor *tx) +{ + struct dma_async_tx_descriptor *dep = txd_next(tx); + struct dma_async_tx_descriptor *dep_next; + struct dma_chan *chan; + + if (!dep) + return; + + /* we'll submit tx->next now, so clear the link */ + txd_clear_next(tx); + chan = dep->chan; + + /* keep submitting up until a channel switch is detected + * in that case we will be called again as a result of + * processing the interrupt from async_tx_channel_switch + */ + for (; dep; dep = dep_next) { + txd_lock(dep); + txd_clear_parent(dep); + dep_next = txd_next(dep); + if (dep_next && dep_next->chan == chan) + txd_clear_next(dep); /* ->next will be submitted */ + else + dep_next = NULL; /* submit current dep and terminate */ + txd_unlock(dep); + + dep->tx_submit(dep); + } + + chan->device->device_issue_pending(chan); +} +EXPORT_SYMBOL_GPL(dma_run_dependencies); + +static int __init dma_bus_init(void) +{ + int err = dmaengine_init_unmap_pool(); + + if (err) + return err; + + err = class_register(&dma_devclass); + if (!err) + dmaengine_debugfs_init(); + + return err; +} +arch_initcall(dma_bus_init); diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h new file mode 100644 index 0000000000..53f16d3f00 --- /dev/null +++ b/drivers/dma/dmaengine.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * The contents of this file are private to DMA engine drivers, and is not + * part of the API to be used by DMA engine users. + */ +#ifndef DMAENGINE_H +#define DMAENGINE_H + +#include +#include + +/** + * dma_cookie_init - initialize the cookies for a DMA channel + * @chan: dma channel to initialize + */ +static inline void dma_cookie_init(struct dma_chan *chan) +{ + chan->cookie = DMA_MIN_COOKIE; + chan->completed_cookie = DMA_MIN_COOKIE; +} + +/** + * dma_cookie_assign - assign a DMA engine cookie to the descriptor + * @tx: descriptor needing cookie + * + * Assign a unique non-zero per-channel cookie to the descriptor. + * Note: caller is expected to hold a lock to prevent concurrency. + */ +static inline dma_cookie_t dma_cookie_assign(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *chan = tx->chan; + dma_cookie_t cookie; + + cookie = chan->cookie + 1; + if (cookie < DMA_MIN_COOKIE) + cookie = DMA_MIN_COOKIE; + tx->cookie = chan->cookie = cookie; + + return cookie; +} + +/** + * dma_cookie_complete - complete a descriptor + * @tx: descriptor to complete + * + * Mark this descriptor complete by updating the channels completed + * cookie marker. Zero the descriptors cookie to prevent accidental + * repeated completions. + * + * Note: caller is expected to hold a lock to prevent concurrency. + */ +static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx) +{ + BUG_ON(tx->cookie < DMA_MIN_COOKIE); + tx->chan->completed_cookie = tx->cookie; + tx->cookie = 0; +} + +/** + * dma_cookie_status - report cookie status + * @chan: dma channel + * @cookie: cookie we are interested in + * @state: dma_tx_state structure to return last/used cookies + * + * Report the status of the cookie, filling in the state structure if + * non-NULL. No locking is required. + */ +static inline enum dma_status dma_cookie_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + dma_cookie_t used, complete; + + used = chan->cookie; + complete = chan->completed_cookie; + barrier(); + if (state) { + state->last = complete; + state->used = used; + state->residue = 0; + state->in_flight_bytes = 0; + } + return dma_async_is_complete(cookie, complete, used); +} + +static inline void dma_set_residue(struct dma_tx_state *state, u32 residue) +{ + if (state) + state->residue = residue; +} + +static inline void dma_set_in_flight_bytes(struct dma_tx_state *state, + u32 in_flight_bytes) +{ + if (state) + state->in_flight_bytes = in_flight_bytes; +} + +struct dmaengine_desc_callback { + dma_async_tx_callback callback; + dma_async_tx_callback_result callback_result; + void *callback_param; +}; + +/** + * dmaengine_desc_get_callback - get the passed in callback function + * @tx: tx descriptor + * @cb: temp struct to hold the callback info + * + * Fill the passed in cb struct with what's available in the passed in + * tx descriptor struct + * No locking is required. + */ +static inline void +dmaengine_desc_get_callback(struct dma_async_tx_descriptor *tx, + struct dmaengine_desc_callback *cb) +{ + cb->callback = tx->callback; + cb->callback_result = tx->callback_result; + cb->callback_param = tx->callback_param; +} + +/** + * dmaengine_desc_callback_invoke - call the callback function in cb struct + * @cb: temp struct that is holding the callback info + * @result: transaction result + * + * Call the callback function provided in the cb struct with the parameter + * in the cb struct. + * Locking is dependent on the driver. + */ +static inline void +dmaengine_desc_callback_invoke(struct dmaengine_desc_callback *cb, + const struct dmaengine_result *result) +{ + struct dmaengine_result dummy_result = { + .result = DMA_TRANS_NOERROR, + .residue = 0 + }; + + if (cb->callback_result) { + if (!result) + result = &dummy_result; + cb->callback_result(cb->callback_param, result); + } else if (cb->callback) { + cb->callback(cb->callback_param); + } +} + +/** + * dmaengine_desc_get_callback_invoke - get the callback in tx descriptor and + * then immediately call the callback. + * @tx: dma async tx descriptor + * @result: transaction result + * + * Call dmaengine_desc_get_callback() and dmaengine_desc_callback_invoke() + * in a single function since no work is necessary in between for the driver. + * Locking is dependent on the driver. + */ +static inline void +dmaengine_desc_get_callback_invoke(struct dma_async_tx_descriptor *tx, + const struct dmaengine_result *result) +{ + struct dmaengine_desc_callback cb; + + dmaengine_desc_get_callback(tx, &cb); + dmaengine_desc_callback_invoke(&cb, result); +} + +/** + * dmaengine_desc_callback_valid - verify the callback is valid in cb + * @cb: callback info struct + * + * Return a bool that verifies whether callback in cb is valid or not. + * No locking is required. + */ +static inline bool +dmaengine_desc_callback_valid(struct dmaengine_desc_callback *cb) +{ + return cb->callback || cb->callback_result; +} + +struct dma_chan *dma_get_slave_channel(struct dma_chan *chan); +struct dma_chan *dma_get_any_slave_channel(struct dma_device *device); + +#ifdef CONFIG_DEBUG_FS +#include + +static inline struct dentry * +dmaengine_get_debugfs_root(struct dma_device *dma_dev) { + return dma_dev->dbg_dev_root; +} +#else +struct dentry; +static inline struct dentry * +dmaengine_get_debugfs_root(struct dma_device *dma_dev) +{ + return NULL; +} +#endif /* CONFIG_DEBUG_FS */ + +#endif diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c new file mode 100644 index 0000000000..ffe621695e --- /dev/null +++ b/drivers/dma/dmatest.c @@ -0,0 +1,1360 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * DMA Engine test module + * + * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2013 Intel Corporation + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int test_buf_size = 16384; +module_param(test_buf_size, uint, 0644); +MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer"); + +static char test_device[32]; +module_param_string(device, test_device, sizeof(test_device), 0644); +MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)"); + +static unsigned int threads_per_chan = 1; +module_param(threads_per_chan, uint, 0644); +MODULE_PARM_DESC(threads_per_chan, + "Number of threads to start per channel (default: 1)"); + +static unsigned int max_channels; +module_param(max_channels, uint, 0644); +MODULE_PARM_DESC(max_channels, + "Maximum number of channels to use (default: all)"); + +static unsigned int iterations; +module_param(iterations, uint, 0644); +MODULE_PARM_DESC(iterations, + "Iterations before stopping test (default: infinite)"); + +static unsigned int dmatest; +module_param(dmatest, uint, 0644); +MODULE_PARM_DESC(dmatest, + "dmatest 0-memcpy 1-memset (default: 0)"); + +static unsigned int xor_sources = 3; +module_param(xor_sources, uint, 0644); +MODULE_PARM_DESC(xor_sources, + "Number of xor source buffers (default: 3)"); + +static unsigned int pq_sources = 3; +module_param(pq_sources, uint, 0644); +MODULE_PARM_DESC(pq_sources, + "Number of p+q source buffers (default: 3)"); + +static int timeout = 3000; +module_param(timeout, int, 0644); +MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), " + "Pass -1 for infinite timeout"); + +static bool noverify; +module_param(noverify, bool, 0644); +MODULE_PARM_DESC(noverify, "Disable data verification (default: verify)"); + +static bool norandom; +module_param(norandom, bool, 0644); +MODULE_PARM_DESC(norandom, "Disable random offset setup (default: random)"); + +static bool verbose; +module_param(verbose, bool, 0644); +MODULE_PARM_DESC(verbose, "Enable \"success\" result messages (default: off)"); + +static int alignment = -1; +module_param(alignment, int, 0644); +MODULE_PARM_DESC(alignment, "Custom data address alignment taken as 2^(alignment) (default: not used (-1))"); + +static unsigned int transfer_size; +module_param(transfer_size, uint, 0644); +MODULE_PARM_DESC(transfer_size, "Optional custom transfer size in bytes (default: not used (0))"); + +static bool polled; +module_param(polled, bool, 0644); +MODULE_PARM_DESC(polled, "Use polling for completion instead of interrupts"); + +/** + * struct dmatest_params - test parameters. + * @buf_size: size of the memcpy test buffer + * @channel: bus ID of the channel to test + * @device: bus ID of the DMA Engine to test + * @threads_per_chan: number of threads to start per channel + * @max_channels: maximum number of channels to use + * @iterations: iterations before stopping test + * @xor_sources: number of xor source buffers + * @pq_sources: number of p+q source buffers + * @timeout: transfer timeout in msec, -1 for infinite timeout + * @noverify: disable data verification + * @norandom: disable random offset setup + * @alignment: custom data address alignment taken as 2^alignment + * @transfer_size: custom transfer size in bytes + * @polled: use polling for completion instead of interrupts + */ +struct dmatest_params { + unsigned int buf_size; + char channel[20]; + char device[32]; + unsigned int threads_per_chan; + unsigned int max_channels; + unsigned int iterations; + unsigned int xor_sources; + unsigned int pq_sources; + int timeout; + bool noverify; + bool norandom; + int alignment; + unsigned int transfer_size; + bool polled; +}; + +/** + * struct dmatest_info - test information. + * @params: test parameters + * @channels: channels under test + * @nr_channels: number of channels under test + * @lock: access protection to the fields of this structure + * @did_init: module has been initialized completely + * @last_error: test has faced configuration issues + */ +static struct dmatest_info { + /* Test parameters */ + struct dmatest_params params; + + /* Internal state */ + struct list_head channels; + unsigned int nr_channels; + int last_error; + struct mutex lock; + bool did_init; +} test_info = { + .channels = LIST_HEAD_INIT(test_info.channels), + .lock = __MUTEX_INITIALIZER(test_info.lock), +}; + +static int dmatest_run_set(const char *val, const struct kernel_param *kp); +static int dmatest_run_get(char *val, const struct kernel_param *kp); +static const struct kernel_param_ops run_ops = { + .set = dmatest_run_set, + .get = dmatest_run_get, +}; +static bool dmatest_run; +module_param_cb(run, &run_ops, &dmatest_run, 0644); +MODULE_PARM_DESC(run, "Run the test (default: false)"); + +static int dmatest_chan_set(const char *val, const struct kernel_param *kp); +static int dmatest_chan_get(char *val, const struct kernel_param *kp); +static const struct kernel_param_ops multi_chan_ops = { + .set = dmatest_chan_set, + .get = dmatest_chan_get, +}; + +static char test_channel[20]; +static struct kparam_string newchan_kps = { + .string = test_channel, + .maxlen = 20, +}; +module_param_cb(channel, &multi_chan_ops, &newchan_kps, 0644); +MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)"); + +static int dmatest_test_list_get(char *val, const struct kernel_param *kp); +static const struct kernel_param_ops test_list_ops = { + .get = dmatest_test_list_get, +}; +module_param_cb(test_list, &test_list_ops, NULL, 0444); +MODULE_PARM_DESC(test_list, "Print current test list"); + +/* Maximum amount of mismatched bytes in buffer to print */ +#define MAX_ERROR_COUNT 32 + +/* + * Initialization patterns. All bytes in the source buffer has bit 7 + * set, all bytes in the destination buffer has bit 7 cleared. + * + * Bit 6 is set for all bytes which are to be copied by the DMA + * engine. Bit 5 is set for all bytes which are to be overwritten by + * the DMA engine. + * + * The remaining bits are the inverse of a counter which increments by + * one for each byte address. + */ +#define PATTERN_SRC 0x80 +#define PATTERN_DST 0x00 +#define PATTERN_COPY 0x40 +#define PATTERN_OVERWRITE 0x20 +#define PATTERN_COUNT_MASK 0x1f +#define PATTERN_MEMSET_IDX 0x01 + +/* Fixed point arithmetic ops */ +#define FIXPT_SHIFT 8 +#define FIXPNT_MASK 0xFF +#define FIXPT_TO_INT(a) ((a) >> FIXPT_SHIFT) +#define INT_TO_FIXPT(a) ((a) << FIXPT_SHIFT) +#define FIXPT_GET_FRAC(a) ((((a) & FIXPNT_MASK) * 100) >> FIXPT_SHIFT) + +/* poor man's completion - we want to use wait_event_freezable() on it */ +struct dmatest_done { + bool done; + wait_queue_head_t *wait; +}; + +struct dmatest_data { + u8 **raw; + u8 **aligned; + unsigned int cnt; + unsigned int off; +}; + +struct dmatest_thread { + struct list_head node; + struct dmatest_info *info; + struct task_struct *task; + struct dma_chan *chan; + struct dmatest_data src; + struct dmatest_data dst; + enum dma_transaction_type type; + wait_queue_head_t done_wait; + struct dmatest_done test_done; + bool done; + bool pending; +}; + +struct dmatest_chan { + struct list_head node; + struct dma_chan *chan; + struct list_head threads; +}; + +static DECLARE_WAIT_QUEUE_HEAD(thread_wait); +static bool wait; + +static bool is_threaded_test_run(struct dmatest_info *info) +{ + struct dmatest_chan *dtc; + + list_for_each_entry(dtc, &info->channels, node) { + struct dmatest_thread *thread; + + list_for_each_entry(thread, &dtc->threads, node) { + if (!thread->done && !thread->pending) + return true; + } + } + + return false; +} + +static bool is_threaded_test_pending(struct dmatest_info *info) +{ + struct dmatest_chan *dtc; + + list_for_each_entry(dtc, &info->channels, node) { + struct dmatest_thread *thread; + + list_for_each_entry(thread, &dtc->threads, node) { + if (thread->pending) + return true; + } + } + + return false; +} + +static int dmatest_wait_get(char *val, const struct kernel_param *kp) +{ + struct dmatest_info *info = &test_info; + struct dmatest_params *params = &info->params; + + if (params->iterations) + wait_event(thread_wait, !is_threaded_test_run(info)); + wait = true; + return param_get_bool(val, kp); +} + +static const struct kernel_param_ops wait_ops = { + .get = dmatest_wait_get, + .set = param_set_bool, +}; +module_param_cb(wait, &wait_ops, &wait, 0444); +MODULE_PARM_DESC(wait, "Wait for tests to complete (default: false)"); + +static bool dmatest_match_channel(struct dmatest_params *params, + struct dma_chan *chan) +{ + if (params->channel[0] == '\0') + return true; + return strcmp(dma_chan_name(chan), params->channel) == 0; +} + +static bool dmatest_match_device(struct dmatest_params *params, + struct dma_device *device) +{ + if (params->device[0] == '\0') + return true; + return strcmp(dev_name(device->dev), params->device) == 0; +} + +static unsigned long dmatest_random(void) +{ + unsigned long buf; + + get_random_bytes(&buf, sizeof(buf)); + return buf; +} + +static inline u8 gen_inv_idx(u8 index, bool is_memset) +{ + u8 val = is_memset ? PATTERN_MEMSET_IDX : index; + + return ~val & PATTERN_COUNT_MASK; +} + +static inline u8 gen_src_value(u8 index, bool is_memset) +{ + return PATTERN_SRC | gen_inv_idx(index, is_memset); +} + +static inline u8 gen_dst_value(u8 index, bool is_memset) +{ + return PATTERN_DST | gen_inv_idx(index, is_memset); +} + +static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len, + unsigned int buf_size, bool is_memset) +{ + unsigned int i; + u8 *buf; + + for (; (buf = *bufs); bufs++) { + for (i = 0; i < start; i++) + buf[i] = gen_src_value(i, is_memset); + for ( ; i < start + len; i++) + buf[i] = gen_src_value(i, is_memset) | PATTERN_COPY; + for ( ; i < buf_size; i++) + buf[i] = gen_src_value(i, is_memset); + buf++; + } +} + +static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len, + unsigned int buf_size, bool is_memset) +{ + unsigned int i; + u8 *buf; + + for (; (buf = *bufs); bufs++) { + for (i = 0; i < start; i++) + buf[i] = gen_dst_value(i, is_memset); + for ( ; i < start + len; i++) + buf[i] = gen_dst_value(i, is_memset) | + PATTERN_OVERWRITE; + for ( ; i < buf_size; i++) + buf[i] = gen_dst_value(i, is_memset); + } +} + +static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index, + unsigned int counter, bool is_srcbuf, bool is_memset) +{ + u8 diff = actual ^ pattern; + u8 expected = pattern | gen_inv_idx(counter, is_memset); + const char *thread_name = current->comm; + + if (is_srcbuf) + pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if ((pattern & PATTERN_COPY) + && (diff & (PATTERN_COPY | PATTERN_OVERWRITE))) + pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if (diff & PATTERN_SRC) + pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else + pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n", + thread_name, index, expected, actual); +} + +static unsigned int dmatest_verify(u8 **bufs, unsigned int start, + unsigned int end, unsigned int counter, u8 pattern, + bool is_srcbuf, bool is_memset) +{ + unsigned int i; + unsigned int error_count = 0; + u8 actual; + u8 expected; + u8 *buf; + unsigned int counter_orig = counter; + + for (; (buf = *bufs); bufs++) { + counter = counter_orig; + for (i = start; i < end; i++) { + actual = buf[i]; + expected = pattern | gen_inv_idx(counter, is_memset); + if (actual != expected) { + if (error_count < MAX_ERROR_COUNT) + dmatest_mismatch(actual, pattern, i, + counter, is_srcbuf, + is_memset); + error_count++; + } + counter++; + } + } + + if (error_count > MAX_ERROR_COUNT) + pr_warn("%s: %u errors suppressed\n", + current->comm, error_count - MAX_ERROR_COUNT); + + return error_count; +} + + +static void dmatest_callback(void *arg) +{ + struct dmatest_done *done = arg; + struct dmatest_thread *thread = + container_of(done, struct dmatest_thread, test_done); + if (!thread->done) { + done->done = true; + wake_up_all(done->wait); + } else { + /* + * If thread->done, it means that this callback occurred + * after the parent thread has cleaned up. This can + * happen in the case that driver doesn't implement + * the terminate_all() functionality and a dma operation + * did not occur within the timeout period + */ + WARN(1, "dmatest: Kernel memory may be corrupted!!\n"); + } +} + +static unsigned int min_odd(unsigned int x, unsigned int y) +{ + unsigned int val = min(x, y); + + return val % 2 ? val : val - 1; +} + +static void result(const char *err, unsigned int n, unsigned int src_off, + unsigned int dst_off, unsigned int len, unsigned long data) +{ + if (IS_ERR_VALUE(data)) { + pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%ld)\n", + current->comm, n, err, src_off, dst_off, len, data); + } else { + pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)\n", + current->comm, n, err, src_off, dst_off, len, data); + } +} + +static void dbg_result(const char *err, unsigned int n, unsigned int src_off, + unsigned int dst_off, unsigned int len, + unsigned long data) +{ + pr_debug("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)\n", + current->comm, n, err, src_off, dst_off, len, data); +} + +#define verbose_result(err, n, src_off, dst_off, len, data) ({ \ + if (verbose) \ + result(err, n, src_off, dst_off, len, data); \ + else \ + dbg_result(err, n, src_off, dst_off, len, data);\ +}) + +static unsigned long long dmatest_persec(s64 runtime, unsigned int val) +{ + unsigned long long per_sec = 1000000; + + if (runtime <= 0) + return 0; + + /* drop precision until runtime is 32-bits */ + while (runtime > UINT_MAX) { + runtime >>= 1; + per_sec <<= 1; + } + + per_sec *= val; + per_sec = INT_TO_FIXPT(per_sec); + do_div(per_sec, runtime); + + return per_sec; +} + +static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len) +{ + return FIXPT_TO_INT(dmatest_persec(runtime, len >> 10)); +} + +static void __dmatest_free_test_data(struct dmatest_data *d, unsigned int cnt) +{ + unsigned int i; + + for (i = 0; i < cnt; i++) + kfree(d->raw[i]); + + kfree(d->aligned); + kfree(d->raw); +} + +static void dmatest_free_test_data(struct dmatest_data *d) +{ + __dmatest_free_test_data(d, d->cnt); +} + +static int dmatest_alloc_test_data(struct dmatest_data *d, + unsigned int buf_size, u8 align) +{ + unsigned int i = 0; + + d->raw = kcalloc(d->cnt + 1, sizeof(u8 *), GFP_KERNEL); + if (!d->raw) + return -ENOMEM; + + d->aligned = kcalloc(d->cnt + 1, sizeof(u8 *), GFP_KERNEL); + if (!d->aligned) + goto err; + + for (i = 0; i < d->cnt; i++) { + d->raw[i] = kmalloc(buf_size + align, GFP_KERNEL); + if (!d->raw[i]) + goto err; + + /* align to alignment restriction */ + if (align) + d->aligned[i] = PTR_ALIGN(d->raw[i], align); + else + d->aligned[i] = d->raw[i]; + } + + return 0; +err: + __dmatest_free_test_data(d, i); + return -ENOMEM; +} + +/* + * This function repeatedly tests DMA transfers of various lengths and + * offsets for a given operation type until it is told to exit by + * kthread_stop(). There may be multiple threads running this function + * in parallel for a single channel, and there may be multiple channels + * being tested in parallel. + * + * Before each test, the source and destination buffer is initialized + * with a known pattern. This pattern is different depending on + * whether it's in an area which is supposed to be copied or + * overwritten, and different in the source and destination buffers. + * So if the DMA engine doesn't copy exactly what we tell it to copy, + * we'll notice. + */ +static int dmatest_func(void *data) +{ + struct dmatest_thread *thread = data; + struct dmatest_done *done = &thread->test_done; + struct dmatest_info *info; + struct dmatest_params *params; + struct dma_chan *chan; + struct dma_device *dev; + struct device *dma_dev; + unsigned int error_count; + unsigned int failed_tests = 0; + unsigned int total_tests = 0; + dma_cookie_t cookie; + enum dma_status status; + enum dma_ctrl_flags flags; + u8 *pq_coefs = NULL; + int ret; + unsigned int buf_size; + struct dmatest_data *src; + struct dmatest_data *dst; + int i; + ktime_t ktime, start, diff; + ktime_t filltime = 0; + ktime_t comparetime = 0; + s64 runtime = 0; + unsigned long long total_len = 0; + unsigned long long iops = 0; + u8 align = 0; + bool is_memset = false; + dma_addr_t *srcs; + dma_addr_t *dma_pq; + + set_freezable(); + + ret = -ENOMEM; + + smp_rmb(); + thread->pending = false; + info = thread->info; + params = &info->params; + chan = thread->chan; + dev = chan->device; + dma_dev = dmaengine_get_dma_device(chan); + + src = &thread->src; + dst = &thread->dst; + if (thread->type == DMA_MEMCPY) { + align = params->alignment < 0 ? dev->copy_align : + params->alignment; + src->cnt = dst->cnt = 1; + } else if (thread->type == DMA_MEMSET) { + align = params->alignment < 0 ? dev->fill_align : + params->alignment; + src->cnt = dst->cnt = 1; + is_memset = true; + } else if (thread->type == DMA_XOR) { + /* force odd to ensure dst = src */ + src->cnt = min_odd(params->xor_sources | 1, dev->max_xor); + dst->cnt = 1; + align = params->alignment < 0 ? dev->xor_align : + params->alignment; + } else if (thread->type == DMA_PQ) { + /* force odd to ensure dst = src */ + src->cnt = min_odd(params->pq_sources | 1, dma_maxpq(dev, 0)); + dst->cnt = 2; + align = params->alignment < 0 ? dev->pq_align : + params->alignment; + + pq_coefs = kmalloc(params->pq_sources + 1, GFP_KERNEL); + if (!pq_coefs) + goto err_thread_type; + + for (i = 0; i < src->cnt; i++) + pq_coefs[i] = 1; + } else + goto err_thread_type; + + /* Check if buffer count fits into map count variable (u8) */ + if ((src->cnt + dst->cnt) >= 255) { + pr_err("too many buffers (%d of 255 supported)\n", + src->cnt + dst->cnt); + goto err_free_coefs; + } + + buf_size = params->buf_size; + if (1 << align > buf_size) { + pr_err("%u-byte buffer too small for %d-byte alignment\n", + buf_size, 1 << align); + goto err_free_coefs; + } + + if (dmatest_alloc_test_data(src, buf_size, align) < 0) + goto err_free_coefs; + + if (dmatest_alloc_test_data(dst, buf_size, align) < 0) + goto err_src; + + set_user_nice(current, 10); + + srcs = kcalloc(src->cnt, sizeof(dma_addr_t), GFP_KERNEL); + if (!srcs) + goto err_dst; + + dma_pq = kcalloc(dst->cnt, sizeof(dma_addr_t), GFP_KERNEL); + if (!dma_pq) + goto err_srcs_array; + + /* + * src and dst buffers are freed by ourselves below + */ + if (params->polled) + flags = DMA_CTRL_ACK; + else + flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; + + ktime = ktime_get(); + while (!(kthread_should_stop() || + (params->iterations && total_tests >= params->iterations))) { + struct dma_async_tx_descriptor *tx = NULL; + struct dmaengine_unmap_data *um; + dma_addr_t *dsts; + unsigned int len; + + total_tests++; + + if (params->transfer_size) { + if (params->transfer_size >= buf_size) { + pr_err("%u-byte transfer size must be lower than %u-buffer size\n", + params->transfer_size, buf_size); + break; + } + len = params->transfer_size; + } else if (params->norandom) { + len = buf_size; + } else { + len = dmatest_random() % buf_size + 1; + } + + /* Do not alter transfer size explicitly defined by user */ + if (!params->transfer_size) { + len = (len >> align) << align; + if (!len) + len = 1 << align; + } + total_len += len; + + if (params->norandom) { + src->off = 0; + dst->off = 0; + } else { + src->off = dmatest_random() % (buf_size - len + 1); + dst->off = dmatest_random() % (buf_size - len + 1); + + src->off = (src->off >> align) << align; + dst->off = (dst->off >> align) << align; + } + + if (!params->noverify) { + start = ktime_get(); + dmatest_init_srcs(src->aligned, src->off, len, + buf_size, is_memset); + dmatest_init_dsts(dst->aligned, dst->off, len, + buf_size, is_memset); + + diff = ktime_sub(ktime_get(), start); + filltime = ktime_add(filltime, diff); + } + + um = dmaengine_get_unmap_data(dma_dev, src->cnt + dst->cnt, + GFP_KERNEL); + if (!um) { + failed_tests++; + result("unmap data NULL", total_tests, + src->off, dst->off, len, ret); + continue; + } + + um->len = buf_size; + for (i = 0; i < src->cnt; i++) { + void *buf = src->aligned[i]; + struct page *pg = virt_to_page(buf); + unsigned long pg_off = offset_in_page(buf); + + um->addr[i] = dma_map_page(dma_dev, pg, pg_off, + um->len, DMA_TO_DEVICE); + srcs[i] = um->addr[i] + src->off; + ret = dma_mapping_error(dma_dev, um->addr[i]); + if (ret) { + result("src mapping error", total_tests, + src->off, dst->off, len, ret); + goto error_unmap_continue; + } + um->to_cnt++; + } + /* map with DMA_BIDIRECTIONAL to force writeback/invalidate */ + dsts = &um->addr[src->cnt]; + for (i = 0; i < dst->cnt; i++) { + void *buf = dst->aligned[i]; + struct page *pg = virt_to_page(buf); + unsigned long pg_off = offset_in_page(buf); + + dsts[i] = dma_map_page(dma_dev, pg, pg_off, um->len, + DMA_BIDIRECTIONAL); + ret = dma_mapping_error(dma_dev, dsts[i]); + if (ret) { + result("dst mapping error", total_tests, + src->off, dst->off, len, ret); + goto error_unmap_continue; + } + um->bidi_cnt++; + } + + if (thread->type == DMA_MEMCPY) + tx = dev->device_prep_dma_memcpy(chan, + dsts[0] + dst->off, + srcs[0], len, flags); + else if (thread->type == DMA_MEMSET) + tx = dev->device_prep_dma_memset(chan, + dsts[0] + dst->off, + *(src->aligned[0] + src->off), + len, flags); + else if (thread->type == DMA_XOR) + tx = dev->device_prep_dma_xor(chan, + dsts[0] + dst->off, + srcs, src->cnt, + len, flags); + else if (thread->type == DMA_PQ) { + for (i = 0; i < dst->cnt; i++) + dma_pq[i] = dsts[i] + dst->off; + tx = dev->device_prep_dma_pq(chan, dma_pq, srcs, + src->cnt, pq_coefs, + len, flags); + } + + if (!tx) { + result("prep error", total_tests, src->off, + dst->off, len, ret); + msleep(100); + goto error_unmap_continue; + } + + done->done = false; + if (!params->polled) { + tx->callback = dmatest_callback; + tx->callback_param = done; + } + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + result("submit error", total_tests, src->off, + dst->off, len, ret); + msleep(100); + goto error_unmap_continue; + } + + if (params->polled) { + status = dma_sync_wait(chan, cookie); + dmaengine_terminate_sync(chan); + if (status == DMA_COMPLETE) + done->done = true; + } else { + dma_async_issue_pending(chan); + + wait_event_freezable_timeout(thread->done_wait, + done->done, + msecs_to_jiffies(params->timeout)); + + status = dma_async_is_tx_complete(chan, cookie, NULL, + NULL); + } + + if (!done->done) { + result("test timed out", total_tests, src->off, dst->off, + len, 0); + goto error_unmap_continue; + } else if (status != DMA_COMPLETE && + !(dma_has_cap(DMA_COMPLETION_NO_ORDER, + dev->cap_mask) && + status == DMA_OUT_OF_ORDER)) { + result(status == DMA_ERROR ? + "completion error status" : + "completion busy status", total_tests, src->off, + dst->off, len, ret); + goto error_unmap_continue; + } + + dmaengine_unmap_put(um); + + if (params->noverify) { + verbose_result("test passed", total_tests, src->off, + dst->off, len, 0); + continue; + } + + start = ktime_get(); + pr_debug("%s: verifying source buffer...\n", current->comm); + error_count = dmatest_verify(src->aligned, 0, src->off, + 0, PATTERN_SRC, true, is_memset); + error_count += dmatest_verify(src->aligned, src->off, + src->off + len, src->off, + PATTERN_SRC | PATTERN_COPY, true, is_memset); + error_count += dmatest_verify(src->aligned, src->off + len, + buf_size, src->off + len, + PATTERN_SRC, true, is_memset); + + pr_debug("%s: verifying dest buffer...\n", current->comm); + error_count += dmatest_verify(dst->aligned, 0, dst->off, + 0, PATTERN_DST, false, is_memset); + + error_count += dmatest_verify(dst->aligned, dst->off, + dst->off + len, src->off, + PATTERN_SRC | PATTERN_COPY, false, is_memset); + + error_count += dmatest_verify(dst->aligned, dst->off + len, + buf_size, dst->off + len, + PATTERN_DST, false, is_memset); + + diff = ktime_sub(ktime_get(), start); + comparetime = ktime_add(comparetime, diff); + + if (error_count) { + result("data error", total_tests, src->off, dst->off, + len, error_count); + failed_tests++; + } else { + verbose_result("test passed", total_tests, src->off, + dst->off, len, 0); + } + + continue; + +error_unmap_continue: + dmaengine_unmap_put(um); + failed_tests++; + } + ktime = ktime_sub(ktime_get(), ktime); + ktime = ktime_sub(ktime, comparetime); + ktime = ktime_sub(ktime, filltime); + runtime = ktime_to_us(ktime); + + ret = 0; + kfree(dma_pq); +err_srcs_array: + kfree(srcs); +err_dst: + dmatest_free_test_data(dst); +err_src: + dmatest_free_test_data(src); +err_free_coefs: + kfree(pq_coefs); +err_thread_type: + iops = dmatest_persec(runtime, total_tests); + pr_info("%s: summary %u tests, %u failures %llu.%02llu iops %llu KB/s (%d)\n", + current->comm, total_tests, failed_tests, + FIXPT_TO_INT(iops), FIXPT_GET_FRAC(iops), + dmatest_KBs(runtime, total_len), ret); + + /* terminate all transfers on specified channels */ + if (ret || failed_tests) + dmaengine_terminate_sync(chan); + + thread->done = true; + wake_up(&thread_wait); + + return ret; +} + +static void dmatest_cleanup_channel(struct dmatest_chan *dtc) +{ + struct dmatest_thread *thread; + struct dmatest_thread *_thread; + int ret; + + list_for_each_entry_safe(thread, _thread, &dtc->threads, node) { + ret = kthread_stop(thread->task); + pr_debug("thread %s exited with status %d\n", + thread->task->comm, ret); + list_del(&thread->node); + put_task_struct(thread->task); + kfree(thread); + } + + /* terminate all transfers on specified channels */ + dmaengine_terminate_sync(dtc->chan); + + kfree(dtc); +} + +static int dmatest_add_threads(struct dmatest_info *info, + struct dmatest_chan *dtc, enum dma_transaction_type type) +{ + struct dmatest_params *params = &info->params; + struct dmatest_thread *thread; + struct dma_chan *chan = dtc->chan; + char *op; + unsigned int i; + + if (type == DMA_MEMCPY) + op = "copy"; + else if (type == DMA_MEMSET) + op = "set"; + else if (type == DMA_XOR) + op = "xor"; + else if (type == DMA_PQ) + op = "pq"; + else + return -EINVAL; + + for (i = 0; i < params->threads_per_chan; i++) { + thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL); + if (!thread) { + pr_warn("No memory for %s-%s%u\n", + dma_chan_name(chan), op, i); + break; + } + thread->info = info; + thread->chan = dtc->chan; + thread->type = type; + thread->test_done.wait = &thread->done_wait; + init_waitqueue_head(&thread->done_wait); + smp_wmb(); + thread->task = kthread_create(dmatest_func, thread, "%s-%s%u", + dma_chan_name(chan), op, i); + if (IS_ERR(thread->task)) { + pr_warn("Failed to create thread %s-%s%u\n", + dma_chan_name(chan), op, i); + kfree(thread); + break; + } + + /* srcbuf and dstbuf are allocated by the thread itself */ + get_task_struct(thread->task); + list_add_tail(&thread->node, &dtc->threads); + thread->pending = true; + } + + return i; +} + +static int dmatest_add_channel(struct dmatest_info *info, + struct dma_chan *chan) +{ + struct dmatest_chan *dtc; + struct dma_device *dma_dev = chan->device; + unsigned int thread_count = 0; + int cnt; + + dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL); + if (!dtc) { + pr_warn("No memory for %s\n", dma_chan_name(chan)); + return -ENOMEM; + } + + dtc->chan = chan; + INIT_LIST_HEAD(&dtc->threads); + + if (dma_has_cap(DMA_COMPLETION_NO_ORDER, dma_dev->cap_mask) && + info->params.polled) { + info->params.polled = false; + pr_warn("DMA_COMPLETION_NO_ORDER, polled disabled\n"); + } + + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { + if (dmatest == 0) { + cnt = dmatest_add_threads(info, dtc, DMA_MEMCPY); + thread_count += cnt > 0 ? cnt : 0; + } + } + + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { + if (dmatest == 1) { + cnt = dmatest_add_threads(info, dtc, DMA_MEMSET); + thread_count += cnt > 0 ? cnt : 0; + } + } + + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + cnt = dmatest_add_threads(info, dtc, DMA_XOR); + thread_count += cnt > 0 ? cnt : 0; + } + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + cnt = dmatest_add_threads(info, dtc, DMA_PQ); + thread_count += cnt > 0 ? cnt : 0; + } + + pr_info("Added %u threads using %s\n", + thread_count, dma_chan_name(chan)); + + list_add_tail(&dtc->node, &info->channels); + info->nr_channels++; + + return 0; +} + +static bool filter(struct dma_chan *chan, void *param) +{ + return dmatest_match_channel(param, chan) && dmatest_match_device(param, chan->device); +} + +static void request_channels(struct dmatest_info *info, + enum dma_transaction_type type) +{ + dma_cap_mask_t mask; + + dma_cap_zero(mask); + dma_cap_set(type, mask); + for (;;) { + struct dmatest_params *params = &info->params; + struct dma_chan *chan; + + chan = dma_request_channel(mask, filter, params); + if (chan) { + if (dmatest_add_channel(info, chan)) { + dma_release_channel(chan); + break; /* add_channel failed, punt */ + } + } else + break; /* no more channels available */ + if (params->max_channels && + info->nr_channels >= params->max_channels) + break; /* we have all we need */ + } +} + +static void add_threaded_test(struct dmatest_info *info) +{ + struct dmatest_params *params = &info->params; + + /* Copy test parameters */ + params->buf_size = test_buf_size; + strscpy(params->channel, strim(test_channel), sizeof(params->channel)); + strscpy(params->device, strim(test_device), sizeof(params->device)); + params->threads_per_chan = threads_per_chan; + params->max_channels = max_channels; + params->iterations = iterations; + params->xor_sources = xor_sources; + params->pq_sources = pq_sources; + params->timeout = timeout; + params->noverify = noverify; + params->norandom = norandom; + params->alignment = alignment; + params->transfer_size = transfer_size; + params->polled = polled; + + request_channels(info, DMA_MEMCPY); + request_channels(info, DMA_MEMSET); + request_channels(info, DMA_XOR); + request_channels(info, DMA_PQ); +} + +static void run_pending_tests(struct dmatest_info *info) +{ + struct dmatest_chan *dtc; + unsigned int thread_count = 0; + + list_for_each_entry(dtc, &info->channels, node) { + struct dmatest_thread *thread; + + thread_count = 0; + list_for_each_entry(thread, &dtc->threads, node) { + wake_up_process(thread->task); + thread_count++; + } + pr_info("Started %u threads using %s\n", + thread_count, dma_chan_name(dtc->chan)); + } +} + +static void stop_threaded_test(struct dmatest_info *info) +{ + struct dmatest_chan *dtc, *_dtc; + struct dma_chan *chan; + + list_for_each_entry_safe(dtc, _dtc, &info->channels, node) { + list_del(&dtc->node); + chan = dtc->chan; + dmatest_cleanup_channel(dtc); + pr_debug("dropped channel %s\n", dma_chan_name(chan)); + dma_release_channel(chan); + } + + info->nr_channels = 0; +} + +static void start_threaded_tests(struct dmatest_info *info) +{ + /* we might be called early to set run=, defer running until all + * parameters have been evaluated + */ + if (!info->did_init) + return; + + run_pending_tests(info); +} + +static int dmatest_run_get(char *val, const struct kernel_param *kp) +{ + struct dmatest_info *info = &test_info; + + mutex_lock(&info->lock); + if (is_threaded_test_run(info)) { + dmatest_run = true; + } else { + if (!is_threaded_test_pending(info)) + stop_threaded_test(info); + dmatest_run = false; + } + mutex_unlock(&info->lock); + + return param_get_bool(val, kp); +} + +static int dmatest_run_set(const char *val, const struct kernel_param *kp) +{ + struct dmatest_info *info = &test_info; + int ret; + + mutex_lock(&info->lock); + ret = param_set_bool(val, kp); + if (ret) { + mutex_unlock(&info->lock); + return ret; + } else if (dmatest_run) { + if (!is_threaded_test_pending(info)) { + /* + * We have nothing to run. This can be due to: + */ + ret = info->last_error; + if (ret) { + /* 1) Misconfiguration */ + pr_err("Channel misconfigured, can't continue\n"); + mutex_unlock(&info->lock); + return ret; + } else { + /* 2) We rely on defaults */ + pr_info("No channels configured, continue with any\n"); + if (!is_threaded_test_run(info)) + stop_threaded_test(info); + add_threaded_test(info); + } + } + start_threaded_tests(info); + } else { + stop_threaded_test(info); + } + + mutex_unlock(&info->lock); + + return ret; +} + +static int dmatest_chan_set(const char *val, const struct kernel_param *kp) +{ + struct dmatest_info *info = &test_info; + struct dmatest_chan *dtc; + char chan_reset_val[20]; + int ret; + + mutex_lock(&info->lock); + ret = param_set_copystring(val, kp); + if (ret) { + mutex_unlock(&info->lock); + return ret; + } + /*Clear any previously run threads */ + if (!is_threaded_test_run(info) && !is_threaded_test_pending(info)) + stop_threaded_test(info); + /* Reject channels that are already registered */ + if (is_threaded_test_pending(info)) { + list_for_each_entry(dtc, &info->channels, node) { + if (strcmp(dma_chan_name(dtc->chan), + strim(test_channel)) == 0) { + dtc = list_last_entry(&info->channels, + struct dmatest_chan, + node); + strscpy(chan_reset_val, + dma_chan_name(dtc->chan), + sizeof(chan_reset_val)); + ret = -EBUSY; + goto add_chan_err; + } + } + } + + add_threaded_test(info); + + /* Check if channel was added successfully */ + if (!list_empty(&info->channels)) { + /* + * if new channel was not successfully added, revert the + * "test_channel" string to the name of the last successfully + * added channel. exception for when users issues empty string + * to channel parameter. + */ + dtc = list_last_entry(&info->channels, struct dmatest_chan, node); + if ((strcmp(dma_chan_name(dtc->chan), strim(test_channel)) != 0) + && (strcmp("", strim(test_channel)) != 0)) { + ret = -EINVAL; + strscpy(chan_reset_val, dma_chan_name(dtc->chan), + sizeof(chan_reset_val)); + goto add_chan_err; + } + + } else { + /* Clear test_channel if no channels were added successfully */ + strscpy(chan_reset_val, "", sizeof(chan_reset_val)); + ret = -EBUSY; + goto add_chan_err; + } + + info->last_error = ret; + mutex_unlock(&info->lock); + + return ret; + +add_chan_err: + param_set_copystring(chan_reset_val, kp); + info->last_error = ret; + mutex_unlock(&info->lock); + + return ret; +} + +static int dmatest_chan_get(char *val, const struct kernel_param *kp) +{ + struct dmatest_info *info = &test_info; + + mutex_lock(&info->lock); + if (!is_threaded_test_run(info) && !is_threaded_test_pending(info)) { + stop_threaded_test(info); + strscpy(test_channel, "", sizeof(test_channel)); + } + mutex_unlock(&info->lock); + + return param_get_string(val, kp); +} + +static int dmatest_test_list_get(char *val, const struct kernel_param *kp) +{ + struct dmatest_info *info = &test_info; + struct dmatest_chan *dtc; + unsigned int thread_count = 0; + + list_for_each_entry(dtc, &info->channels, node) { + struct dmatest_thread *thread; + + thread_count = 0; + list_for_each_entry(thread, &dtc->threads, node) { + thread_count++; + } + pr_info("%u threads using %s\n", + thread_count, dma_chan_name(dtc->chan)); + } + + return 0; +} + +static int __init dmatest_init(void) +{ + struct dmatest_info *info = &test_info; + struct dmatest_params *params = &info->params; + + if (dmatest_run) { + mutex_lock(&info->lock); + add_threaded_test(info); + run_pending_tests(info); + mutex_unlock(&info->lock); + } + + if (params->iterations && wait) + wait_event(thread_wait, !is_threaded_test_run(info)); + + /* module parameters are stable, inittime tests are started, + * let userspace take over 'run' control + */ + info->did_init = true; + + return 0; +} +/* when compiled-in wait for drivers to load first */ +late_initcall(dmatest_init); + +static void __exit dmatest_exit(void) +{ + struct dmatest_info *info = &test_info; + + mutex_lock(&info->lock); + stop_threaded_test(info); + mutex_unlock(&info->lock); +} +module_exit(dmatest_exit); + +MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/dw-axi-dmac/Makefile b/drivers/dma/dw-axi-dmac/Makefile new file mode 100644 index 0000000000..4eb2f1639d --- /dev/null +++ b/drivers/dma/dw-axi-dmac/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac-platform.o diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c new file mode 100644 index 0000000000..dd02f84e40 --- /dev/null +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -0,0 +1,1602 @@ +// SPDX-License-Identifier: GPL-2.0 +// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com) + +/* + * Synopsys DesignWare AXI DMA Controller driver. + * + * Author: Eugeniy Paltsev + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dw-axi-dmac.h" +#include "../dmaengine.h" +#include "../virt-dma.h" + +/* + * The set of bus widths supported by the DMA controller. DW AXI DMAC supports + * master data bus width up to 512 bits (for both AXI master interfaces), but + * it depends on IP block configuration. + */ +#define AXI_DMA_BUSWIDTHS \ + (DMA_SLAVE_BUSWIDTH_1_BYTE | \ + DMA_SLAVE_BUSWIDTH_2_BYTES | \ + DMA_SLAVE_BUSWIDTH_4_BYTES | \ + DMA_SLAVE_BUSWIDTH_8_BYTES | \ + DMA_SLAVE_BUSWIDTH_16_BYTES | \ + DMA_SLAVE_BUSWIDTH_32_BYTES | \ + DMA_SLAVE_BUSWIDTH_64_BYTES) + +#define AXI_DMA_FLAG_HAS_APB_REGS BIT(0) +#define AXI_DMA_FLAG_HAS_RESETS BIT(1) +#define AXI_DMA_FLAG_USE_CFG2 BIT(2) + +static inline void +axi_dma_iowrite32(struct axi_dma_chip *chip, u32 reg, u32 val) +{ + iowrite32(val, chip->regs + reg); +} + +static inline u32 axi_dma_ioread32(struct axi_dma_chip *chip, u32 reg) +{ + return ioread32(chip->regs + reg); +} + +static inline void +axi_chan_iowrite32(struct axi_dma_chan *chan, u32 reg, u32 val) +{ + iowrite32(val, chan->chan_regs + reg); +} + +static inline u32 axi_chan_ioread32(struct axi_dma_chan *chan, u32 reg) +{ + return ioread32(chan->chan_regs + reg); +} + +static inline void +axi_chan_iowrite64(struct axi_dma_chan *chan, u32 reg, u64 val) +{ + /* + * We split one 64 bit write for two 32 bit write as some HW doesn't + * support 64 bit access. + */ + iowrite32(lower_32_bits(val), chan->chan_regs + reg); + iowrite32(upper_32_bits(val), chan->chan_regs + reg + 4); +} + +static inline void axi_chan_config_write(struct axi_dma_chan *chan, + struct axi_dma_chan_config *config) +{ + u32 cfg_lo, cfg_hi; + + cfg_lo = (config->dst_multblk_type << CH_CFG_L_DST_MULTBLK_TYPE_POS | + config->src_multblk_type << CH_CFG_L_SRC_MULTBLK_TYPE_POS); + if (chan->chip->dw->hdata->reg_map_8_channels && + !chan->chip->dw->hdata->use_cfg2) { + cfg_hi = config->tt_fc << CH_CFG_H_TT_FC_POS | + config->hs_sel_src << CH_CFG_H_HS_SEL_SRC_POS | + config->hs_sel_dst << CH_CFG_H_HS_SEL_DST_POS | + config->src_per << CH_CFG_H_SRC_PER_POS | + config->dst_per << CH_CFG_H_DST_PER_POS | + config->prior << CH_CFG_H_PRIORITY_POS; + } else { + cfg_lo |= config->src_per << CH_CFG2_L_SRC_PER_POS | + config->dst_per << CH_CFG2_L_DST_PER_POS; + cfg_hi = config->tt_fc << CH_CFG2_H_TT_FC_POS | + config->hs_sel_src << CH_CFG2_H_HS_SEL_SRC_POS | + config->hs_sel_dst << CH_CFG2_H_HS_SEL_DST_POS | + config->prior << CH_CFG2_H_PRIORITY_POS; + } + axi_chan_iowrite32(chan, CH_CFG_L, cfg_lo); + axi_chan_iowrite32(chan, CH_CFG_H, cfg_hi); +} + +static inline void axi_dma_disable(struct axi_dma_chip *chip) +{ + u32 val; + + val = axi_dma_ioread32(chip, DMAC_CFG); + val &= ~DMAC_EN_MASK; + axi_dma_iowrite32(chip, DMAC_CFG, val); +} + +static inline void axi_dma_enable(struct axi_dma_chip *chip) +{ + u32 val; + + val = axi_dma_ioread32(chip, DMAC_CFG); + val |= DMAC_EN_MASK; + axi_dma_iowrite32(chip, DMAC_CFG, val); +} + +static inline void axi_dma_irq_disable(struct axi_dma_chip *chip) +{ + u32 val; + + val = axi_dma_ioread32(chip, DMAC_CFG); + val &= ~INT_EN_MASK; + axi_dma_iowrite32(chip, DMAC_CFG, val); +} + +static inline void axi_dma_irq_enable(struct axi_dma_chip *chip) +{ + u32 val; + + val = axi_dma_ioread32(chip, DMAC_CFG); + val |= INT_EN_MASK; + axi_dma_iowrite32(chip, DMAC_CFG, val); +} + +static inline void axi_chan_irq_disable(struct axi_dma_chan *chan, u32 irq_mask) +{ + u32 val; + + if (likely(irq_mask == DWAXIDMAC_IRQ_ALL)) { + axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, DWAXIDMAC_IRQ_NONE); + } else { + val = axi_chan_ioread32(chan, CH_INTSTATUS_ENA); + val &= ~irq_mask; + axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, val); + } +} + +static inline void axi_chan_irq_set(struct axi_dma_chan *chan, u32 irq_mask) +{ + axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, irq_mask); +} + +static inline void axi_chan_irq_sig_set(struct axi_dma_chan *chan, u32 irq_mask) +{ + axi_chan_iowrite32(chan, CH_INTSIGNAL_ENA, irq_mask); +} + +static inline void axi_chan_irq_clear(struct axi_dma_chan *chan, u32 irq_mask) +{ + axi_chan_iowrite32(chan, CH_INTCLEAR, irq_mask); +} + +static inline u32 axi_chan_irq_read(struct axi_dma_chan *chan) +{ + return axi_chan_ioread32(chan, CH_INTSTATUS); +} + +static inline void axi_chan_disable(struct axi_dma_chan *chan) +{ + u32 val; + + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + val &= ~(BIT(chan->id) << DMAC_CHAN_EN_SHIFT); + if (chan->chip->dw->hdata->reg_map_8_channels) + val |= BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT; + else + val |= BIT(chan->id) << DMAC_CHAN_EN2_WE_SHIFT; + axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); +} + +static inline void axi_chan_enable(struct axi_dma_chan *chan) +{ + u32 val; + + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + if (chan->chip->dw->hdata->reg_map_8_channels) + val |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT | + BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT; + else + val |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT | + BIT(chan->id) << DMAC_CHAN_EN2_WE_SHIFT; + axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); +} + +static inline bool axi_chan_is_hw_enable(struct axi_dma_chan *chan) +{ + u32 val; + + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + + return !!(val & (BIT(chan->id) << DMAC_CHAN_EN_SHIFT)); +} + +static void axi_dma_hw_init(struct axi_dma_chip *chip) +{ + int ret; + u32 i; + + for (i = 0; i < chip->dw->hdata->nr_channels; i++) { + axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL); + axi_chan_disable(&chip->dw->chan[i]); + } + ret = dma_set_mask_and_coherent(chip->dev, DMA_BIT_MASK(64)); + if (ret) + dev_warn(chip->dev, "Unable to set coherent mask\n"); +} + +static u32 axi_chan_get_xfer_width(struct axi_dma_chan *chan, dma_addr_t src, + dma_addr_t dst, size_t len) +{ + u32 max_width = chan->chip->dw->hdata->m_data_width; + + return __ffs(src | dst | len | BIT(max_width)); +} + +static inline const char *axi_chan_name(struct axi_dma_chan *chan) +{ + return dma_chan_name(&chan->vc.chan); +} + +static struct axi_dma_desc *axi_desc_alloc(u32 num) +{ + struct axi_dma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->hw_desc = kcalloc(num, sizeof(*desc->hw_desc), GFP_NOWAIT); + if (!desc->hw_desc) { + kfree(desc); + return NULL; + } + + return desc; +} + +static struct axi_dma_lli *axi_desc_get(struct axi_dma_chan *chan, + dma_addr_t *addr) +{ + struct axi_dma_lli *lli; + dma_addr_t phys; + + lli = dma_pool_zalloc(chan->desc_pool, GFP_NOWAIT, &phys); + if (unlikely(!lli)) { + dev_err(chan2dev(chan), "%s: not enough descriptors available\n", + axi_chan_name(chan)); + return NULL; + } + + atomic_inc(&chan->descs_allocated); + *addr = phys; + + return lli; +} + +static void axi_desc_put(struct axi_dma_desc *desc) +{ + struct axi_dma_chan *chan = desc->chan; + int count = atomic_read(&chan->descs_allocated); + struct axi_dma_hw_desc *hw_desc; + int descs_put; + + for (descs_put = 0; descs_put < count; descs_put++) { + hw_desc = &desc->hw_desc[descs_put]; + dma_pool_free(chan->desc_pool, hw_desc->lli, hw_desc->llp); + } + + kfree(desc->hw_desc); + kfree(desc); + atomic_sub(descs_put, &chan->descs_allocated); + dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n", + axi_chan_name(chan), descs_put, + atomic_read(&chan->descs_allocated)); +} + +static void vchan_desc_put(struct virt_dma_desc *vdesc) +{ + axi_desc_put(vd_to_axi_desc(vdesc)); +} + +static enum dma_status +dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + struct virt_dma_desc *vdesc; + enum dma_status status; + u32 completed_length; + unsigned long flags; + u32 completed_blocks; + size_t bytes = 0; + u32 length; + u32 len; + + status = dma_cookie_status(dchan, cookie, txstate); + if (status == DMA_COMPLETE || !txstate) + return status; + + spin_lock_irqsave(&chan->vc.lock, flags); + + vdesc = vchan_find_desc(&chan->vc, cookie); + if (vdesc) { + length = vd_to_axi_desc(vdesc)->length; + completed_blocks = vd_to_axi_desc(vdesc)->completed_blocks; + len = vd_to_axi_desc(vdesc)->hw_desc[0].len; + completed_length = completed_blocks * len; + bytes = length - completed_length; + } + + spin_unlock_irqrestore(&chan->vc.lock, flags); + dma_set_residue(txstate, bytes); + + return status; +} + +static void write_desc_llp(struct axi_dma_hw_desc *desc, dma_addr_t adr) +{ + desc->lli->llp = cpu_to_le64(adr); +} + +static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr) +{ + axi_chan_iowrite64(chan, CH_LLP, adr); +} + +static void dw_axi_dma_set_byte_halfword(struct axi_dma_chan *chan, bool set) +{ + u32 offset = DMAC_APB_BYTE_WR_CH_EN; + u32 reg_width, val; + + if (!chan->chip->apb_regs) { + dev_dbg(chan->chip->dev, "apb_regs not initialized\n"); + return; + } + + reg_width = __ffs(chan->config.dst_addr_width); + if (reg_width == DWAXIDMAC_TRANS_WIDTH_16) + offset = DMAC_APB_HALFWORD_WR_CH_EN; + + val = ioread32(chan->chip->apb_regs + offset); + + if (set) + val |= BIT(chan->id); + else + val &= ~BIT(chan->id); + + iowrite32(val, chan->chip->apb_regs + offset); +} +/* Called in chan locked context */ +static void axi_chan_block_xfer_start(struct axi_dma_chan *chan, + struct axi_dma_desc *first) +{ + u32 priority = chan->chip->dw->hdata->priority[chan->id]; + struct axi_dma_chan_config config = {}; + u32 irq_mask; + u8 lms = 0; /* Select AXI0 master for LLI fetching */ + + if (unlikely(axi_chan_is_hw_enable(chan))) { + dev_err(chan2dev(chan), "%s is non-idle!\n", + axi_chan_name(chan)); + + return; + } + + axi_dma_enable(chan->chip); + + config.dst_multblk_type = DWAXIDMAC_MBLK_TYPE_LL; + config.src_multblk_type = DWAXIDMAC_MBLK_TYPE_LL; + config.tt_fc = DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC; + config.prior = priority; + config.hs_sel_dst = DWAXIDMAC_HS_SEL_HW; + config.hs_sel_src = DWAXIDMAC_HS_SEL_HW; + switch (chan->direction) { + case DMA_MEM_TO_DEV: + dw_axi_dma_set_byte_halfword(chan, true); + config.tt_fc = chan->config.device_fc ? + DWAXIDMAC_TT_FC_MEM_TO_PER_DST : + DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC; + if (chan->chip->apb_regs) + config.dst_per = chan->id; + else + config.dst_per = chan->hw_handshake_num; + break; + case DMA_DEV_TO_MEM: + config.tt_fc = chan->config.device_fc ? + DWAXIDMAC_TT_FC_PER_TO_MEM_SRC : + DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC; + if (chan->chip->apb_regs) + config.src_per = chan->id; + else + config.src_per = chan->hw_handshake_num; + break; + default: + break; + } + axi_chan_config_write(chan, &config); + + write_chan_llp(chan, first->hw_desc[0].llp | lms); + + irq_mask = DWAXIDMAC_IRQ_DMA_TRF | DWAXIDMAC_IRQ_ALL_ERR; + axi_chan_irq_sig_set(chan, irq_mask); + + /* Generate 'suspend' status but don't generate interrupt */ + irq_mask |= DWAXIDMAC_IRQ_SUSPENDED; + axi_chan_irq_set(chan, irq_mask); + + axi_chan_enable(chan); +} + +static void axi_chan_start_first_queued(struct axi_dma_chan *chan) +{ + struct axi_dma_desc *desc; + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&chan->vc); + if (!vd) + return; + + desc = vd_to_axi_desc(vd); + dev_vdbg(chan2dev(chan), "%s: started %u\n", axi_chan_name(chan), + vd->tx.cookie); + axi_chan_block_xfer_start(chan, desc); +} + +static void dma_chan_issue_pending(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (vchan_issue_pending(&chan->vc)) + axi_chan_start_first_queued(chan); + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static void dw_axi_dma_synchronize(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + + vchan_synchronize(&chan->vc); +} + +static int dma_chan_alloc_chan_resources(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + + /* ASSERT: channel is idle */ + if (axi_chan_is_hw_enable(chan)) { + dev_err(chan2dev(chan), "%s is non-idle!\n", + axi_chan_name(chan)); + return -EBUSY; + } + + /* LLI address must be aligned to a 64-byte boundary */ + chan->desc_pool = dma_pool_create(dev_name(chan2dev(chan)), + chan->chip->dev, + sizeof(struct axi_dma_lli), + 64, 0); + if (!chan->desc_pool) { + dev_err(chan2dev(chan), "No memory for descriptors\n"); + return -ENOMEM; + } + dev_vdbg(dchan2dev(dchan), "%s: allocating\n", axi_chan_name(chan)); + + pm_runtime_get(chan->chip->dev); + + return 0; +} + +static void dma_chan_free_chan_resources(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + + /* ASSERT: channel is idle */ + if (axi_chan_is_hw_enable(chan)) + dev_err(dchan2dev(dchan), "%s is non-idle!\n", + axi_chan_name(chan)); + + axi_chan_disable(chan); + axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL); + + vchan_free_chan_resources(&chan->vc); + + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + dev_vdbg(dchan2dev(dchan), + "%s: free resources, descriptor still allocated: %u\n", + axi_chan_name(chan), atomic_read(&chan->descs_allocated)); + + pm_runtime_put(chan->chip->dev); +} + +static void dw_axi_dma_set_hw_channel(struct axi_dma_chan *chan, bool set) +{ + struct axi_dma_chip *chip = chan->chip; + unsigned long reg_value, val; + + if (!chip->apb_regs) { + dev_err(chip->dev, "apb_regs not initialized\n"); + return; + } + + /* + * An unused DMA channel has a default value of 0x3F. + * Lock the DMA channel by assign a handshake number to the channel. + * Unlock the DMA channel by assign 0x3F to the channel. + */ + if (set) + val = chan->hw_handshake_num; + else + val = UNUSED_CHANNEL; + + reg_value = lo_hi_readq(chip->apb_regs + DMAC_APB_HW_HS_SEL_0); + + /* Channel is already allocated, set handshake as per channel ID */ + /* 64 bit write should handle for 8 channels */ + + reg_value &= ~(DMA_APB_HS_SEL_MASK << + (chan->id * DMA_APB_HS_SEL_BIT_SIZE)); + reg_value |= (val << (chan->id * DMA_APB_HS_SEL_BIT_SIZE)); + lo_hi_writeq(reg_value, chip->apb_regs + DMAC_APB_HW_HS_SEL_0); + + return; +} + +/* + * If DW_axi_dmac sees CHx_CTL.ShadowReg_Or_LLI_Last bit of the fetched LLI + * as 1, it understands that the current block is the final block in the + * transfer and completes the DMA transfer operation at the end of current + * block transfer. + */ +static void set_desc_last(struct axi_dma_hw_desc *desc) +{ + u32 val; + + val = le32_to_cpu(desc->lli->ctl_hi); + val |= CH_CTL_H_LLI_LAST; + desc->lli->ctl_hi = cpu_to_le32(val); +} + +static void write_desc_sar(struct axi_dma_hw_desc *desc, dma_addr_t adr) +{ + desc->lli->sar = cpu_to_le64(adr); +} + +static void write_desc_dar(struct axi_dma_hw_desc *desc, dma_addr_t adr) +{ + desc->lli->dar = cpu_to_le64(adr); +} + +static void set_desc_src_master(struct axi_dma_hw_desc *desc) +{ + u32 val; + + /* Select AXI0 for source master */ + val = le32_to_cpu(desc->lli->ctl_lo); + val &= ~CH_CTL_L_SRC_MAST; + desc->lli->ctl_lo = cpu_to_le32(val); +} + +static void set_desc_dest_master(struct axi_dma_hw_desc *hw_desc, + struct axi_dma_desc *desc) +{ + u32 val; + + /* Select AXI1 for source master if available */ + val = le32_to_cpu(hw_desc->lli->ctl_lo); + if (desc->chan->chip->dw->hdata->nr_masters > 1) + val |= CH_CTL_L_DST_MAST; + else + val &= ~CH_CTL_L_DST_MAST; + + hw_desc->lli->ctl_lo = cpu_to_le32(val); +} + +static int dw_axi_dma_set_hw_desc(struct axi_dma_chan *chan, + struct axi_dma_hw_desc *hw_desc, + dma_addr_t mem_addr, size_t len) +{ + unsigned int data_width = BIT(chan->chip->dw->hdata->m_data_width); + unsigned int reg_width; + unsigned int mem_width; + dma_addr_t device_addr; + size_t axi_block_ts; + size_t block_ts; + u32 ctllo, ctlhi; + u32 burst_len; + + axi_block_ts = chan->chip->dw->hdata->block_size[chan->id]; + + mem_width = __ffs(data_width | mem_addr | len); + if (mem_width > DWAXIDMAC_TRANS_WIDTH_32) + mem_width = DWAXIDMAC_TRANS_WIDTH_32; + + if (!IS_ALIGNED(mem_addr, 4)) { + dev_err(chan->chip->dev, "invalid buffer alignment\n"); + return -EINVAL; + } + + switch (chan->direction) { + case DMA_MEM_TO_DEV: + reg_width = __ffs(chan->config.dst_addr_width); + device_addr = chan->config.dst_addr; + ctllo = reg_width << CH_CTL_L_DST_WIDTH_POS | + mem_width << CH_CTL_L_SRC_WIDTH_POS | + DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_DST_INC_POS | + DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS; + block_ts = len >> mem_width; + break; + case DMA_DEV_TO_MEM: + reg_width = __ffs(chan->config.src_addr_width); + device_addr = chan->config.src_addr; + ctllo = reg_width << CH_CTL_L_SRC_WIDTH_POS | + mem_width << CH_CTL_L_DST_WIDTH_POS | + DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS | + DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_SRC_INC_POS; + block_ts = len >> reg_width; + break; + default: + return -EINVAL; + } + + if (block_ts > axi_block_ts) + return -EINVAL; + + hw_desc->lli = axi_desc_get(chan, &hw_desc->llp); + if (unlikely(!hw_desc->lli)) + return -ENOMEM; + + ctlhi = CH_CTL_H_LLI_VALID; + + if (chan->chip->dw->hdata->restrict_axi_burst_len) { + burst_len = chan->chip->dw->hdata->axi_rw_burst_len; + ctlhi |= CH_CTL_H_ARLEN_EN | CH_CTL_H_AWLEN_EN | + burst_len << CH_CTL_H_ARLEN_POS | + burst_len << CH_CTL_H_AWLEN_POS; + } + + hw_desc->lli->ctl_hi = cpu_to_le32(ctlhi); + + if (chan->direction == DMA_MEM_TO_DEV) { + write_desc_sar(hw_desc, mem_addr); + write_desc_dar(hw_desc, device_addr); + } else { + write_desc_sar(hw_desc, device_addr); + write_desc_dar(hw_desc, mem_addr); + } + + hw_desc->lli->block_ts_lo = cpu_to_le32(block_ts - 1); + + ctllo |= DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS | + DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS; + hw_desc->lli->ctl_lo = cpu_to_le32(ctllo); + + set_desc_src_master(hw_desc); + + hw_desc->len = len; + return 0; +} + +static size_t calculate_block_len(struct axi_dma_chan *chan, + dma_addr_t dma_addr, size_t buf_len, + enum dma_transfer_direction direction) +{ + u32 data_width, reg_width, mem_width; + size_t axi_block_ts, block_len; + + axi_block_ts = chan->chip->dw->hdata->block_size[chan->id]; + + switch (direction) { + case DMA_MEM_TO_DEV: + data_width = BIT(chan->chip->dw->hdata->m_data_width); + mem_width = __ffs(data_width | dma_addr | buf_len); + if (mem_width > DWAXIDMAC_TRANS_WIDTH_32) + mem_width = DWAXIDMAC_TRANS_WIDTH_32; + + block_len = axi_block_ts << mem_width; + break; + case DMA_DEV_TO_MEM: + reg_width = __ffs(chan->config.src_addr_width); + block_len = axi_block_ts << reg_width; + break; + default: + block_len = 0; + } + + return block_len; +} + +static struct dma_async_tx_descriptor * +dw_axi_dma_chan_prep_cyclic(struct dma_chan *dchan, dma_addr_t dma_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction direction, + unsigned long flags) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + struct axi_dma_hw_desc *hw_desc = NULL; + struct axi_dma_desc *desc = NULL; + dma_addr_t src_addr = dma_addr; + u32 num_periods, num_segments; + size_t axi_block_len; + u32 total_segments; + u32 segment_len; + unsigned int i; + int status; + u64 llp = 0; + u8 lms = 0; /* Select AXI0 master for LLI fetching */ + + num_periods = buf_len / period_len; + + axi_block_len = calculate_block_len(chan, dma_addr, buf_len, direction); + if (axi_block_len == 0) + return NULL; + + num_segments = DIV_ROUND_UP(period_len, axi_block_len); + segment_len = DIV_ROUND_UP(period_len, num_segments); + + total_segments = num_periods * num_segments; + + desc = axi_desc_alloc(total_segments); + if (unlikely(!desc)) + goto err_desc_get; + + chan->direction = direction; + desc->chan = chan; + chan->cyclic = true; + desc->length = 0; + desc->period_len = period_len; + + for (i = 0; i < total_segments; i++) { + hw_desc = &desc->hw_desc[i]; + + status = dw_axi_dma_set_hw_desc(chan, hw_desc, src_addr, + segment_len); + if (status < 0) + goto err_desc_get; + + desc->length += hw_desc->len; + /* Set end-of-link to the linked descriptor, so that cyclic + * callback function can be triggered during interrupt. + */ + set_desc_last(hw_desc); + + src_addr += segment_len; + } + + llp = desc->hw_desc[0].llp; + + /* Managed transfer list */ + do { + hw_desc = &desc->hw_desc[--total_segments]; + write_desc_llp(hw_desc, llp | lms); + llp = hw_desc->llp; + } while (total_segments); + + dw_axi_dma_set_hw_channel(chan, true); + + return vchan_tx_prep(&chan->vc, &desc->vd, flags); + +err_desc_get: + if (desc) + axi_desc_put(desc); + + return NULL; +} + +static struct dma_async_tx_descriptor * +dw_axi_dma_chan_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + struct axi_dma_hw_desc *hw_desc = NULL; + struct axi_dma_desc *desc = NULL; + u32 num_segments, segment_len; + unsigned int loop = 0; + struct scatterlist *sg; + size_t axi_block_len; + u32 len, num_sgs = 0; + unsigned int i; + dma_addr_t mem; + int status; + u64 llp = 0; + u8 lms = 0; /* Select AXI0 master for LLI fetching */ + + if (unlikely(!is_slave_direction(direction) || !sg_len)) + return NULL; + + mem = sg_dma_address(sgl); + len = sg_dma_len(sgl); + + axi_block_len = calculate_block_len(chan, mem, len, direction); + if (axi_block_len == 0) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) + num_sgs += DIV_ROUND_UP(sg_dma_len(sg), axi_block_len); + + desc = axi_desc_alloc(num_sgs); + if (unlikely(!desc)) + goto err_desc_get; + + desc->chan = chan; + desc->length = 0; + chan->direction = direction; + + for_each_sg(sgl, sg, sg_len, i) { + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + num_segments = DIV_ROUND_UP(sg_dma_len(sg), axi_block_len); + segment_len = DIV_ROUND_UP(sg_dma_len(sg), num_segments); + + do { + hw_desc = &desc->hw_desc[loop++]; + status = dw_axi_dma_set_hw_desc(chan, hw_desc, mem, segment_len); + if (status < 0) + goto err_desc_get; + + desc->length += hw_desc->len; + len -= segment_len; + mem += segment_len; + } while (len >= segment_len); + } + + /* Set end-of-link to the last link descriptor of list */ + set_desc_last(&desc->hw_desc[num_sgs - 1]); + + /* Managed transfer list */ + do { + hw_desc = &desc->hw_desc[--num_sgs]; + write_desc_llp(hw_desc, llp | lms); + llp = hw_desc->llp; + } while (num_sgs); + + dw_axi_dma_set_hw_channel(chan, true); + + return vchan_tx_prep(&chan->vc, &desc->vd, flags); + +err_desc_get: + if (desc) + axi_desc_put(desc); + + return NULL; +} + +static struct dma_async_tx_descriptor * +dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr, + dma_addr_t src_adr, size_t len, unsigned long flags) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + size_t block_ts, max_block_ts, xfer_len; + struct axi_dma_hw_desc *hw_desc = NULL; + struct axi_dma_desc *desc = NULL; + u32 xfer_width, reg, num; + u64 llp = 0; + u8 lms = 0; /* Select AXI0 master for LLI fetching */ + + dev_dbg(chan2dev(chan), "%s: memcpy: src: %pad dst: %pad length: %zd flags: %#lx", + axi_chan_name(chan), &src_adr, &dst_adr, len, flags); + + max_block_ts = chan->chip->dw->hdata->block_size[chan->id]; + xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, len); + num = DIV_ROUND_UP(len, max_block_ts << xfer_width); + desc = axi_desc_alloc(num); + if (unlikely(!desc)) + goto err_desc_get; + + desc->chan = chan; + num = 0; + desc->length = 0; + while (len) { + xfer_len = len; + + hw_desc = &desc->hw_desc[num]; + /* + * Take care for the alignment. + * Actually source and destination widths can be different, but + * make them same to be simpler. + */ + xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, xfer_len); + + /* + * block_ts indicates the total number of data of width + * to be transferred in a DMA block transfer. + * BLOCK_TS register should be set to block_ts - 1 + */ + block_ts = xfer_len >> xfer_width; + if (block_ts > max_block_ts) { + block_ts = max_block_ts; + xfer_len = max_block_ts << xfer_width; + } + + hw_desc->lli = axi_desc_get(chan, &hw_desc->llp); + if (unlikely(!hw_desc->lli)) + goto err_desc_get; + + write_desc_sar(hw_desc, src_adr); + write_desc_dar(hw_desc, dst_adr); + hw_desc->lli->block_ts_lo = cpu_to_le32(block_ts - 1); + + reg = CH_CTL_H_LLI_VALID; + if (chan->chip->dw->hdata->restrict_axi_burst_len) { + u32 burst_len = chan->chip->dw->hdata->axi_rw_burst_len; + + reg |= (CH_CTL_H_ARLEN_EN | + burst_len << CH_CTL_H_ARLEN_POS | + CH_CTL_H_AWLEN_EN | + burst_len << CH_CTL_H_AWLEN_POS); + } + hw_desc->lli->ctl_hi = cpu_to_le32(reg); + + reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS | + DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS | + xfer_width << CH_CTL_L_DST_WIDTH_POS | + xfer_width << CH_CTL_L_SRC_WIDTH_POS | + DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS | + DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS); + hw_desc->lli->ctl_lo = cpu_to_le32(reg); + + set_desc_src_master(hw_desc); + set_desc_dest_master(hw_desc, desc); + + hw_desc->len = xfer_len; + desc->length += hw_desc->len; + /* update the length and addresses for the next loop cycle */ + len -= xfer_len; + dst_adr += xfer_len; + src_adr += xfer_len; + num++; + } + + /* Set end-of-link to the last link descriptor of list */ + set_desc_last(&desc->hw_desc[num - 1]); + /* Managed transfer list */ + do { + hw_desc = &desc->hw_desc[--num]; + write_desc_llp(hw_desc, llp | lms); + llp = hw_desc->llp; + } while (num); + + return vchan_tx_prep(&chan->vc, &desc->vd, flags); + +err_desc_get: + if (desc) + axi_desc_put(desc); + return NULL; +} + +static int dw_axi_dma_chan_slave_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + + memcpy(&chan->config, config, sizeof(*config)); + + return 0; +} + +static void axi_chan_dump_lli(struct axi_dma_chan *chan, + struct axi_dma_hw_desc *desc) +{ + if (!desc->lli) { + dev_err(dchan2dev(&chan->vc.chan), "NULL LLI\n"); + return; + } + + dev_err(dchan2dev(&chan->vc.chan), + "SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x", + le64_to_cpu(desc->lli->sar), + le64_to_cpu(desc->lli->dar), + le64_to_cpu(desc->lli->llp), + le32_to_cpu(desc->lli->block_ts_lo), + le32_to_cpu(desc->lli->ctl_hi), + le32_to_cpu(desc->lli->ctl_lo)); +} + +static void axi_chan_list_dump_lli(struct axi_dma_chan *chan, + struct axi_dma_desc *desc_head) +{ + int count = atomic_read(&chan->descs_allocated); + int i; + + for (i = 0; i < count; i++) + axi_chan_dump_lli(chan, &desc_head->hw_desc[i]); +} + +static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status) +{ + struct virt_dma_desc *vd; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + + axi_chan_disable(chan); + + /* The bad descriptor currently is in the head of vc list */ + vd = vchan_next_desc(&chan->vc); + if (!vd) { + dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n", + axi_chan_name(chan)); + goto out; + } + /* Remove the completed descriptor from issued list */ + list_del(&vd->node); + + /* WARN about bad descriptor */ + dev_err(chan2dev(chan), + "Bad descriptor submitted for %s, cookie: %d, irq: 0x%08x\n", + axi_chan_name(chan), vd->tx.cookie, status); + axi_chan_list_dump_lli(chan, vd_to_axi_desc(vd)); + + vchan_cookie_complete(vd); + + /* Try to restart the controller */ + axi_chan_start_first_queued(chan); + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan) +{ + int count = atomic_read(&chan->descs_allocated); + struct axi_dma_hw_desc *hw_desc; + struct axi_dma_desc *desc; + struct virt_dma_desc *vd; + unsigned long flags; + u64 llp; + int i; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (unlikely(axi_chan_is_hw_enable(chan))) { + dev_err(chan2dev(chan), "BUG: %s caught DWAXIDMAC_IRQ_DMA_TRF, but channel not idle!\n", + axi_chan_name(chan)); + axi_chan_disable(chan); + } + + /* The completed descriptor currently is in the head of vc list */ + vd = vchan_next_desc(&chan->vc); + if (!vd) { + dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n", + axi_chan_name(chan)); + goto out; + } + + if (chan->cyclic) { + desc = vd_to_axi_desc(vd); + if (desc) { + llp = lo_hi_readq(chan->chan_regs + CH_LLP); + for (i = 0; i < count; i++) { + hw_desc = &desc->hw_desc[i]; + if (hw_desc->llp == llp) { + axi_chan_irq_clear(chan, hw_desc->lli->status_lo); + hw_desc->lli->ctl_hi |= CH_CTL_H_LLI_VALID; + desc->completed_blocks = i; + + if (((hw_desc->len * (i + 1)) % desc->period_len) == 0) + vchan_cyclic_callback(vd); + break; + } + } + + axi_chan_enable(chan); + } + } else { + /* Remove the completed descriptor from issued list before completing */ + list_del(&vd->node); + vchan_cookie_complete(vd); + + /* Submit queued descriptors after processing the completed ones */ + axi_chan_start_first_queued(chan); + } + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static irqreturn_t dw_axi_dma_interrupt(int irq, void *dev_id) +{ + struct axi_dma_chip *chip = dev_id; + struct dw_axi_dma *dw = chip->dw; + struct axi_dma_chan *chan; + + u32 status, i; + + /* Disable DMAC interrupts. We'll enable them after processing channels */ + axi_dma_irq_disable(chip); + + /* Poll, clear and process every channel interrupt status */ + for (i = 0; i < dw->hdata->nr_channels; i++) { + chan = &dw->chan[i]; + status = axi_chan_irq_read(chan); + axi_chan_irq_clear(chan, status); + + dev_vdbg(chip->dev, "%s %u IRQ status: 0x%08x\n", + axi_chan_name(chan), i, status); + + if (status & DWAXIDMAC_IRQ_ALL_ERR) + axi_chan_handle_err(chan, status); + else if (status & DWAXIDMAC_IRQ_DMA_TRF) + axi_chan_block_xfer_complete(chan); + } + + /* Re-enable interrupts */ + axi_dma_irq_enable(chip); + + return IRQ_HANDLED; +} + +static int dma_chan_terminate_all(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + u32 chan_active = BIT(chan->id) << DMAC_CHAN_EN_SHIFT; + unsigned long flags; + u32 val; + int ret; + LIST_HEAD(head); + + axi_chan_disable(chan); + + ret = readl_poll_timeout_atomic(chan->chip->regs + DMAC_CHEN, val, + !(val & chan_active), 1000, 50000); + if (ret == -ETIMEDOUT) + dev_warn(dchan2dev(dchan), + "%s failed to stop\n", axi_chan_name(chan)); + + if (chan->direction != DMA_MEM_TO_MEM) + dw_axi_dma_set_hw_channel(chan, false); + if (chan->direction == DMA_MEM_TO_DEV) + dw_axi_dma_set_byte_halfword(chan, false); + + spin_lock_irqsave(&chan->vc.lock, flags); + + vchan_get_all_descriptors(&chan->vc, &head); + + chan->cyclic = false; + spin_unlock_irqrestore(&chan->vc.lock, flags); + + vchan_dma_desc_free_list(&chan->vc, &head); + + dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan)); + + return 0; +} + +static int dma_chan_pause(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + unsigned long flags; + unsigned int timeout = 20; /* timeout iterations */ + u32 val; + + spin_lock_irqsave(&chan->vc.lock, flags); + + if (chan->chip->dw->hdata->reg_map_8_channels) { + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + val |= BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT | + BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT; + axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); + } else { + val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG); + val |= BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT | + BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT; + axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val); + } + + do { + if (axi_chan_irq_read(chan) & DWAXIDMAC_IRQ_SUSPENDED) + break; + + udelay(2); + } while (--timeout); + + axi_chan_irq_clear(chan, DWAXIDMAC_IRQ_SUSPENDED); + + chan->is_paused = true; + + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return timeout ? 0 : -EAGAIN; +} + +/* Called in chan locked context */ +static inline void axi_chan_resume(struct axi_dma_chan *chan) +{ + u32 val; + + if (chan->chip->dw->hdata->reg_map_8_channels) { + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT); + val |= (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT); + axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); + } else { + val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG); + val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT); + val |= (BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT); + axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val); + } + + chan->is_paused = false; +} + +static int dma_chan_resume(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + + if (chan->is_paused) + axi_chan_resume(chan); + + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return 0; +} + +static int axi_dma_suspend(struct axi_dma_chip *chip) +{ + axi_dma_irq_disable(chip); + axi_dma_disable(chip); + + clk_disable_unprepare(chip->core_clk); + clk_disable_unprepare(chip->cfgr_clk); + + return 0; +} + +static int axi_dma_resume(struct axi_dma_chip *chip) +{ + int ret; + + ret = clk_prepare_enable(chip->cfgr_clk); + if (ret < 0) + return ret; + + ret = clk_prepare_enable(chip->core_clk); + if (ret < 0) + return ret; + + axi_dma_enable(chip); + axi_dma_irq_enable(chip); + + return 0; +} + +static int __maybe_unused axi_dma_runtime_suspend(struct device *dev) +{ + struct axi_dma_chip *chip = dev_get_drvdata(dev); + + return axi_dma_suspend(chip); +} + +static int __maybe_unused axi_dma_runtime_resume(struct device *dev) +{ + struct axi_dma_chip *chip = dev_get_drvdata(dev); + + return axi_dma_resume(chip); +} + +static struct dma_chan *dw_axi_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dw_axi_dma *dw = ofdma->of_dma_data; + struct axi_dma_chan *chan; + struct dma_chan *dchan; + + dchan = dma_get_any_slave_channel(&dw->dma); + if (!dchan) + return NULL; + + chan = dchan_to_axi_dma_chan(dchan); + chan->hw_handshake_num = dma_spec->args[0]; + return dchan; +} + +static int parse_device_properties(struct axi_dma_chip *chip) +{ + struct device *dev = chip->dev; + u32 tmp, carr[DMAC_MAX_CHANNELS]; + int ret; + + ret = device_property_read_u32(dev, "dma-channels", &tmp); + if (ret) + return ret; + if (tmp == 0 || tmp > DMAC_MAX_CHANNELS) + return -EINVAL; + + chip->dw->hdata->nr_channels = tmp; + if (tmp <= DMA_REG_MAP_CH_REF) + chip->dw->hdata->reg_map_8_channels = true; + + ret = device_property_read_u32(dev, "snps,dma-masters", &tmp); + if (ret) + return ret; + if (tmp == 0 || tmp > DMAC_MAX_MASTERS) + return -EINVAL; + + chip->dw->hdata->nr_masters = tmp; + + ret = device_property_read_u32(dev, "snps,data-width", &tmp); + if (ret) + return ret; + if (tmp > DWAXIDMAC_TRANS_WIDTH_MAX) + return -EINVAL; + + chip->dw->hdata->m_data_width = tmp; + + ret = device_property_read_u32_array(dev, "snps,block-size", carr, + chip->dw->hdata->nr_channels); + if (ret) + return ret; + for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) { + if (carr[tmp] == 0 || carr[tmp] > DMAC_MAX_BLK_SIZE) + return -EINVAL; + + chip->dw->hdata->block_size[tmp] = carr[tmp]; + } + + ret = device_property_read_u32_array(dev, "snps,priority", carr, + chip->dw->hdata->nr_channels); + if (ret) + return ret; + /* Priority value must be programmed within [0:nr_channels-1] range */ + for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) { + if (carr[tmp] >= chip->dw->hdata->nr_channels) + return -EINVAL; + + chip->dw->hdata->priority[tmp] = carr[tmp]; + } + + /* axi-max-burst-len is optional property */ + ret = device_property_read_u32(dev, "snps,axi-max-burst-len", &tmp); + if (!ret) { + if (tmp > DWAXIDMAC_ARWLEN_MAX + 1) + return -EINVAL; + if (tmp < DWAXIDMAC_ARWLEN_MIN + 1) + return -EINVAL; + + chip->dw->hdata->restrict_axi_burst_len = true; + chip->dw->hdata->axi_rw_burst_len = tmp; + } + + return 0; +} + +static int dw_probe(struct platform_device *pdev) +{ + struct axi_dma_chip *chip; + struct dw_axi_dma *dw; + struct dw_axi_dma_hcfg *hdata; + struct reset_control *resets; + unsigned int flags; + u32 i; + int ret; + + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + dw = devm_kzalloc(&pdev->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + hdata = devm_kzalloc(&pdev->dev, sizeof(*hdata), GFP_KERNEL); + if (!hdata) + return -ENOMEM; + + chip->dw = dw; + chip->dev = &pdev->dev; + chip->dw->hdata = hdata; + + chip->irq = platform_get_irq(pdev, 0); + if (chip->irq < 0) + return chip->irq; + + chip->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(chip->regs)) + return PTR_ERR(chip->regs); + + flags = (uintptr_t)of_device_get_match_data(&pdev->dev); + if (flags & AXI_DMA_FLAG_HAS_APB_REGS) { + chip->apb_regs = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(chip->apb_regs)) + return PTR_ERR(chip->apb_regs); + } + + if (flags & AXI_DMA_FLAG_HAS_RESETS) { + resets = devm_reset_control_array_get_exclusive(&pdev->dev); + if (IS_ERR(resets)) + return PTR_ERR(resets); + + ret = reset_control_deassert(resets); + if (ret) + return ret; + } + + chip->dw->hdata->use_cfg2 = !!(flags & AXI_DMA_FLAG_USE_CFG2); + + chip->core_clk = devm_clk_get(chip->dev, "core-clk"); + if (IS_ERR(chip->core_clk)) + return PTR_ERR(chip->core_clk); + + chip->cfgr_clk = devm_clk_get(chip->dev, "cfgr-clk"); + if (IS_ERR(chip->cfgr_clk)) + return PTR_ERR(chip->cfgr_clk); + + ret = parse_device_properties(chip); + if (ret) + return ret; + + dw->chan = devm_kcalloc(chip->dev, hdata->nr_channels, + sizeof(*dw->chan), GFP_KERNEL); + if (!dw->chan) + return -ENOMEM; + + ret = devm_request_irq(chip->dev, chip->irq, dw_axi_dma_interrupt, + IRQF_SHARED, KBUILD_MODNAME, chip); + if (ret) + return ret; + + INIT_LIST_HEAD(&dw->dma.channels); + for (i = 0; i < hdata->nr_channels; i++) { + struct axi_dma_chan *chan = &dw->chan[i]; + + chan->chip = chip; + chan->id = i; + chan->chan_regs = chip->regs + COMMON_REG_LEN + i * CHAN_REG_LEN; + atomic_set(&chan->descs_allocated, 0); + + chan->vc.desc_free = vchan_desc_put; + vchan_init(&chan->vc, &dw->dma); + } + + /* Set capabilities */ + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); + dma_cap_set(DMA_CYCLIC, dw->dma.cap_mask); + + /* DMA capabilities */ + dw->dma.max_burst = hdata->axi_rw_burst_len; + dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS; + dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS; + dw->dma.directions = BIT(DMA_MEM_TO_MEM); + dw->dma.directions |= BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + dw->dma.dev = chip->dev; + dw->dma.device_tx_status = dma_chan_tx_status; + dw->dma.device_issue_pending = dma_chan_issue_pending; + dw->dma.device_terminate_all = dma_chan_terminate_all; + dw->dma.device_pause = dma_chan_pause; + dw->dma.device_resume = dma_chan_resume; + + dw->dma.device_alloc_chan_resources = dma_chan_alloc_chan_resources; + dw->dma.device_free_chan_resources = dma_chan_free_chan_resources; + + dw->dma.device_prep_dma_memcpy = dma_chan_prep_dma_memcpy; + dw->dma.device_synchronize = dw_axi_dma_synchronize; + dw->dma.device_config = dw_axi_dma_chan_slave_config; + dw->dma.device_prep_slave_sg = dw_axi_dma_chan_prep_slave_sg; + dw->dma.device_prep_dma_cyclic = dw_axi_dma_chan_prep_cyclic; + + /* + * Synopsis DesignWare AxiDMA datasheet mentioned Maximum + * supported blocks is 1024. Device register width is 4 bytes. + * Therefore, set constraint to 1024 * 4. + */ + dw->dma.dev->dma_parms = &dw->dma_parms; + dma_set_max_seg_size(&pdev->dev, MAX_BLOCK_SIZE); + platform_set_drvdata(pdev, chip); + + pm_runtime_enable(chip->dev); + + /* + * We can't just call pm_runtime_get here instead of + * pm_runtime_get_noresume + axi_dma_resume because we need + * driver to work also without Runtime PM. + */ + pm_runtime_get_noresume(chip->dev); + ret = axi_dma_resume(chip); + if (ret < 0) + goto err_pm_disable; + + axi_dma_hw_init(chip); + + pm_runtime_put(chip->dev); + + ret = dmaenginem_async_device_register(&dw->dma); + if (ret) + goto err_pm_disable; + + /* Register with OF helpers for DMA lookups */ + ret = of_dma_controller_register(pdev->dev.of_node, + dw_axi_dma_of_xlate, dw); + if (ret < 0) + dev_warn(&pdev->dev, + "Failed to register OF DMA controller, fallback to MEM_TO_MEM mode\n"); + + dev_info(chip->dev, "DesignWare AXI DMA Controller, %d channels\n", + dw->hdata->nr_channels); + + return 0; + +err_pm_disable: + pm_runtime_disable(chip->dev); + + return ret; +} + +static int dw_remove(struct platform_device *pdev) +{ + struct axi_dma_chip *chip = platform_get_drvdata(pdev); + struct dw_axi_dma *dw = chip->dw; + struct axi_dma_chan *chan, *_chan; + u32 i; + + /* Enable clk before accessing to registers */ + clk_prepare_enable(chip->cfgr_clk); + clk_prepare_enable(chip->core_clk); + axi_dma_irq_disable(chip); + for (i = 0; i < dw->hdata->nr_channels; i++) { + axi_chan_disable(&chip->dw->chan[i]); + axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL); + } + axi_dma_disable(chip); + + pm_runtime_disable(chip->dev); + axi_dma_suspend(chip); + + devm_free_irq(chip->dev, chip->irq, chip); + + of_dma_controller_free(chip->dev->of_node); + + list_for_each_entry_safe(chan, _chan, &dw->dma.channels, + vc.chan.device_node) { + list_del(&chan->vc.chan.device_node); + tasklet_kill(&chan->vc.task); + } + + return 0; +} + +static const struct dev_pm_ops dw_axi_dma_pm_ops = { + SET_RUNTIME_PM_OPS(axi_dma_runtime_suspend, axi_dma_runtime_resume, NULL) +}; + +static const struct of_device_id dw_dma_of_id_table[] = { + { + .compatible = "snps,axi-dma-1.01a" + }, { + .compatible = "intel,kmb-axi-dma", + .data = (void *)AXI_DMA_FLAG_HAS_APB_REGS, + }, { + .compatible = "starfive,jh7110-axi-dma", + .data = (void *)(AXI_DMA_FLAG_HAS_RESETS | AXI_DMA_FLAG_USE_CFG2), + }, + {} +}; +MODULE_DEVICE_TABLE(of, dw_dma_of_id_table); + +static struct platform_driver dw_driver = { + .probe = dw_probe, + .remove = dw_remove, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = dw_dma_of_id_table, + .pm = &dw_axi_dma_pm_ops, + }, +}; +module_platform_driver(dw_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare AXI DMA Controller platform driver"); +MODULE_AUTHOR("Eugeniy Paltsev "); diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h new file mode 100644 index 0000000000..eb267cb24f --- /dev/null +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com) + +/* + * Synopsys DesignWare AXI DMA Controller driver. + * + * Author: Eugeniy Paltsev + */ + +#ifndef _AXI_DMA_PLATFORM_H +#define _AXI_DMA_PLATFORM_H + +#include +#include +#include +#include +#include + +#include "../virt-dma.h" + +#define DMAC_MAX_CHANNELS 16 +#define DMAC_MAX_MASTERS 2 +#define DMAC_MAX_BLK_SIZE 0x200000 + +struct dw_axi_dma_hcfg { + u32 nr_channels; + u32 nr_masters; + u32 m_data_width; + u32 block_size[DMAC_MAX_CHANNELS]; + u32 priority[DMAC_MAX_CHANNELS]; + /* maximum supported axi burst length */ + u32 axi_rw_burst_len; + /* Register map for DMAX_NUM_CHANNELS <= 8 */ + bool reg_map_8_channels; + bool restrict_axi_burst_len; + bool use_cfg2; +}; + +struct axi_dma_chan { + struct axi_dma_chip *chip; + void __iomem *chan_regs; + u8 id; + u8 hw_handshake_num; + atomic_t descs_allocated; + + struct dma_pool *desc_pool; + struct virt_dma_chan vc; + + struct axi_dma_desc *desc; + struct dma_slave_config config; + enum dma_transfer_direction direction; + bool cyclic; + /* these other elements are all protected by vc.lock */ + bool is_paused; +}; + +struct dw_axi_dma { + struct dma_device dma; + struct dw_axi_dma_hcfg *hdata; + struct device_dma_parameters dma_parms; + + /* channels */ + struct axi_dma_chan *chan; +}; + +struct axi_dma_chip { + struct device *dev; + int irq; + void __iomem *regs; + void __iomem *apb_regs; + struct clk *core_clk; + struct clk *cfgr_clk; + struct dw_axi_dma *dw; +}; + +/* LLI == Linked List Item */ +struct __packed axi_dma_lli { + __le64 sar; + __le64 dar; + __le32 block_ts_lo; + __le32 block_ts_hi; + __le64 llp; + __le32 ctl_lo; + __le32 ctl_hi; + __le32 sstat; + __le32 dstat; + __le32 status_lo; + __le32 status_hi; + __le32 reserved_lo; + __le32 reserved_hi; +}; + +struct axi_dma_hw_desc { + struct axi_dma_lli *lli; + dma_addr_t llp; + u32 len; +}; + +struct axi_dma_desc { + struct axi_dma_hw_desc *hw_desc; + + struct virt_dma_desc vd; + struct axi_dma_chan *chan; + u32 completed_blocks; + u32 length; + u32 period_len; +}; + +struct axi_dma_chan_config { + u8 dst_multblk_type; + u8 src_multblk_type; + u8 dst_per; + u8 src_per; + u8 tt_fc; + u8 prior; + u8 hs_sel_dst; + u8 hs_sel_src; +}; + +static inline struct device *dchan2dev(struct dma_chan *dchan) +{ + return &dchan->dev->device; +} + +static inline struct device *chan2dev(struct axi_dma_chan *chan) +{ + return &chan->vc.chan.dev->device; +} + +static inline struct axi_dma_desc *vd_to_axi_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct axi_dma_desc, vd); +} + +static inline struct axi_dma_chan *vc_to_axi_dma_chan(struct virt_dma_chan *vc) +{ + return container_of(vc, struct axi_dma_chan, vc); +} + +static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan) +{ + return vc_to_axi_dma_chan(to_virt_chan(dchan)); +} + + +#define COMMON_REG_LEN 0x100 +#define CHAN_REG_LEN 0x100 + +/* Common registers offset */ +#define DMAC_ID 0x000 /* R DMAC ID */ +#define DMAC_COMPVER 0x008 /* R DMAC Component Version */ +#define DMAC_CFG 0x010 /* R/W DMAC Configuration */ +#define DMAC_CHEN 0x018 /* R/W DMAC Channel Enable */ +#define DMAC_CHEN_L 0x018 /* R/W DMAC Channel Enable 00-31 */ +#define DMAC_CHEN_H 0x01C /* R/W DMAC Channel Enable 32-63 */ +#define DMAC_CHSUSPREG 0x020 /* R/W DMAC Channel Suspend */ +#define DMAC_CHABORTREG 0x028 /* R/W DMAC Channel Abort */ +#define DMAC_INTSTATUS 0x030 /* R DMAC Interrupt Status */ +#define DMAC_COMMON_INTCLEAR 0x038 /* W DMAC Interrupt Clear */ +#define DMAC_COMMON_INTSTATUS_ENA 0x040 /* R DMAC Interrupt Status Enable */ +#define DMAC_COMMON_INTSIGNAL_ENA 0x048 /* R/W DMAC Interrupt Signal Enable */ +#define DMAC_COMMON_INTSTATUS 0x050 /* R DMAC Interrupt Status */ +#define DMAC_RESET 0x058 /* R DMAC Reset Register1 */ + +/* DMA channel registers offset */ +#define CH_SAR 0x000 /* R/W Chan Source Address */ +#define CH_DAR 0x008 /* R/W Chan Destination Address */ +#define CH_BLOCK_TS 0x010 /* R/W Chan Block Transfer Size */ +#define CH_CTL 0x018 /* R/W Chan Control */ +#define CH_CTL_L 0x018 /* R/W Chan Control 00-31 */ +#define CH_CTL_H 0x01C /* R/W Chan Control 32-63 */ +#define CH_CFG 0x020 /* R/W Chan Configuration */ +#define CH_CFG_L 0x020 /* R/W Chan Configuration 00-31 */ +#define CH_CFG_H 0x024 /* R/W Chan Configuration 32-63 */ +#define CH_LLP 0x028 /* R/W Chan Linked List Pointer */ +#define CH_STATUS 0x030 /* R Chan Status */ +#define CH_SWHSSRC 0x038 /* R/W Chan SW Handshake Source */ +#define CH_SWHSDST 0x040 /* R/W Chan SW Handshake Destination */ +#define CH_BLK_TFR_RESUMEREQ 0x048 /* W Chan Block Transfer Resume Req */ +#define CH_AXI_ID 0x050 /* R/W Chan AXI ID */ +#define CH_AXI_QOS 0x058 /* R/W Chan AXI QOS */ +#define CH_SSTAT 0x060 /* R Chan Source Status */ +#define CH_DSTAT 0x068 /* R Chan Destination Status */ +#define CH_SSTATAR 0x070 /* R/W Chan Source Status Fetch Addr */ +#define CH_DSTATAR 0x078 /* R/W Chan Destination Status Fetch Addr */ +#define CH_INTSTATUS_ENA 0x080 /* R/W Chan Interrupt Status Enable */ +#define CH_INTSTATUS 0x088 /* R/W Chan Interrupt Status */ +#define CH_INTSIGNAL_ENA 0x090 /* R/W Chan Interrupt Signal Enable */ +#define CH_INTCLEAR 0x098 /* W Chan Interrupt Clear */ + +/* These Apb registers are used by Intel KeemBay SoC */ +#define DMAC_APB_CFG 0x000 /* DMAC Apb Configuration Register */ +#define DMAC_APB_STAT 0x004 /* DMAC Apb Status Register */ +#define DMAC_APB_DEBUG_STAT_0 0x008 /* DMAC Apb Debug Status Register 0 */ +#define DMAC_APB_DEBUG_STAT_1 0x00C /* DMAC Apb Debug Status Register 1 */ +#define DMAC_APB_HW_HS_SEL_0 0x010 /* DMAC Apb HW HS register 0 */ +#define DMAC_APB_HW_HS_SEL_1 0x014 /* DMAC Apb HW HS register 1 */ +#define DMAC_APB_LPI 0x018 /* DMAC Apb Low Power Interface Reg */ +#define DMAC_APB_BYTE_WR_CH_EN 0x01C /* DMAC Apb Byte Write Enable */ +#define DMAC_APB_HALFWORD_WR_CH_EN 0x020 /* DMAC Halfword write enables */ + +#define UNUSED_CHANNEL 0x3F /* Set unused DMA channel to 0x3F */ +#define DMA_APB_HS_SEL_BIT_SIZE 0x08 /* HW handshake bits per channel */ +#define DMA_APB_HS_SEL_MASK 0xFF /* HW handshake select masks */ +#define MAX_BLOCK_SIZE 0x1000 /* 1024 blocks * 4 bytes data width */ +#define DMA_REG_MAP_CH_REF 0x08 /* Channel count to choose register map */ + +/* DMAC_CFG */ +#define DMAC_EN_POS 0 +#define DMAC_EN_MASK BIT(DMAC_EN_POS) + +#define INT_EN_POS 1 +#define INT_EN_MASK BIT(INT_EN_POS) + +/* DMAC_CHEN */ +#define DMAC_CHAN_EN_SHIFT 0 +#define DMAC_CHAN_EN_WE_SHIFT 8 + +#define DMAC_CHAN_SUSP_SHIFT 16 +#define DMAC_CHAN_SUSP_WE_SHIFT 24 + +/* DMAC_CHEN2 */ +#define DMAC_CHAN_EN2_WE_SHIFT 16 + +/* DMAC_CHSUSP */ +#define DMAC_CHAN_SUSP2_SHIFT 0 +#define DMAC_CHAN_SUSP2_WE_SHIFT 16 + +/* CH_CTL_H */ +#define CH_CTL_H_ARLEN_EN BIT(6) +#define CH_CTL_H_ARLEN_POS 7 +#define CH_CTL_H_AWLEN_EN BIT(15) +#define CH_CTL_H_AWLEN_POS 16 + +enum { + DWAXIDMAC_ARWLEN_1 = 0, + DWAXIDMAC_ARWLEN_2 = 1, + DWAXIDMAC_ARWLEN_4 = 3, + DWAXIDMAC_ARWLEN_8 = 7, + DWAXIDMAC_ARWLEN_16 = 15, + DWAXIDMAC_ARWLEN_32 = 31, + DWAXIDMAC_ARWLEN_64 = 63, + DWAXIDMAC_ARWLEN_128 = 127, + DWAXIDMAC_ARWLEN_256 = 255, + DWAXIDMAC_ARWLEN_MIN = DWAXIDMAC_ARWLEN_1, + DWAXIDMAC_ARWLEN_MAX = DWAXIDMAC_ARWLEN_256 +}; + +#define CH_CTL_H_LLI_LAST BIT(30) +#define CH_CTL_H_LLI_VALID BIT(31) + +/* CH_CTL_L */ +#define CH_CTL_L_LAST_WRITE_EN BIT(30) + +#define CH_CTL_L_DST_MSIZE_POS 18 +#define CH_CTL_L_SRC_MSIZE_POS 14 + +enum { + DWAXIDMAC_BURST_TRANS_LEN_1 = 0, + DWAXIDMAC_BURST_TRANS_LEN_4, + DWAXIDMAC_BURST_TRANS_LEN_8, + DWAXIDMAC_BURST_TRANS_LEN_16, + DWAXIDMAC_BURST_TRANS_LEN_32, + DWAXIDMAC_BURST_TRANS_LEN_64, + DWAXIDMAC_BURST_TRANS_LEN_128, + DWAXIDMAC_BURST_TRANS_LEN_256, + DWAXIDMAC_BURST_TRANS_LEN_512, + DWAXIDMAC_BURST_TRANS_LEN_1024 +}; + +#define CH_CTL_L_DST_WIDTH_POS 11 +#define CH_CTL_L_SRC_WIDTH_POS 8 + +#define CH_CTL_L_DST_INC_POS 6 +#define CH_CTL_L_SRC_INC_POS 4 +enum { + DWAXIDMAC_CH_CTL_L_INC = 0, + DWAXIDMAC_CH_CTL_L_NOINC +}; + +#define CH_CTL_L_DST_MAST BIT(2) +#define CH_CTL_L_SRC_MAST BIT(0) + +/* CH_CFG_H */ +#define CH_CFG_H_PRIORITY_POS 17 +#define CH_CFG_H_DST_PER_POS 12 +#define CH_CFG_H_SRC_PER_POS 7 +#define CH_CFG_H_HS_SEL_DST_POS 4 +#define CH_CFG_H_HS_SEL_SRC_POS 3 +enum { + DWAXIDMAC_HS_SEL_HW = 0, + DWAXIDMAC_HS_SEL_SW +}; + +#define CH_CFG_H_TT_FC_POS 0 +enum { + DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC = 0, + DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC, + DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC, + DWAXIDMAC_TT_FC_PER_TO_PER_DMAC, + DWAXIDMAC_TT_FC_PER_TO_MEM_SRC, + DWAXIDMAC_TT_FC_PER_TO_PER_SRC, + DWAXIDMAC_TT_FC_MEM_TO_PER_DST, + DWAXIDMAC_TT_FC_PER_TO_PER_DST +}; + +/* CH_CFG_L */ +#define CH_CFG_L_DST_MULTBLK_TYPE_POS 2 +#define CH_CFG_L_SRC_MULTBLK_TYPE_POS 0 +enum { + DWAXIDMAC_MBLK_TYPE_CONTIGUOUS = 0, + DWAXIDMAC_MBLK_TYPE_RELOAD, + DWAXIDMAC_MBLK_TYPE_SHADOW_REG, + DWAXIDMAC_MBLK_TYPE_LL +}; + +/* CH_CFG2 */ +#define CH_CFG2_L_SRC_PER_POS 4 +#define CH_CFG2_L_DST_PER_POS 11 + +#define CH_CFG2_H_TT_FC_POS 0 +#define CH_CFG2_H_HS_SEL_SRC_POS 3 +#define CH_CFG2_H_HS_SEL_DST_POS 4 +#define CH_CFG2_H_PRIORITY_POS 20 + +/** + * DW AXI DMA channel interrupts + * + * @DWAXIDMAC_IRQ_NONE: Bitmask of no one interrupt + * @DWAXIDMAC_IRQ_BLOCK_TRF: Block transfer complete + * @DWAXIDMAC_IRQ_DMA_TRF: Dma transfer complete + * @DWAXIDMAC_IRQ_SRC_TRAN: Source transaction complete + * @DWAXIDMAC_IRQ_DST_TRAN: Destination transaction complete + * @DWAXIDMAC_IRQ_SRC_DEC_ERR: Source decode error + * @DWAXIDMAC_IRQ_DST_DEC_ERR: Destination decode error + * @DWAXIDMAC_IRQ_SRC_SLV_ERR: Source slave error + * @DWAXIDMAC_IRQ_DST_SLV_ERR: Destination slave error + * @DWAXIDMAC_IRQ_LLI_RD_DEC_ERR: LLI read decode error + * @DWAXIDMAC_IRQ_LLI_WR_DEC_ERR: LLI write decode error + * @DWAXIDMAC_IRQ_LLI_RD_SLV_ERR: LLI read slave error + * @DWAXIDMAC_IRQ_LLI_WR_SLV_ERR: LLI write slave error + * @DWAXIDMAC_IRQ_INVALID_ERR: LLI invalid error or Shadow register error + * @DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR: Slave Interface Multiblock type error + * @DWAXIDMAC_IRQ_DEC_ERR: Slave Interface decode error + * @DWAXIDMAC_IRQ_WR2RO_ERR: Slave Interface write to read only error + * @DWAXIDMAC_IRQ_RD2RWO_ERR: Slave Interface read to write only error + * @DWAXIDMAC_IRQ_WRONCHEN_ERR: Slave Interface write to channel error + * @DWAXIDMAC_IRQ_SHADOWREG_ERR: Slave Interface shadow reg error + * @DWAXIDMAC_IRQ_WRONHOLD_ERR: Slave Interface hold error + * @DWAXIDMAC_IRQ_LOCK_CLEARED: Lock Cleared Status + * @DWAXIDMAC_IRQ_SRC_SUSPENDED: Source Suspended Status + * @DWAXIDMAC_IRQ_SUSPENDED: Channel Suspended Status + * @DWAXIDMAC_IRQ_DISABLED: Channel Disabled Status + * @DWAXIDMAC_IRQ_ABORTED: Channel Aborted Status + * @DWAXIDMAC_IRQ_ALL_ERR: Bitmask of all error interrupts + * @DWAXIDMAC_IRQ_ALL: Bitmask of all interrupts + */ +enum { + DWAXIDMAC_IRQ_NONE = 0, + DWAXIDMAC_IRQ_BLOCK_TRF = BIT(0), + DWAXIDMAC_IRQ_DMA_TRF = BIT(1), + DWAXIDMAC_IRQ_SRC_TRAN = BIT(3), + DWAXIDMAC_IRQ_DST_TRAN = BIT(4), + DWAXIDMAC_IRQ_SRC_DEC_ERR = BIT(5), + DWAXIDMAC_IRQ_DST_DEC_ERR = BIT(6), + DWAXIDMAC_IRQ_SRC_SLV_ERR = BIT(7), + DWAXIDMAC_IRQ_DST_SLV_ERR = BIT(8), + DWAXIDMAC_IRQ_LLI_RD_DEC_ERR = BIT(9), + DWAXIDMAC_IRQ_LLI_WR_DEC_ERR = BIT(10), + DWAXIDMAC_IRQ_LLI_RD_SLV_ERR = BIT(11), + DWAXIDMAC_IRQ_LLI_WR_SLV_ERR = BIT(12), + DWAXIDMAC_IRQ_INVALID_ERR = BIT(13), + DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR = BIT(14), + DWAXIDMAC_IRQ_DEC_ERR = BIT(16), + DWAXIDMAC_IRQ_WR2RO_ERR = BIT(17), + DWAXIDMAC_IRQ_RD2RWO_ERR = BIT(18), + DWAXIDMAC_IRQ_WRONCHEN_ERR = BIT(19), + DWAXIDMAC_IRQ_SHADOWREG_ERR = BIT(20), + DWAXIDMAC_IRQ_WRONHOLD_ERR = BIT(21), + DWAXIDMAC_IRQ_LOCK_CLEARED = BIT(27), + DWAXIDMAC_IRQ_SRC_SUSPENDED = BIT(28), + DWAXIDMAC_IRQ_SUSPENDED = BIT(29), + DWAXIDMAC_IRQ_DISABLED = BIT(30), + DWAXIDMAC_IRQ_ABORTED = BIT(31), + DWAXIDMAC_IRQ_ALL_ERR = (GENMASK(21, 16) | GENMASK(14, 5)), + DWAXIDMAC_IRQ_ALL = GENMASK(31, 0) +}; + +enum { + DWAXIDMAC_TRANS_WIDTH_8 = 0, + DWAXIDMAC_TRANS_WIDTH_16, + DWAXIDMAC_TRANS_WIDTH_32, + DWAXIDMAC_TRANS_WIDTH_64, + DWAXIDMAC_TRANS_WIDTH_128, + DWAXIDMAC_TRANS_WIDTH_256, + DWAXIDMAC_TRANS_WIDTH_512, + DWAXIDMAC_TRANS_WIDTH_MAX = DWAXIDMAC_TRANS_WIDTH_512 +}; + +#endif /* _AXI_DMA_PLATFORM_H */ diff --git a/drivers/dma/dw-edma/Kconfig b/drivers/dma/dw-edma/Kconfig new file mode 100644 index 0000000000..2b6f267950 --- /dev/null +++ b/drivers/dma/dw-edma/Kconfig @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 + +config DW_EDMA + tristate "Synopsys DesignWare eDMA controller driver" + depends on PCI && PCI_MSI + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the Synopsys DesignWare eDMA controller, normally + implemented on endpoints SoCs. + +if DW_EDMA + +config DW_EDMA_PCIE + tristate "Synopsys DesignWare eDMA PCIe driver" + depends on PCI && PCI_MSI + help + Provides a glue-logic between the Synopsys DesignWare + eDMA controller and an endpoint PCIe device. This also serves + as a reference design to whom desires to use this IP. + +endif # DW_EDMA diff --git a/drivers/dma/dw-edma/Makefile b/drivers/dma/dw-edma/Makefile new file mode 100644 index 0000000000..83ab58f877 --- /dev/null +++ b/drivers/dma/dw-edma/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DW_EDMA) += dw-edma.o +dw-edma-$(CONFIG_DEBUG_FS) := dw-edma-v0-debugfs.o \ + dw-hdma-v0-debugfs.o +dw-edma-objs := dw-edma-core.o \ + dw-edma-v0-core.o \ + dw-hdma-v0-core.o $(dw-edma-y) +obj-$(CONFIG_DW_EDMA_PCIE) += dw-edma-pcie.o diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c new file mode 100644 index 0000000000..6823624705 --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -0,0 +1,1016 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA core driver + * + * Author: Gustavo Pimentel + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dw-edma-core.h" +#include "dw-edma-v0-core.h" +#include "dw-hdma-v0-core.h" +#include "../dmaengine.h" +#include "../virt-dma.h" + +static inline +struct device *dchan2dev(struct dma_chan *dchan) +{ + return &dchan->dev->device; +} + +static inline +struct device *chan2dev(struct dw_edma_chan *chan) +{ + return &chan->vc.chan.dev->device; +} + +static inline +struct dw_edma_desc *vd2dw_edma_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct dw_edma_desc, vd); +} + +static inline +u64 dw_edma_get_pci_address(struct dw_edma_chan *chan, phys_addr_t cpu_addr) +{ + struct dw_edma_chip *chip = chan->dw->chip; + + if (chip->ops->pci_address) + return chip->ops->pci_address(chip->dev, cpu_addr); + + return cpu_addr; +} + +static struct dw_edma_burst *dw_edma_alloc_burst(struct dw_edma_chunk *chunk) +{ + struct dw_edma_burst *burst; + + burst = kzalloc(sizeof(*burst), GFP_NOWAIT); + if (unlikely(!burst)) + return NULL; + + INIT_LIST_HEAD(&burst->list); + if (chunk->burst) { + /* Create and add new element into the linked list */ + chunk->bursts_alloc++; + list_add_tail(&burst->list, &chunk->burst->list); + } else { + /* List head */ + chunk->bursts_alloc = 0; + chunk->burst = burst; + } + + return burst; +} + +static struct dw_edma_chunk *dw_edma_alloc_chunk(struct dw_edma_desc *desc) +{ + struct dw_edma_chip *chip = desc->chan->dw->chip; + struct dw_edma_chan *chan = desc->chan; + struct dw_edma_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk), GFP_NOWAIT); + if (unlikely(!chunk)) + return NULL; + + INIT_LIST_HEAD(&chunk->list); + chunk->chan = chan; + /* Toggling change bit (CB) in each chunk, this is a mechanism to + * inform the eDMA HW block that this is a new linked list ready + * to be consumed. + * - Odd chunks originate CB equal to 0 + * - Even chunks originate CB equal to 1 + */ + chunk->cb = !(desc->chunks_alloc % 2); + if (chan->dir == EDMA_DIR_WRITE) { + chunk->ll_region.paddr = chip->ll_region_wr[chan->id].paddr; + chunk->ll_region.vaddr = chip->ll_region_wr[chan->id].vaddr; + } else { + chunk->ll_region.paddr = chip->ll_region_rd[chan->id].paddr; + chunk->ll_region.vaddr = chip->ll_region_rd[chan->id].vaddr; + } + + if (desc->chunk) { + /* Create and add new element into the linked list */ + if (!dw_edma_alloc_burst(chunk)) { + kfree(chunk); + return NULL; + } + desc->chunks_alloc++; + list_add_tail(&chunk->list, &desc->chunk->list); + } else { + /* List head */ + chunk->burst = NULL; + desc->chunks_alloc = 0; + desc->chunk = chunk; + } + + return chunk; +} + +static struct dw_edma_desc *dw_edma_alloc_desc(struct dw_edma_chan *chan) +{ + struct dw_edma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (unlikely(!desc)) + return NULL; + + desc->chan = chan; + if (!dw_edma_alloc_chunk(desc)) { + kfree(desc); + return NULL; + } + + return desc; +} + +static void dw_edma_free_burst(struct dw_edma_chunk *chunk) +{ + struct dw_edma_burst *child, *_next; + + /* Remove all the list elements */ + list_for_each_entry_safe(child, _next, &chunk->burst->list, list) { + list_del(&child->list); + kfree(child); + chunk->bursts_alloc--; + } + + /* Remove the list head */ + kfree(child); + chunk->burst = NULL; +} + +static void dw_edma_free_chunk(struct dw_edma_desc *desc) +{ + struct dw_edma_chunk *child, *_next; + + if (!desc->chunk) + return; + + /* Remove all the list elements */ + list_for_each_entry_safe(child, _next, &desc->chunk->list, list) { + dw_edma_free_burst(child); + list_del(&child->list); + kfree(child); + desc->chunks_alloc--; + } + + /* Remove the list head */ + kfree(child); + desc->chunk = NULL; +} + +static void dw_edma_free_desc(struct dw_edma_desc *desc) +{ + dw_edma_free_chunk(desc); + kfree(desc); +} + +static void vchan_free_desc(struct virt_dma_desc *vdesc) +{ + dw_edma_free_desc(vd2dw_edma_desc(vdesc)); +} + +static int dw_edma_start_transfer(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + struct dw_edma_chunk *child; + struct dw_edma_desc *desc; + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&chan->vc); + if (!vd) + return 0; + + desc = vd2dw_edma_desc(vd); + if (!desc) + return 0; + + child = list_first_entry_or_null(&desc->chunk->list, + struct dw_edma_chunk, list); + if (!child) + return 0; + + dw_edma_core_start(dw, child, !desc->xfer_sz); + desc->xfer_sz += child->ll_region.sz; + dw_edma_free_burst(child); + list_del(&child->list); + kfree(child); + desc->chunks_alloc--; + + return 1; +} + +static void dw_edma_device_caps(struct dma_chan *dchan, + struct dma_slave_caps *caps) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + + if (chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL) { + if (chan->dir == EDMA_DIR_READ) + caps->directions = BIT(DMA_DEV_TO_MEM); + else + caps->directions = BIT(DMA_MEM_TO_DEV); + } else { + if (chan->dir == EDMA_DIR_WRITE) + caps->directions = BIT(DMA_DEV_TO_MEM); + else + caps->directions = BIT(DMA_MEM_TO_DEV); + } +} + +static int dw_edma_device_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + + memcpy(&chan->config, config, sizeof(*config)); + chan->configured = true; + + return 0; +} + +static int dw_edma_device_pause(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + int err = 0; + + if (!chan->configured) + err = -EPERM; + else if (chan->status != EDMA_ST_BUSY) + err = -EPERM; + else if (chan->request != EDMA_REQ_NONE) + err = -EPERM; + else + chan->request = EDMA_REQ_PAUSE; + + return err; +} + +static int dw_edma_device_resume(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + int err = 0; + + if (!chan->configured) { + err = -EPERM; + } else if (chan->status != EDMA_ST_PAUSE) { + err = -EPERM; + } else if (chan->request != EDMA_REQ_NONE) { + err = -EPERM; + } else { + chan->status = EDMA_ST_BUSY; + dw_edma_start_transfer(chan); + } + + return err; +} + +static int dw_edma_device_terminate_all(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + int err = 0; + + if (!chan->configured) { + /* Do nothing */ + } else if (chan->status == EDMA_ST_PAUSE) { + chan->status = EDMA_ST_IDLE; + chan->configured = false; + } else if (chan->status == EDMA_ST_IDLE) { + chan->configured = false; + } else if (dw_edma_core_ch_status(chan) == DMA_COMPLETE) { + /* + * The channel is in a false BUSY state, probably didn't + * receive or lost an interrupt + */ + chan->status = EDMA_ST_IDLE; + chan->configured = false; + } else if (chan->request > EDMA_REQ_PAUSE) { + err = -EPERM; + } else { + chan->request = EDMA_REQ_STOP; + } + + return err; +} + +static void dw_edma_device_issue_pending(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + unsigned long flags; + + if (!chan->configured) + return; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (vchan_issue_pending(&chan->vc) && chan->request == EDMA_REQ_NONE && + chan->status == EDMA_ST_IDLE) { + chan->status = EDMA_ST_BUSY; + dw_edma_start_transfer(chan); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static enum dma_status +dw_edma_device_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + struct dw_edma_desc *desc; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + u32 residue = 0; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + if (ret == DMA_IN_PROGRESS && chan->status == EDMA_ST_PAUSE) + ret = DMA_PAUSED; + + if (!txstate) + goto ret_residue; + + spin_lock_irqsave(&chan->vc.lock, flags); + vd = vchan_find_desc(&chan->vc, cookie); + if (vd) { + desc = vd2dw_edma_desc(vd); + if (desc) + residue = desc->alloc_sz - desc->xfer_sz; + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + +ret_residue: + dma_set_residue(txstate, residue); + + return ret; +} + +static struct dma_async_tx_descriptor * +dw_edma_device_transfer(struct dw_edma_transfer *xfer) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(xfer->dchan); + enum dma_transfer_direction dir = xfer->direction; + struct scatterlist *sg = NULL; + struct dw_edma_chunk *chunk; + struct dw_edma_burst *burst; + struct dw_edma_desc *desc; + u64 src_addr, dst_addr; + size_t fsz = 0; + u32 cnt = 0; + int i; + + if (!chan->configured) + return NULL; + + /* + * Local Root Port/End-point Remote End-point + * +-----------------------+ PCIe bus +----------------------+ + * | | +-+ | | + * | DEV_TO_MEM Rx Ch <----+ +---+ Tx Ch DEV_TO_MEM | + * | | | | | | + * | MEM_TO_DEV Tx Ch +----+ +---> Rx Ch MEM_TO_DEV | + * | | +-+ | | + * +-----------------------+ +----------------------+ + * + * 1. Normal logic: + * If eDMA is embedded into the DW PCIe RP/EP and controlled from the + * CPU/Application side, the Rx channel (EDMA_DIR_READ) will be used + * for the device read operations (DEV_TO_MEM) and the Tx channel + * (EDMA_DIR_WRITE) - for the write operations (MEM_TO_DEV). + * + * 2. Inverted logic: + * If eDMA is embedded into a Remote PCIe EP and is controlled by the + * MWr/MRd TLPs sent from the CPU's PCIe host controller, the Tx + * channel (EDMA_DIR_WRITE) will be used for the device read operations + * (DEV_TO_MEM) and the Rx channel (EDMA_DIR_READ) - for the write + * operations (MEM_TO_DEV). + * + * It is the client driver responsibility to choose a proper channel + * for the DMA transfers. + */ + if (chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL) { + if ((chan->dir == EDMA_DIR_READ && dir != DMA_DEV_TO_MEM) || + (chan->dir == EDMA_DIR_WRITE && dir != DMA_MEM_TO_DEV)) + return NULL; + } else { + if ((chan->dir == EDMA_DIR_WRITE && dir != DMA_DEV_TO_MEM) || + (chan->dir == EDMA_DIR_READ && dir != DMA_MEM_TO_DEV)) + return NULL; + } + + if (xfer->type == EDMA_XFER_CYCLIC) { + if (!xfer->xfer.cyclic.len || !xfer->xfer.cyclic.cnt) + return NULL; + } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) { + if (xfer->xfer.sg.len < 1) + return NULL; + } else if (xfer->type == EDMA_XFER_INTERLEAVED) { + if (!xfer->xfer.il->numf || xfer->xfer.il->frame_size < 1) + return NULL; + if (!xfer->xfer.il->src_inc || !xfer->xfer.il->dst_inc) + return NULL; + } else { + return NULL; + } + + desc = dw_edma_alloc_desc(chan); + if (unlikely(!desc)) + goto err_alloc; + + chunk = dw_edma_alloc_chunk(desc); + if (unlikely(!chunk)) + goto err_alloc; + + if (xfer->type == EDMA_XFER_INTERLEAVED) { + src_addr = xfer->xfer.il->src_start; + dst_addr = xfer->xfer.il->dst_start; + } else { + src_addr = chan->config.src_addr; + dst_addr = chan->config.dst_addr; + } + + if (dir == DMA_DEV_TO_MEM) + src_addr = dw_edma_get_pci_address(chan, (phys_addr_t)src_addr); + else + dst_addr = dw_edma_get_pci_address(chan, (phys_addr_t)dst_addr); + + if (xfer->type == EDMA_XFER_CYCLIC) { + cnt = xfer->xfer.cyclic.cnt; + } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) { + cnt = xfer->xfer.sg.len; + sg = xfer->xfer.sg.sgl; + } else if (xfer->type == EDMA_XFER_INTERLEAVED) { + cnt = xfer->xfer.il->numf * xfer->xfer.il->frame_size; + fsz = xfer->xfer.il->frame_size; + } + + for (i = 0; i < cnt; i++) { + if (xfer->type == EDMA_XFER_SCATTER_GATHER && !sg) + break; + + if (chunk->bursts_alloc == chan->ll_max) { + chunk = dw_edma_alloc_chunk(desc); + if (unlikely(!chunk)) + goto err_alloc; + } + + burst = dw_edma_alloc_burst(chunk); + if (unlikely(!burst)) + goto err_alloc; + + if (xfer->type == EDMA_XFER_CYCLIC) + burst->sz = xfer->xfer.cyclic.len; + else if (xfer->type == EDMA_XFER_SCATTER_GATHER) + burst->sz = sg_dma_len(sg); + else if (xfer->type == EDMA_XFER_INTERLEAVED) + burst->sz = xfer->xfer.il->sgl[i % fsz].size; + + chunk->ll_region.sz += burst->sz; + desc->alloc_sz += burst->sz; + + if (dir == DMA_DEV_TO_MEM) { + burst->sar = src_addr; + if (xfer->type == EDMA_XFER_CYCLIC) { + burst->dar = xfer->xfer.cyclic.paddr; + } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) { + src_addr += sg_dma_len(sg); + burst->dar = sg_dma_address(sg); + /* Unlike the typical assumption by other + * drivers/IPs the peripheral memory isn't + * a FIFO memory, in this case, it's a + * linear memory and that why the source + * and destination addresses are increased + * by the same portion (data length) + */ + } else if (xfer->type == EDMA_XFER_INTERLEAVED) { + burst->dar = dst_addr; + } + } else { + burst->dar = dst_addr; + if (xfer->type == EDMA_XFER_CYCLIC) { + burst->sar = xfer->xfer.cyclic.paddr; + } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) { + dst_addr += sg_dma_len(sg); + burst->sar = sg_dma_address(sg); + /* Unlike the typical assumption by other + * drivers/IPs the peripheral memory isn't + * a FIFO memory, in this case, it's a + * linear memory and that why the source + * and destination addresses are increased + * by the same portion (data length) + */ + } else if (xfer->type == EDMA_XFER_INTERLEAVED) { + burst->sar = src_addr; + } + } + + if (xfer->type == EDMA_XFER_SCATTER_GATHER) { + sg = sg_next(sg); + } else if (xfer->type == EDMA_XFER_INTERLEAVED) { + struct dma_interleaved_template *il = xfer->xfer.il; + struct data_chunk *dc = &il->sgl[i % fsz]; + + src_addr += burst->sz; + if (il->src_sgl) + src_addr += dmaengine_get_src_icg(il, dc); + + dst_addr += burst->sz; + if (il->dst_sgl) + dst_addr += dmaengine_get_dst_icg(il, dc); + } + } + + return vchan_tx_prep(&chan->vc, &desc->vd, xfer->flags); + +err_alloc: + if (desc) + dw_edma_free_desc(desc); + + return NULL; +} + +static struct dma_async_tx_descriptor * +dw_edma_device_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct dw_edma_transfer xfer; + + xfer.dchan = dchan; + xfer.direction = direction; + xfer.xfer.sg.sgl = sgl; + xfer.xfer.sg.len = len; + xfer.flags = flags; + xfer.type = EDMA_XFER_SCATTER_GATHER; + + return dw_edma_device_transfer(&xfer); +} + +static struct dma_async_tx_descriptor * +dw_edma_device_prep_dma_cyclic(struct dma_chan *dchan, dma_addr_t paddr, + size_t len, size_t count, + enum dma_transfer_direction direction, + unsigned long flags) +{ + struct dw_edma_transfer xfer; + + xfer.dchan = dchan; + xfer.direction = direction; + xfer.xfer.cyclic.paddr = paddr; + xfer.xfer.cyclic.len = len; + xfer.xfer.cyclic.cnt = count; + xfer.flags = flags; + xfer.type = EDMA_XFER_CYCLIC; + + return dw_edma_device_transfer(&xfer); +} + +static struct dma_async_tx_descriptor * +dw_edma_device_prep_interleaved_dma(struct dma_chan *dchan, + struct dma_interleaved_template *ilt, + unsigned long flags) +{ + struct dw_edma_transfer xfer; + + xfer.dchan = dchan; + xfer.direction = ilt->dir; + xfer.xfer.il = ilt; + xfer.flags = flags; + xfer.type = EDMA_XFER_INTERLEAVED; + + return dw_edma_device_transfer(&xfer); +} + +static void dw_edma_done_interrupt(struct dw_edma_chan *chan) +{ + struct dw_edma_desc *desc; + struct virt_dma_desc *vd; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + vd = vchan_next_desc(&chan->vc); + if (vd) { + switch (chan->request) { + case EDMA_REQ_NONE: + desc = vd2dw_edma_desc(vd); + if (!desc->chunks_alloc) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } + + /* Continue transferring if there are remaining chunks or issued requests. + */ + chan->status = dw_edma_start_transfer(chan) ? EDMA_ST_BUSY : EDMA_ST_IDLE; + break; + + case EDMA_REQ_STOP: + list_del(&vd->node); + vchan_cookie_complete(vd); + chan->request = EDMA_REQ_NONE; + chan->status = EDMA_ST_IDLE; + break; + + case EDMA_REQ_PAUSE: + chan->request = EDMA_REQ_NONE; + chan->status = EDMA_ST_PAUSE; + break; + + default: + break; + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static void dw_edma_abort_interrupt(struct dw_edma_chan *chan) +{ + struct virt_dma_desc *vd; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + vd = vchan_next_desc(&chan->vc); + if (vd) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + chan->request = EDMA_REQ_NONE; + chan->status = EDMA_ST_IDLE; +} + +static inline irqreturn_t dw_edma_interrupt_write(int irq, void *data) +{ + struct dw_edma_irq *dw_irq = data; + + return dw_edma_core_handle_int(dw_irq, EDMA_DIR_WRITE, + dw_edma_done_interrupt, + dw_edma_abort_interrupt); +} + +static inline irqreturn_t dw_edma_interrupt_read(int irq, void *data) +{ + struct dw_edma_irq *dw_irq = data; + + return dw_edma_core_handle_int(dw_irq, EDMA_DIR_READ, + dw_edma_done_interrupt, + dw_edma_abort_interrupt); +} + +static irqreturn_t dw_edma_interrupt_common(int irq, void *data) +{ + irqreturn_t ret = IRQ_NONE; + + ret |= dw_edma_interrupt_write(irq, data); + ret |= dw_edma_interrupt_read(irq, data); + + return ret; +} + +static int dw_edma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); + + if (chan->status != EDMA_ST_IDLE) + return -EBUSY; + + return 0; +} + +static void dw_edma_free_chan_resources(struct dma_chan *dchan) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(5000); + int ret; + + while (time_before(jiffies, timeout)) { + ret = dw_edma_device_terminate_all(dchan); + if (!ret) + break; + + if (time_after_eq(jiffies, timeout)) + return; + + cpu_relax(); + } +} + +static int dw_edma_channel_setup(struct dw_edma *dw, u32 wr_alloc, u32 rd_alloc) +{ + struct dw_edma_chip *chip = dw->chip; + struct device *dev = chip->dev; + struct dw_edma_chan *chan; + struct dw_edma_irq *irq; + struct dma_device *dma; + u32 i, ch_cnt; + u32 pos; + + ch_cnt = dw->wr_ch_cnt + dw->rd_ch_cnt; + dma = &dw->dma; + + INIT_LIST_HEAD(&dma->channels); + + for (i = 0; i < ch_cnt; i++) { + chan = &dw->chan[i]; + + chan->dw = dw; + + if (i < dw->wr_ch_cnt) { + chan->id = i; + chan->dir = EDMA_DIR_WRITE; + } else { + chan->id = i - dw->wr_ch_cnt; + chan->dir = EDMA_DIR_READ; + } + + chan->configured = false; + chan->request = EDMA_REQ_NONE; + chan->status = EDMA_ST_IDLE; + + if (chan->dir == EDMA_DIR_WRITE) + chan->ll_max = (chip->ll_region_wr[chan->id].sz / EDMA_LL_SZ); + else + chan->ll_max = (chip->ll_region_rd[chan->id].sz / EDMA_LL_SZ); + chan->ll_max -= 1; + + dev_vdbg(dev, "L. List:\tChannel %s[%u] max_cnt=%u\n", + chan->dir == EDMA_DIR_WRITE ? "write" : "read", + chan->id, chan->ll_max); + + if (dw->nr_irqs == 1) + pos = 0; + else if (chan->dir == EDMA_DIR_WRITE) + pos = chan->id % wr_alloc; + else + pos = wr_alloc + chan->id % rd_alloc; + + irq = &dw->irq[pos]; + + if (chan->dir == EDMA_DIR_WRITE) + irq->wr_mask |= BIT(chan->id); + else + irq->rd_mask |= BIT(chan->id); + + irq->dw = dw; + memcpy(&chan->msi, &irq->msi, sizeof(chan->msi)); + + dev_vdbg(dev, "MSI:\t\tChannel %s[%u] addr=0x%.8x%.8x, data=0x%.8x\n", + chan->dir == EDMA_DIR_WRITE ? "write" : "read", chan->id, + chan->msi.address_hi, chan->msi.address_lo, + chan->msi.data); + + chan->vc.desc_free = vchan_free_desc; + chan->vc.chan.private = chan->dir == EDMA_DIR_WRITE ? + &dw->chip->dt_region_wr[chan->id] : + &dw->chip->dt_region_rd[chan->id]; + + vchan_init(&chan->vc, dma); + + dw_edma_core_ch_config(chan); + } + + /* Set DMA channel capabilities */ + dma_cap_zero(dma->cap_mask); + dma_cap_set(DMA_SLAVE, dma->cap_mask); + dma_cap_set(DMA_CYCLIC, dma->cap_mask); + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + dma_cap_set(DMA_INTERLEAVE, dma->cap_mask); + dma->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + dma->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + /* Set DMA channel callbacks */ + dma->dev = chip->dev; + dma->device_alloc_chan_resources = dw_edma_alloc_chan_resources; + dma->device_free_chan_resources = dw_edma_free_chan_resources; + dma->device_caps = dw_edma_device_caps; + dma->device_config = dw_edma_device_config; + dma->device_pause = dw_edma_device_pause; + dma->device_resume = dw_edma_device_resume; + dma->device_terminate_all = dw_edma_device_terminate_all; + dma->device_issue_pending = dw_edma_device_issue_pending; + dma->device_tx_status = dw_edma_device_tx_status; + dma->device_prep_slave_sg = dw_edma_device_prep_slave_sg; + dma->device_prep_dma_cyclic = dw_edma_device_prep_dma_cyclic; + dma->device_prep_interleaved_dma = dw_edma_device_prep_interleaved_dma; + + dma_set_max_seg_size(dma->dev, U32_MAX); + + /* Register DMA device */ + return dma_async_device_register(dma); +} + +static inline void dw_edma_dec_irq_alloc(int *nr_irqs, u32 *alloc, u16 cnt) +{ + if (*nr_irqs && *alloc < cnt) { + (*alloc)++; + (*nr_irqs)--; + } +} + +static inline void dw_edma_add_irq_mask(u32 *mask, u32 alloc, u16 cnt) +{ + while (*mask * alloc < cnt) + (*mask)++; +} + +static int dw_edma_irq_request(struct dw_edma *dw, + u32 *wr_alloc, u32 *rd_alloc) +{ + struct dw_edma_chip *chip = dw->chip; + struct device *dev = dw->chip->dev; + u32 wr_mask = 1; + u32 rd_mask = 1; + int i, err = 0; + u32 ch_cnt; + int irq; + + ch_cnt = dw->wr_ch_cnt + dw->rd_ch_cnt; + + if (chip->nr_irqs < 1 || !chip->ops->irq_vector) + return -EINVAL; + + dw->irq = devm_kcalloc(dev, chip->nr_irqs, sizeof(*dw->irq), GFP_KERNEL); + if (!dw->irq) + return -ENOMEM; + + if (chip->nr_irqs == 1) { + /* Common IRQ shared among all channels */ + irq = chip->ops->irq_vector(dev, 0); + err = request_irq(irq, dw_edma_interrupt_common, + IRQF_SHARED, dw->name, &dw->irq[0]); + if (err) { + dw->nr_irqs = 0; + return err; + } + + if (irq_get_msi_desc(irq)) + get_cached_msi_msg(irq, &dw->irq[0].msi); + + dw->nr_irqs = 1; + } else { + /* Distribute IRQs equally among all channels */ + int tmp = chip->nr_irqs; + + while (tmp && (*wr_alloc + *rd_alloc) < ch_cnt) { + dw_edma_dec_irq_alloc(&tmp, wr_alloc, dw->wr_ch_cnt); + dw_edma_dec_irq_alloc(&tmp, rd_alloc, dw->rd_ch_cnt); + } + + dw_edma_add_irq_mask(&wr_mask, *wr_alloc, dw->wr_ch_cnt); + dw_edma_add_irq_mask(&rd_mask, *rd_alloc, dw->rd_ch_cnt); + + for (i = 0; i < (*wr_alloc + *rd_alloc); i++) { + irq = chip->ops->irq_vector(dev, i); + err = request_irq(irq, + i < *wr_alloc ? + dw_edma_interrupt_write : + dw_edma_interrupt_read, + IRQF_SHARED, dw->name, + &dw->irq[i]); + if (err) + goto err_irq_free; + + if (irq_get_msi_desc(irq)) + get_cached_msi_msg(irq, &dw->irq[i].msi); + } + + dw->nr_irqs = i; + } + + return 0; + +err_irq_free: + for (i--; i >= 0; i--) { + irq = chip->ops->irq_vector(dev, i); + free_irq(irq, &dw->irq[i]); + } + + return err; +} + +int dw_edma_probe(struct dw_edma_chip *chip) +{ + struct device *dev; + struct dw_edma *dw; + u32 wr_alloc = 0; + u32 rd_alloc = 0; + int i, err; + + if (!chip) + return -EINVAL; + + dev = chip->dev; + if (!dev || !chip->ops) + return -EINVAL; + + dw = devm_kzalloc(dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + dw->chip = chip; + + if (dw->chip->mf == EDMA_MF_HDMA_NATIVE) + dw_hdma_v0_core_register(dw); + else + dw_edma_v0_core_register(dw); + + raw_spin_lock_init(&dw->lock); + + dw->wr_ch_cnt = min_t(u16, chip->ll_wr_cnt, + dw_edma_core_ch_count(dw, EDMA_DIR_WRITE)); + dw->wr_ch_cnt = min_t(u16, dw->wr_ch_cnt, EDMA_MAX_WR_CH); + + dw->rd_ch_cnt = min_t(u16, chip->ll_rd_cnt, + dw_edma_core_ch_count(dw, EDMA_DIR_READ)); + dw->rd_ch_cnt = min_t(u16, dw->rd_ch_cnt, EDMA_MAX_RD_CH); + + if (!dw->wr_ch_cnt && !dw->rd_ch_cnt) + return -EINVAL; + + dev_vdbg(dev, "Channels:\twrite=%d, read=%d\n", + dw->wr_ch_cnt, dw->rd_ch_cnt); + + /* Allocate channels */ + dw->chan = devm_kcalloc(dev, dw->wr_ch_cnt + dw->rd_ch_cnt, + sizeof(*dw->chan), GFP_KERNEL); + if (!dw->chan) + return -ENOMEM; + + snprintf(dw->name, sizeof(dw->name), "dw-edma-core:%s", + dev_name(chip->dev)); + + /* Disable eDMA, only to establish the ideal initial conditions */ + dw_edma_core_off(dw); + + /* Request IRQs */ + err = dw_edma_irq_request(dw, &wr_alloc, &rd_alloc); + if (err) + return err; + + /* Setup write/read channels */ + err = dw_edma_channel_setup(dw, wr_alloc, rd_alloc); + if (err) + goto err_irq_free; + + /* Turn debugfs on */ + dw_edma_core_debugfs_on(dw); + + chip->dw = dw; + + return 0; + +err_irq_free: + for (i = (dw->nr_irqs - 1); i >= 0; i--) + free_irq(chip->ops->irq_vector(dev, i), &dw->irq[i]); + + return err; +} +EXPORT_SYMBOL_GPL(dw_edma_probe); + +int dw_edma_remove(struct dw_edma_chip *chip) +{ + struct dw_edma_chan *chan, *_chan; + struct device *dev = chip->dev; + struct dw_edma *dw = chip->dw; + int i; + + /* Skip removal if no private data found */ + if (!dw) + return -ENODEV; + + /* Disable eDMA */ + dw_edma_core_off(dw); + + /* Free irqs */ + for (i = (dw->nr_irqs - 1); i >= 0; i--) + free_irq(chip->ops->irq_vector(dev, i), &dw->irq[i]); + + /* Deregister eDMA device */ + dma_async_device_unregister(&dw->dma); + list_for_each_entry_safe(chan, _chan, &dw->dma.channels, + vc.chan.device_node) { + tasklet_kill(&chan->vc.task); + list_del(&chan->vc.chan.device_node); + } + + return 0; +} +EXPORT_SYMBOL_GPL(dw_edma_remove); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare eDMA controller core driver"); +MODULE_AUTHOR("Gustavo Pimentel "); diff --git a/drivers/dma/dw-edma/dw-edma-core.h b/drivers/dma/dw-edma/dw-edma-core.h new file mode 100644 index 0000000000..71894b9e0b --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-core.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA core driver + * + * Author: Gustavo Pimentel + */ + +#ifndef _DW_EDMA_CORE_H +#define _DW_EDMA_CORE_H + +#include +#include + +#include "../virt-dma.h" + +#define EDMA_LL_SZ 24 + +enum dw_edma_dir { + EDMA_DIR_WRITE = 0, + EDMA_DIR_READ +}; + +enum dw_edma_request { + EDMA_REQ_NONE = 0, + EDMA_REQ_STOP, + EDMA_REQ_PAUSE +}; + +enum dw_edma_status { + EDMA_ST_IDLE = 0, + EDMA_ST_PAUSE, + EDMA_ST_BUSY +}; + +enum dw_edma_xfer_type { + EDMA_XFER_SCATTER_GATHER = 0, + EDMA_XFER_CYCLIC, + EDMA_XFER_INTERLEAVED +}; + +struct dw_edma_chan; +struct dw_edma_chunk; + +struct dw_edma_burst { + struct list_head list; + u64 sar; + u64 dar; + u32 sz; +}; + +struct dw_edma_chunk { + struct list_head list; + struct dw_edma_chan *chan; + struct dw_edma_burst *burst; + + u32 bursts_alloc; + + u8 cb; + struct dw_edma_region ll_region; /* Linked list */ +}; + +struct dw_edma_desc { + struct virt_dma_desc vd; + struct dw_edma_chan *chan; + struct dw_edma_chunk *chunk; + + u32 chunks_alloc; + + u32 alloc_sz; + u32 xfer_sz; +}; + +struct dw_edma_chan { + struct virt_dma_chan vc; + struct dw_edma *dw; + int id; + enum dw_edma_dir dir; + + u32 ll_max; + + struct msi_msg msi; + + enum dw_edma_request request; + enum dw_edma_status status; + u8 configured; + + struct dma_slave_config config; +}; + +struct dw_edma_irq { + struct msi_msg msi; + u32 wr_mask; + u32 rd_mask; + struct dw_edma *dw; +}; + +struct dw_edma { + char name[32]; + + struct dma_device dma; + + u16 wr_ch_cnt; + u16 rd_ch_cnt; + + struct dw_edma_irq *irq; + int nr_irqs; + + struct dw_edma_chan *chan; + + raw_spinlock_t lock; /* Only for legacy */ + + struct dw_edma_chip *chip; + + const struct dw_edma_core_ops *core; +}; + +typedef void (*dw_edma_handler_t)(struct dw_edma_chan *); + +struct dw_edma_core_ops { + void (*off)(struct dw_edma *dw); + u16 (*ch_count)(struct dw_edma *dw, enum dw_edma_dir dir); + enum dma_status (*ch_status)(struct dw_edma_chan *chan); + irqreturn_t (*handle_int)(struct dw_edma_irq *dw_irq, enum dw_edma_dir dir, + dw_edma_handler_t done, dw_edma_handler_t abort); + void (*start)(struct dw_edma_chunk *chunk, bool first); + void (*ch_config)(struct dw_edma_chan *chan); + void (*debugfs_on)(struct dw_edma *dw); +}; + +struct dw_edma_sg { + struct scatterlist *sgl; + unsigned int len; +}; + +struct dw_edma_cyclic { + dma_addr_t paddr; + size_t len; + size_t cnt; +}; + +struct dw_edma_transfer { + struct dma_chan *dchan; + union dw_edma_xfer { + struct dw_edma_sg sg; + struct dw_edma_cyclic cyclic; + struct dma_interleaved_template *il; + } xfer; + enum dma_transfer_direction direction; + unsigned long flags; + enum dw_edma_xfer_type type; +}; + +static inline +struct dw_edma_chan *vc2dw_edma_chan(struct virt_dma_chan *vc) +{ + return container_of(vc, struct dw_edma_chan, vc); +} + +static inline +struct dw_edma_chan *dchan2dw_edma_chan(struct dma_chan *dchan) +{ + return vc2dw_edma_chan(to_virt_chan(dchan)); +} + +static inline +void dw_edma_core_off(struct dw_edma *dw) +{ + dw->core->off(dw); +} + +static inline +u16 dw_edma_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir) +{ + return dw->core->ch_count(dw, dir); +} + +static inline +enum dma_status dw_edma_core_ch_status(struct dw_edma_chan *chan) +{ + return chan->dw->core->ch_status(chan); +} + +static inline irqreturn_t +dw_edma_core_handle_int(struct dw_edma_irq *dw_irq, enum dw_edma_dir dir, + dw_edma_handler_t done, dw_edma_handler_t abort) +{ + return dw_irq->dw->core->handle_int(dw_irq, dir, done, abort); +} + +static inline +void dw_edma_core_start(struct dw_edma *dw, struct dw_edma_chunk *chunk, bool first) +{ + dw->core->start(chunk, first); +} + +static inline +void dw_edma_core_ch_config(struct dw_edma_chan *chan) +{ + chan->dw->core->ch_config(chan); +} + +static inline +void dw_edma_core_debugfs_on(struct dw_edma *dw) +{ + dw->core->debugfs_on(dw); +} + +#endif /* _DW_EDMA_CORE_H */ diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c new file mode 100644 index 0000000000..1c6043751d --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-pcie.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA PCIe driver + * + * Author: Gustavo Pimentel + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dw-edma-core.h" + +#define DW_PCIE_VSEC_DMA_ID 0x6 +#define DW_PCIE_VSEC_DMA_BAR GENMASK(10, 8) +#define DW_PCIE_VSEC_DMA_MAP GENMASK(2, 0) +#define DW_PCIE_VSEC_DMA_WR_CH GENMASK(9, 0) +#define DW_PCIE_VSEC_DMA_RD_CH GENMASK(25, 16) + +#define DW_BLOCK(a, b, c) \ + { \ + .bar = a, \ + .off = b, \ + .sz = c, \ + }, + +struct dw_edma_block { + enum pci_barno bar; + off_t off; + size_t sz; +}; + +struct dw_edma_pcie_data { + /* eDMA registers location */ + struct dw_edma_block rg; + /* eDMA memory linked list location */ + struct dw_edma_block ll_wr[EDMA_MAX_WR_CH]; + struct dw_edma_block ll_rd[EDMA_MAX_RD_CH]; + /* eDMA memory data location */ + struct dw_edma_block dt_wr[EDMA_MAX_WR_CH]; + struct dw_edma_block dt_rd[EDMA_MAX_RD_CH]; + /* Other */ + enum dw_edma_map_format mf; + u8 irqs; + u16 wr_ch_cnt; + u16 rd_ch_cnt; +}; + +static const struct dw_edma_pcie_data snps_edda_data = { + /* eDMA registers location */ + .rg.bar = BAR_0, + .rg.off = 0x00001000, /* 4 Kbytes */ + .rg.sz = 0x00002000, /* 8 Kbytes */ + /* eDMA memory linked list location */ + .ll_wr = { + /* Channel 0 - BAR 2, offset 0 Mbytes, size 2 Kbytes */ + DW_BLOCK(BAR_2, 0x00000000, 0x00000800) + /* Channel 1 - BAR 2, offset 2 Mbytes, size 2 Kbytes */ + DW_BLOCK(BAR_2, 0x00200000, 0x00000800) + }, + .ll_rd = { + /* Channel 0 - BAR 2, offset 4 Mbytes, size 2 Kbytes */ + DW_BLOCK(BAR_2, 0x00400000, 0x00000800) + /* Channel 1 - BAR 2, offset 6 Mbytes, size 2 Kbytes */ + DW_BLOCK(BAR_2, 0x00600000, 0x00000800) + }, + /* eDMA memory data location */ + .dt_wr = { + /* Channel 0 - BAR 2, offset 8 Mbytes, size 2 Kbytes */ + DW_BLOCK(BAR_2, 0x00800000, 0x00000800) + /* Channel 1 - BAR 2, offset 9 Mbytes, size 2 Kbytes */ + DW_BLOCK(BAR_2, 0x00900000, 0x00000800) + }, + .dt_rd = { + /* Channel 0 - BAR 2, offset 10 Mbytes, size 2 Kbytes */ + DW_BLOCK(BAR_2, 0x00a00000, 0x00000800) + /* Channel 1 - BAR 2, offset 11 Mbytes, size 2 Kbytes */ + DW_BLOCK(BAR_2, 0x00b00000, 0x00000800) + }, + /* Other */ + .mf = EDMA_MF_EDMA_UNROLL, + .irqs = 1, + .wr_ch_cnt = 2, + .rd_ch_cnt = 2, +}; + +static int dw_edma_pcie_irq_vector(struct device *dev, unsigned int nr) +{ + return pci_irq_vector(to_pci_dev(dev), nr); +} + +static u64 dw_edma_pcie_address(struct device *dev, phys_addr_t cpu_addr) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus_region region; + struct resource res = { + .flags = IORESOURCE_MEM, + .start = cpu_addr, + .end = cpu_addr, + }; + + pcibios_resource_to_bus(pdev->bus, ®ion, &res); + return region.start; +} + +static const struct dw_edma_plat_ops dw_edma_pcie_plat_ops = { + .irq_vector = dw_edma_pcie_irq_vector, + .pci_address = dw_edma_pcie_address, +}; + +static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev, + struct dw_edma_pcie_data *pdata) +{ + u32 val, map; + u16 vsec; + u64 off; + + vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_SYNOPSYS, + DW_PCIE_VSEC_DMA_ID); + if (!vsec) + return; + + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); + if (PCI_VNDR_HEADER_REV(val) != 0x00 || + PCI_VNDR_HEADER_LEN(val) != 0x18) + return; + + pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability DMA\n"); + pci_read_config_dword(pdev, vsec + 0x8, &val); + map = FIELD_GET(DW_PCIE_VSEC_DMA_MAP, val); + if (map != EDMA_MF_EDMA_LEGACY && + map != EDMA_MF_EDMA_UNROLL && + map != EDMA_MF_HDMA_COMPAT) + return; + + pdata->mf = map; + pdata->rg.bar = FIELD_GET(DW_PCIE_VSEC_DMA_BAR, val); + + pci_read_config_dword(pdev, vsec + 0xc, &val); + pdata->wr_ch_cnt = min_t(u16, pdata->wr_ch_cnt, + FIELD_GET(DW_PCIE_VSEC_DMA_WR_CH, val)); + pdata->rd_ch_cnt = min_t(u16, pdata->rd_ch_cnt, + FIELD_GET(DW_PCIE_VSEC_DMA_RD_CH, val)); + + pci_read_config_dword(pdev, vsec + 0x14, &val); + off = val; + pci_read_config_dword(pdev, vsec + 0x10, &val); + off <<= 32; + off |= val; + pdata->rg.off = off; +} + +static int dw_edma_pcie_probe(struct pci_dev *pdev, + const struct pci_device_id *pid) +{ + struct dw_edma_pcie_data *pdata = (void *)pid->driver_data; + struct dw_edma_pcie_data vsec_data; + struct device *dev = &pdev->dev; + struct dw_edma_chip *chip; + int err, nr_irqs; + int i, mask; + + /* Enable PCI device */ + err = pcim_enable_device(pdev); + if (err) { + pci_err(pdev, "enabling device failed\n"); + return err; + } + + memcpy(&vsec_data, pdata, sizeof(struct dw_edma_pcie_data)); + + /* + * Tries to find if exists a PCIe Vendor-Specific Extended Capability + * for the DMA, if one exists, then reconfigures it. + */ + dw_edma_pcie_get_vsec_dma_data(pdev, &vsec_data); + + /* Mapping PCI BAR regions */ + mask = BIT(vsec_data.rg.bar); + for (i = 0; i < vsec_data.wr_ch_cnt; i++) { + mask |= BIT(vsec_data.ll_wr[i].bar); + mask |= BIT(vsec_data.dt_wr[i].bar); + } + for (i = 0; i < vsec_data.rd_ch_cnt; i++) { + mask |= BIT(vsec_data.ll_rd[i].bar); + mask |= BIT(vsec_data.dt_rd[i].bar); + } + err = pcim_iomap_regions(pdev, mask, pci_name(pdev)); + if (err) { + pci_err(pdev, "eDMA BAR I/O remapping failed\n"); + return err; + } + + pci_set_master(pdev); + + /* DMA configuration */ + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + pci_err(pdev, "DMA mask 64 set failed\n"); + return err; + } + + /* Data structure allocation */ + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + /* IRQs allocation */ + nr_irqs = pci_alloc_irq_vectors(pdev, 1, vsec_data.irqs, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (nr_irqs < 1) { + pci_err(pdev, "fail to alloc IRQ vector (number of IRQs=%u)\n", + nr_irqs); + return -EPERM; + } + + /* Data structure initialization */ + chip->dev = dev; + + chip->mf = vsec_data.mf; + chip->nr_irqs = nr_irqs; + chip->ops = &dw_edma_pcie_plat_ops; + + chip->ll_wr_cnt = vsec_data.wr_ch_cnt; + chip->ll_rd_cnt = vsec_data.rd_ch_cnt; + + chip->reg_base = pcim_iomap_table(pdev)[vsec_data.rg.bar]; + if (!chip->reg_base) + return -ENOMEM; + + for (i = 0; i < chip->ll_wr_cnt; i++) { + struct dw_edma_region *ll_region = &chip->ll_region_wr[i]; + struct dw_edma_region *dt_region = &chip->dt_region_wr[i]; + struct dw_edma_block *ll_block = &vsec_data.ll_wr[i]; + struct dw_edma_block *dt_block = &vsec_data.dt_wr[i]; + + ll_region->vaddr.io = pcim_iomap_table(pdev)[ll_block->bar]; + if (!ll_region->vaddr.io) + return -ENOMEM; + + ll_region->vaddr.io += ll_block->off; + ll_region->paddr = pci_bus_address(pdev, ll_block->bar); + ll_region->paddr += ll_block->off; + ll_region->sz = ll_block->sz; + + dt_region->vaddr.io = pcim_iomap_table(pdev)[dt_block->bar]; + if (!dt_region->vaddr.io) + return -ENOMEM; + + dt_region->vaddr.io += dt_block->off; + dt_region->paddr = pci_bus_address(pdev, dt_block->bar); + dt_region->paddr += dt_block->off; + dt_region->sz = dt_block->sz; + } + + for (i = 0; i < chip->ll_rd_cnt; i++) { + struct dw_edma_region *ll_region = &chip->ll_region_rd[i]; + struct dw_edma_region *dt_region = &chip->dt_region_rd[i]; + struct dw_edma_block *ll_block = &vsec_data.ll_rd[i]; + struct dw_edma_block *dt_block = &vsec_data.dt_rd[i]; + + ll_region->vaddr.io = pcim_iomap_table(pdev)[ll_block->bar]; + if (!ll_region->vaddr.io) + return -ENOMEM; + + ll_region->vaddr.io += ll_block->off; + ll_region->paddr = pci_bus_address(pdev, ll_block->bar); + ll_region->paddr += ll_block->off; + ll_region->sz = ll_block->sz; + + dt_region->vaddr.io = pcim_iomap_table(pdev)[dt_block->bar]; + if (!dt_region->vaddr.io) + return -ENOMEM; + + dt_region->vaddr.io += dt_block->off; + dt_region->paddr = pci_bus_address(pdev, dt_block->bar); + dt_region->paddr += dt_block->off; + dt_region->sz = dt_block->sz; + } + + /* Debug info */ + if (chip->mf == EDMA_MF_EDMA_LEGACY) + pci_dbg(pdev, "Version:\teDMA Port Logic (0x%x)\n", chip->mf); + else if (chip->mf == EDMA_MF_EDMA_UNROLL) + pci_dbg(pdev, "Version:\teDMA Unroll (0x%x)\n", chip->mf); + else if (chip->mf == EDMA_MF_HDMA_COMPAT) + pci_dbg(pdev, "Version:\tHDMA Compatible (0x%x)\n", chip->mf); + else + pci_dbg(pdev, "Version:\tUnknown (0x%x)\n", chip->mf); + + pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p)\n", + vsec_data.rg.bar, vsec_data.rg.off, vsec_data.rg.sz, + chip->reg_base); + + + for (i = 0; i < chip->ll_wr_cnt; i++) { + pci_dbg(pdev, "L. List:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", + i, vsec_data.ll_wr[i].bar, + vsec_data.ll_wr[i].off, chip->ll_region_wr[i].sz, + chip->ll_region_wr[i].vaddr.io, &chip->ll_region_wr[i].paddr); + + pci_dbg(pdev, "Data:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", + i, vsec_data.dt_wr[i].bar, + vsec_data.dt_wr[i].off, chip->dt_region_wr[i].sz, + chip->dt_region_wr[i].vaddr.io, &chip->dt_region_wr[i].paddr); + } + + for (i = 0; i < chip->ll_rd_cnt; i++) { + pci_dbg(pdev, "L. List:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", + i, vsec_data.ll_rd[i].bar, + vsec_data.ll_rd[i].off, chip->ll_region_rd[i].sz, + chip->ll_region_rd[i].vaddr.io, &chip->ll_region_rd[i].paddr); + + pci_dbg(pdev, "Data:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", + i, vsec_data.dt_rd[i].bar, + vsec_data.dt_rd[i].off, chip->dt_region_rd[i].sz, + chip->dt_region_rd[i].vaddr.io, &chip->dt_region_rd[i].paddr); + } + + pci_dbg(pdev, "Nr. IRQs:\t%u\n", chip->nr_irqs); + + /* Validating if PCI interrupts were enabled */ + if (!pci_dev_msi_enabled(pdev)) { + pci_err(pdev, "enable interrupt failed\n"); + return -EPERM; + } + + /* Starting eDMA driver */ + err = dw_edma_probe(chip); + if (err) { + pci_err(pdev, "eDMA probe failed\n"); + return err; + } + + /* Saving data structure reference */ + pci_set_drvdata(pdev, chip); + + return 0; +} + +static void dw_edma_pcie_remove(struct pci_dev *pdev) +{ + struct dw_edma_chip *chip = pci_get_drvdata(pdev); + int err; + + /* Stopping eDMA driver */ + err = dw_edma_remove(chip); + if (err) + pci_warn(pdev, "can't remove device properly: %d\n", err); + + /* Freeing IRQs */ + pci_free_irq_vectors(pdev); +} + +static const struct pci_device_id dw_edma_pcie_id_table[] = { + { PCI_DEVICE_DATA(SYNOPSYS, EDDA, &snps_edda_data) }, + { } +}; +MODULE_DEVICE_TABLE(pci, dw_edma_pcie_id_table); + +static struct pci_driver dw_edma_pcie_driver = { + .name = "dw-edma-pcie", + .id_table = dw_edma_pcie_id_table, + .probe = dw_edma_pcie_probe, + .remove = dw_edma_pcie_remove, +}; + +module_pci_driver(dw_edma_pcie_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare eDMA PCIe driver"); +MODULE_AUTHOR("Gustavo Pimentel "); diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c new file mode 100644 index 0000000000..b38786f0ad --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-v0-core.c @@ -0,0 +1,508 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA v0 core + * + * Author: Gustavo Pimentel + */ + +#include +#include +#include + +#include "dw-edma-core.h" +#include "dw-edma-v0-core.h" +#include "dw-edma-v0-regs.h" +#include "dw-edma-v0-debugfs.h" + +enum dw_edma_control { + DW_EDMA_V0_CB = BIT(0), + DW_EDMA_V0_TCB = BIT(1), + DW_EDMA_V0_LLP = BIT(2), + DW_EDMA_V0_LIE = BIT(3), + DW_EDMA_V0_RIE = BIT(4), + DW_EDMA_V0_CCS = BIT(8), + DW_EDMA_V0_LLE = BIT(9), +}; + +static inline struct dw_edma_v0_regs __iomem *__dw_regs(struct dw_edma *dw) +{ + return dw->chip->reg_base; +} + +#define SET_32(dw, name, value) \ + writel(value, &(__dw_regs(dw)->name)) + +#define GET_32(dw, name) \ + readl(&(__dw_regs(dw)->name)) + +#define SET_RW_32(dw, dir, name, value) \ + do { \ + if ((dir) == EDMA_DIR_WRITE) \ + SET_32(dw, wr_##name, value); \ + else \ + SET_32(dw, rd_##name, value); \ + } while (0) + +#define GET_RW_32(dw, dir, name) \ + ((dir) == EDMA_DIR_WRITE \ + ? GET_32(dw, wr_##name) \ + : GET_32(dw, rd_##name)) + +#define SET_BOTH_32(dw, name, value) \ + do { \ + SET_32(dw, wr_##name, value); \ + SET_32(dw, rd_##name, value); \ + } while (0) + +#define SET_64(dw, name, value) \ + writeq(value, &(__dw_regs(dw)->name)) + +#define GET_64(dw, name) \ + readq(&(__dw_regs(dw)->name)) + +#define SET_RW_64(dw, dir, name, value) \ + do { \ + if ((dir) == EDMA_DIR_WRITE) \ + SET_64(dw, wr_##name, value); \ + else \ + SET_64(dw, rd_##name, value); \ + } while (0) + +#define GET_RW_64(dw, dir, name) \ + ((dir) == EDMA_DIR_WRITE \ + ? GET_64(dw, wr_##name) \ + : GET_64(dw, rd_##name)) + +#define SET_BOTH_64(dw, name, value) \ + do { \ + SET_64(dw, wr_##name, value); \ + SET_64(dw, rd_##name, value); \ + } while (0) + +#define SET_COMPAT(dw, name, value) \ + writel(value, &(__dw_regs(dw)->type.unroll.name)) + +#define SET_RW_COMPAT(dw, dir, name, value) \ + do { \ + if ((dir) == EDMA_DIR_WRITE) \ + SET_COMPAT(dw, wr_##name, value); \ + else \ + SET_COMPAT(dw, rd_##name, value); \ + } while (0) + +static inline struct dw_edma_v0_ch_regs __iomem * +__dw_ch_regs(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch) +{ + if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) + return &(__dw_regs(dw)->type.legacy.ch); + + if (dir == EDMA_DIR_WRITE) + return &__dw_regs(dw)->type.unroll.ch[ch].wr; + + return &__dw_regs(dw)->type.unroll.ch[ch].rd; +} + +static inline void writel_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch, + u32 value, void __iomem *addr) +{ + if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) { + u32 viewport_sel; + unsigned long flags; + + raw_spin_lock_irqsave(&dw->lock, flags); + + viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch); + if (dir == EDMA_DIR_READ) + viewport_sel |= BIT(31); + + writel(viewport_sel, + &(__dw_regs(dw)->type.legacy.viewport_sel)); + writel(value, addr); + + raw_spin_unlock_irqrestore(&dw->lock, flags); + } else { + writel(value, addr); + } +} + +static inline u32 readl_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch, + const void __iomem *addr) +{ + u32 value; + + if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) { + u32 viewport_sel; + unsigned long flags; + + raw_spin_lock_irqsave(&dw->lock, flags); + + viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch); + if (dir == EDMA_DIR_READ) + viewport_sel |= BIT(31); + + writel(viewport_sel, + &(__dw_regs(dw)->type.legacy.viewport_sel)); + value = readl(addr); + + raw_spin_unlock_irqrestore(&dw->lock, flags); + } else { + value = readl(addr); + } + + return value; +} + +#define SET_CH_32(dw, dir, ch, name, value) \ + writel_ch(dw, dir, ch, value, &(__dw_ch_regs(dw, dir, ch)->name)) + +#define GET_CH_32(dw, dir, ch, name) \ + readl_ch(dw, dir, ch, &(__dw_ch_regs(dw, dir, ch)->name)) + +/* eDMA management callbacks */ +static void dw_edma_v0_core_off(struct dw_edma *dw) +{ + SET_BOTH_32(dw, int_mask, + EDMA_V0_DONE_INT_MASK | EDMA_V0_ABORT_INT_MASK); + SET_BOTH_32(dw, int_clear, + EDMA_V0_DONE_INT_MASK | EDMA_V0_ABORT_INT_MASK); + SET_BOTH_32(dw, engine_en, 0); +} + +static u16 dw_edma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir) +{ + u32 num_ch; + + if (dir == EDMA_DIR_WRITE) + num_ch = FIELD_GET(EDMA_V0_WRITE_CH_COUNT_MASK, + GET_32(dw, ctrl)); + else + num_ch = FIELD_GET(EDMA_V0_READ_CH_COUNT_MASK, + GET_32(dw, ctrl)); + + if (num_ch > EDMA_V0_MAX_NR_CH) + num_ch = EDMA_V0_MAX_NR_CH; + + return (u16)num_ch; +} + +static enum dma_status dw_edma_v0_core_ch_status(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + u32 tmp; + + tmp = FIELD_GET(EDMA_V0_CH_STATUS_MASK, + GET_CH_32(dw, chan->dir, chan->id, ch_control1)); + + if (tmp == 1) + return DMA_IN_PROGRESS; + else if (tmp == 3) + return DMA_COMPLETE; + else + return DMA_ERROR; +} + +static void dw_edma_v0_core_clear_done_int(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + + SET_RW_32(dw, chan->dir, int_clear, + FIELD_PREP(EDMA_V0_DONE_INT_MASK, BIT(chan->id))); +} + +static void dw_edma_v0_core_clear_abort_int(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + + SET_RW_32(dw, chan->dir, int_clear, + FIELD_PREP(EDMA_V0_ABORT_INT_MASK, BIT(chan->id))); +} + +static u32 dw_edma_v0_core_status_done_int(struct dw_edma *dw, enum dw_edma_dir dir) +{ + return FIELD_GET(EDMA_V0_DONE_INT_MASK, + GET_RW_32(dw, dir, int_status)); +} + +static u32 dw_edma_v0_core_status_abort_int(struct dw_edma *dw, enum dw_edma_dir dir) +{ + return FIELD_GET(EDMA_V0_ABORT_INT_MASK, + GET_RW_32(dw, dir, int_status)); +} + +static irqreturn_t +dw_edma_v0_core_handle_int(struct dw_edma_irq *dw_irq, enum dw_edma_dir dir, + dw_edma_handler_t done, dw_edma_handler_t abort) +{ + struct dw_edma *dw = dw_irq->dw; + unsigned long total, pos, val; + irqreturn_t ret = IRQ_NONE; + struct dw_edma_chan *chan; + unsigned long off; + u32 mask; + + if (dir == EDMA_DIR_WRITE) { + total = dw->wr_ch_cnt; + off = 0; + mask = dw_irq->wr_mask; + } else { + total = dw->rd_ch_cnt; + off = dw->wr_ch_cnt; + mask = dw_irq->rd_mask; + } + + val = dw_edma_v0_core_status_done_int(dw, dir); + val &= mask; + for_each_set_bit(pos, &val, total) { + chan = &dw->chan[pos + off]; + + dw_edma_v0_core_clear_done_int(chan); + done(chan); + + ret = IRQ_HANDLED; + } + + val = dw_edma_v0_core_status_abort_int(dw, dir); + val &= mask; + for_each_set_bit(pos, &val, total) { + chan = &dw->chan[pos + off]; + + dw_edma_v0_core_clear_abort_int(chan); + abort(chan); + + ret = IRQ_HANDLED; + } + + return ret; +} + +static void dw_edma_v0_write_ll_data(struct dw_edma_chunk *chunk, int i, + u32 control, u32 size, u64 sar, u64 dar) +{ + ptrdiff_t ofs = i * sizeof(struct dw_edma_v0_lli); + + if (chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL) { + struct dw_edma_v0_lli *lli = chunk->ll_region.vaddr.mem + ofs; + + lli->control = control; + lli->transfer_size = size; + lli->sar.reg = sar; + lli->dar.reg = dar; + } else { + struct dw_edma_v0_lli __iomem *lli = chunk->ll_region.vaddr.io + ofs; + + writel(control, &lli->control); + writel(size, &lli->transfer_size); + writeq(sar, &lli->sar.reg); + writeq(dar, &lli->dar.reg); + } +} + +static void dw_edma_v0_write_ll_link(struct dw_edma_chunk *chunk, + int i, u32 control, u64 pointer) +{ + ptrdiff_t ofs = i * sizeof(struct dw_edma_v0_lli); + + if (chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL) { + struct dw_edma_v0_llp *llp = chunk->ll_region.vaddr.mem + ofs; + + llp->control = control; + llp->llp.reg = pointer; + } else { + struct dw_edma_v0_llp __iomem *llp = chunk->ll_region.vaddr.io + ofs; + + writel(control, &llp->control); + writeq(pointer, &llp->llp.reg); + } +} + +static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) +{ + struct dw_edma_burst *child; + struct dw_edma_chan *chan = chunk->chan; + u32 control = 0, i = 0; + int j; + + if (chunk->cb) + control = DW_EDMA_V0_CB; + + j = chunk->bursts_alloc; + list_for_each_entry(child, &chunk->burst->list, list) { + j--; + if (!j) { + control |= DW_EDMA_V0_LIE; + if (!(chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + control |= DW_EDMA_V0_RIE; + } + + dw_edma_v0_write_ll_data(chunk, i++, control, child->sz, + child->sar, child->dar); + } + + control = DW_EDMA_V0_LLP | DW_EDMA_V0_TCB; + if (!chunk->cb) + control |= DW_EDMA_V0_CB; + + dw_edma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr); +} + +static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) +{ + struct dw_edma_chan *chan = chunk->chan; + struct dw_edma *dw = chan->dw; + u32 tmp; + + dw_edma_v0_core_write_chunk(chunk); + + if (first) { + /* Enable engine */ + SET_RW_32(dw, chan->dir, engine_en, BIT(0)); + if (dw->chip->mf == EDMA_MF_HDMA_COMPAT) { + switch (chan->id) { + case 0: + SET_RW_COMPAT(dw, chan->dir, ch0_pwr_en, + BIT(0)); + break; + case 1: + SET_RW_COMPAT(dw, chan->dir, ch1_pwr_en, + BIT(0)); + break; + case 2: + SET_RW_COMPAT(dw, chan->dir, ch2_pwr_en, + BIT(0)); + break; + case 3: + SET_RW_COMPAT(dw, chan->dir, ch3_pwr_en, + BIT(0)); + break; + case 4: + SET_RW_COMPAT(dw, chan->dir, ch4_pwr_en, + BIT(0)); + break; + case 5: + SET_RW_COMPAT(dw, chan->dir, ch5_pwr_en, + BIT(0)); + break; + case 6: + SET_RW_COMPAT(dw, chan->dir, ch6_pwr_en, + BIT(0)); + break; + case 7: + SET_RW_COMPAT(dw, chan->dir, ch7_pwr_en, + BIT(0)); + break; + } + } + /* Interrupt unmask - done, abort */ + tmp = GET_RW_32(dw, chan->dir, int_mask); + tmp &= ~FIELD_PREP(EDMA_V0_DONE_INT_MASK, BIT(chan->id)); + tmp &= ~FIELD_PREP(EDMA_V0_ABORT_INT_MASK, BIT(chan->id)); + SET_RW_32(dw, chan->dir, int_mask, tmp); + /* Linked list error */ + tmp = GET_RW_32(dw, chan->dir, linked_list_err_en); + tmp |= FIELD_PREP(EDMA_V0_LINKED_LIST_ERR_MASK, BIT(chan->id)); + SET_RW_32(dw, chan->dir, linked_list_err_en, tmp); + /* Channel control */ + SET_CH_32(dw, chan->dir, chan->id, ch_control1, + (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE)); + /* Linked list */ + /* llp is not aligned on 64bit -> keep 32bit accesses */ + SET_CH_32(dw, chan->dir, chan->id, llp.lsb, + lower_32_bits(chunk->ll_region.paddr)); + SET_CH_32(dw, chan->dir, chan->id, llp.msb, + upper_32_bits(chunk->ll_region.paddr)); + } + /* Doorbell */ + SET_RW_32(dw, chan->dir, doorbell, + FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id)); +} + +static void dw_edma_v0_core_ch_config(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + u32 tmp = 0; + + /* MSI done addr - low, high */ + SET_RW_32(dw, chan->dir, done_imwr.lsb, chan->msi.address_lo); + SET_RW_32(dw, chan->dir, done_imwr.msb, chan->msi.address_hi); + /* MSI abort addr - low, high */ + SET_RW_32(dw, chan->dir, abort_imwr.lsb, chan->msi.address_lo); + SET_RW_32(dw, chan->dir, abort_imwr.msb, chan->msi.address_hi); + /* MSI data - low, high */ + switch (chan->id) { + case 0: + case 1: + tmp = GET_RW_32(dw, chan->dir, ch01_imwr_data); + break; + + case 2: + case 3: + tmp = GET_RW_32(dw, chan->dir, ch23_imwr_data); + break; + + case 4: + case 5: + tmp = GET_RW_32(dw, chan->dir, ch45_imwr_data); + break; + + case 6: + case 7: + tmp = GET_RW_32(dw, chan->dir, ch67_imwr_data); + break; + } + + if (chan->id & BIT(0)) { + /* Channel odd {1, 3, 5, 7} */ + tmp &= EDMA_V0_CH_EVEN_MSI_DATA_MASK; + tmp |= FIELD_PREP(EDMA_V0_CH_ODD_MSI_DATA_MASK, + chan->msi.data); + } else { + /* Channel even {0, 2, 4, 6} */ + tmp &= EDMA_V0_CH_ODD_MSI_DATA_MASK; + tmp |= FIELD_PREP(EDMA_V0_CH_EVEN_MSI_DATA_MASK, + chan->msi.data); + } + + switch (chan->id) { + case 0: + case 1: + SET_RW_32(dw, chan->dir, ch01_imwr_data, tmp); + break; + + case 2: + case 3: + SET_RW_32(dw, chan->dir, ch23_imwr_data, tmp); + break; + + case 4: + case 5: + SET_RW_32(dw, chan->dir, ch45_imwr_data, tmp); + break; + + case 6: + case 7: + SET_RW_32(dw, chan->dir, ch67_imwr_data, tmp); + break; + } +} + +/* eDMA debugfs callbacks */ +static void dw_edma_v0_core_debugfs_on(struct dw_edma *dw) +{ + dw_edma_v0_debugfs_on(dw); +} + +static const struct dw_edma_core_ops dw_edma_v0_core = { + .off = dw_edma_v0_core_off, + .ch_count = dw_edma_v0_core_ch_count, + .ch_status = dw_edma_v0_core_ch_status, + .handle_int = dw_edma_v0_core_handle_int, + .start = dw_edma_v0_core_start, + .ch_config = dw_edma_v0_core_ch_config, + .debugfs_on = dw_edma_v0_core_debugfs_on, +}; + +void dw_edma_v0_core_register(struct dw_edma *dw) +{ + dw->core = &dw_edma_v0_core; +} diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.h b/drivers/dma/dw-edma/dw-edma-v0-core.h new file mode 100644 index 0000000000..04a882222f --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-v0-core.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA v0 core + * + * Author: Gustavo Pimentel + */ + +#ifndef _DW_EDMA_V0_CORE_H +#define _DW_EDMA_V0_CORE_H + +#include + +/* eDMA core register */ +void dw_edma_v0_core_register(struct dw_edma *dw); + +#endif /* _DW_EDMA_V0_CORE_H */ diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c new file mode 100644 index 0000000000..0745d9e7d2 --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA v0 core + * + * Author: Gustavo Pimentel + */ + +#include +#include + +#include "dw-edma-v0-debugfs.h" +#include "dw-edma-v0-regs.h" +#include "dw-edma-core.h" + +#define REGS_ADDR(dw, name) \ + ({ \ + struct dw_edma_v0_regs __iomem *__regs = (dw)->chip->reg_base; \ + \ + (void __iomem *)&__regs->name; \ + }) + +#define REGS_CH_ADDR(dw, name, _dir, _ch) \ + ({ \ + struct dw_edma_v0_ch_regs __iomem *__ch_regs; \ + \ + if ((dw)->chip->mf == EDMA_MF_EDMA_LEGACY) \ + __ch_regs = REGS_ADDR(dw, type.legacy.ch); \ + else if (_dir == EDMA_DIR_READ) \ + __ch_regs = REGS_ADDR(dw, type.unroll.ch[_ch].rd); \ + else \ + __ch_regs = REGS_ADDR(dw, type.unroll.ch[_ch].wr); \ + \ + (void __iomem *)&__ch_regs->name; \ + }) + +#define REGISTER(dw, name) \ + { dw, #name, REGS_ADDR(dw, name) } + +#define CTX_REGISTER(dw, name, dir, ch) \ + { dw, #name, REGS_CH_ADDR(dw, name, dir, ch), dir, ch } + +#define WR_REGISTER(dw, name) \ + { dw, #name, REGS_ADDR(dw, wr_##name) } +#define RD_REGISTER(dw, name) \ + { dw, #name, REGS_ADDR(dw, rd_##name) } + +#define WR_REGISTER_LEGACY(dw, name) \ + { dw, #name, REGS_ADDR(dw, type.legacy.wr_##name) } +#define RD_REGISTER_LEGACY(name) \ + { dw, #name, REGS_ADDR(dw, type.legacy.rd_##name) } + +#define WR_REGISTER_UNROLL(dw, name) \ + { dw, #name, REGS_ADDR(dw, type.unroll.wr_##name) } +#define RD_REGISTER_UNROLL(dw, name) \ + { dw, #name, REGS_ADDR(dw, type.unroll.rd_##name) } + +#define WRITE_STR "write" +#define READ_STR "read" +#define CHANNEL_STR "channel" +#define REGISTERS_STR "registers" + +struct dw_edma_debugfs_entry { + struct dw_edma *dw; + const char *name; + void __iomem *reg; + enum dw_edma_dir dir; + u16 ch; +}; + +static int dw_edma_debugfs_u32_get(void *data, u64 *val) +{ + struct dw_edma_debugfs_entry *entry = data; + struct dw_edma *dw = entry->dw; + void __iomem *reg = entry->reg; + + if (dw->chip->mf == EDMA_MF_EDMA_LEGACY && + reg >= REGS_ADDR(dw, type.legacy.ch)) { + unsigned long flags; + u32 viewport_sel; + + viewport_sel = entry->dir == EDMA_DIR_READ ? BIT(31) : 0; + viewport_sel |= FIELD_PREP(EDMA_V0_VIEWPORT_MASK, entry->ch); + + raw_spin_lock_irqsave(&dw->lock, flags); + + writel(viewport_sel, REGS_ADDR(dw, type.legacy.viewport_sel)); + *val = readl(reg); + + raw_spin_unlock_irqrestore(&dw->lock, flags); + } else { + *val = readl(reg); + } + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, dw_edma_debugfs_u32_get, NULL, "0x%08llx\n"); + +static void dw_edma_debugfs_create_x32(struct dw_edma *dw, + const struct dw_edma_debugfs_entry ini[], + int nr_entries, struct dentry *dent) +{ + struct dw_edma_debugfs_entry *entries; + int i; + + entries = devm_kcalloc(dw->chip->dev, nr_entries, sizeof(*entries), + GFP_KERNEL); + if (!entries) + return; + + for (i = 0; i < nr_entries; i++) { + entries[i] = ini[i]; + + debugfs_create_file_unsafe(entries[i].name, 0444, dent, + &entries[i], &fops_x32); + } +} + +static void dw_edma_debugfs_regs_ch(struct dw_edma *dw, enum dw_edma_dir dir, + u16 ch, struct dentry *dent) +{ + struct dw_edma_debugfs_entry debugfs_regs[] = { + CTX_REGISTER(dw, ch_control1, dir, ch), + CTX_REGISTER(dw, ch_control2, dir, ch), + CTX_REGISTER(dw, transfer_size, dir, ch), + CTX_REGISTER(dw, sar.lsb, dir, ch), + CTX_REGISTER(dw, sar.msb, dir, ch), + CTX_REGISTER(dw, dar.lsb, dir, ch), + CTX_REGISTER(dw, dar.msb, dir, ch), + CTX_REGISTER(dw, llp.lsb, dir, ch), + CTX_REGISTER(dw, llp.msb, dir, ch), + }; + int nr_entries; + + nr_entries = ARRAY_SIZE(debugfs_regs); + dw_edma_debugfs_create_x32(dw, debugfs_regs, nr_entries, dent); +} + +static noinline_for_stack void +dw_edma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent) +{ + const struct dw_edma_debugfs_entry debugfs_regs[] = { + /* eDMA global registers */ + WR_REGISTER(dw, engine_en), + WR_REGISTER(dw, doorbell), + WR_REGISTER(dw, ch_arb_weight.lsb), + WR_REGISTER(dw, ch_arb_weight.msb), + /* eDMA interrupts registers */ + WR_REGISTER(dw, int_status), + WR_REGISTER(dw, int_mask), + WR_REGISTER(dw, int_clear), + WR_REGISTER(dw, err_status), + WR_REGISTER(dw, done_imwr.lsb), + WR_REGISTER(dw, done_imwr.msb), + WR_REGISTER(dw, abort_imwr.lsb), + WR_REGISTER(dw, abort_imwr.msb), + WR_REGISTER(dw, ch01_imwr_data), + WR_REGISTER(dw, ch23_imwr_data), + WR_REGISTER(dw, ch45_imwr_data), + WR_REGISTER(dw, ch67_imwr_data), + WR_REGISTER(dw, linked_list_err_en), + }; + const struct dw_edma_debugfs_entry debugfs_unroll_regs[] = { + /* eDMA channel context grouping */ + WR_REGISTER_UNROLL(dw, engine_chgroup), + WR_REGISTER_UNROLL(dw, engine_hshake_cnt.lsb), + WR_REGISTER_UNROLL(dw, engine_hshake_cnt.msb), + WR_REGISTER_UNROLL(dw, ch0_pwr_en), + WR_REGISTER_UNROLL(dw, ch1_pwr_en), + WR_REGISTER_UNROLL(dw, ch2_pwr_en), + WR_REGISTER_UNROLL(dw, ch3_pwr_en), + WR_REGISTER_UNROLL(dw, ch4_pwr_en), + WR_REGISTER_UNROLL(dw, ch5_pwr_en), + WR_REGISTER_UNROLL(dw, ch6_pwr_en), + WR_REGISTER_UNROLL(dw, ch7_pwr_en), + }; + struct dentry *regs_dent, *ch_dent; + int nr_entries, i; + char name[16]; + + regs_dent = debugfs_create_dir(WRITE_STR, dent); + + nr_entries = ARRAY_SIZE(debugfs_regs); + dw_edma_debugfs_create_x32(dw, debugfs_regs, nr_entries, regs_dent); + + if (dw->chip->mf == EDMA_MF_HDMA_COMPAT) { + nr_entries = ARRAY_SIZE(debugfs_unroll_regs); + dw_edma_debugfs_create_x32(dw, debugfs_unroll_regs, nr_entries, + regs_dent); + } + + for (i = 0; i < dw->wr_ch_cnt; i++) { + snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i); + + ch_dent = debugfs_create_dir(name, regs_dent); + + dw_edma_debugfs_regs_ch(dw, EDMA_DIR_WRITE, i, ch_dent); + } +} + +static noinline_for_stack void dw_edma_debugfs_regs_rd(struct dw_edma *dw, + struct dentry *dent) +{ + const struct dw_edma_debugfs_entry debugfs_regs[] = { + /* eDMA global registers */ + RD_REGISTER(dw, engine_en), + RD_REGISTER(dw, doorbell), + RD_REGISTER(dw, ch_arb_weight.lsb), + RD_REGISTER(dw, ch_arb_weight.msb), + /* eDMA interrupts registers */ + RD_REGISTER(dw, int_status), + RD_REGISTER(dw, int_mask), + RD_REGISTER(dw, int_clear), + RD_REGISTER(dw, err_status.lsb), + RD_REGISTER(dw, err_status.msb), + RD_REGISTER(dw, linked_list_err_en), + RD_REGISTER(dw, done_imwr.lsb), + RD_REGISTER(dw, done_imwr.msb), + RD_REGISTER(dw, abort_imwr.lsb), + RD_REGISTER(dw, abort_imwr.msb), + RD_REGISTER(dw, ch01_imwr_data), + RD_REGISTER(dw, ch23_imwr_data), + RD_REGISTER(dw, ch45_imwr_data), + RD_REGISTER(dw, ch67_imwr_data), + }; + const struct dw_edma_debugfs_entry debugfs_unroll_regs[] = { + /* eDMA channel context grouping */ + RD_REGISTER_UNROLL(dw, engine_chgroup), + RD_REGISTER_UNROLL(dw, engine_hshake_cnt.lsb), + RD_REGISTER_UNROLL(dw, engine_hshake_cnt.msb), + RD_REGISTER_UNROLL(dw, ch0_pwr_en), + RD_REGISTER_UNROLL(dw, ch1_pwr_en), + RD_REGISTER_UNROLL(dw, ch2_pwr_en), + RD_REGISTER_UNROLL(dw, ch3_pwr_en), + RD_REGISTER_UNROLL(dw, ch4_pwr_en), + RD_REGISTER_UNROLL(dw, ch5_pwr_en), + RD_REGISTER_UNROLL(dw, ch6_pwr_en), + RD_REGISTER_UNROLL(dw, ch7_pwr_en), + }; + struct dentry *regs_dent, *ch_dent; + int nr_entries, i; + char name[16]; + + regs_dent = debugfs_create_dir(READ_STR, dent); + + nr_entries = ARRAY_SIZE(debugfs_regs); + dw_edma_debugfs_create_x32(dw, debugfs_regs, nr_entries, regs_dent); + + if (dw->chip->mf == EDMA_MF_HDMA_COMPAT) { + nr_entries = ARRAY_SIZE(debugfs_unroll_regs); + dw_edma_debugfs_create_x32(dw, debugfs_unroll_regs, nr_entries, + regs_dent); + } + + for (i = 0; i < dw->rd_ch_cnt; i++) { + snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i); + + ch_dent = debugfs_create_dir(name, regs_dent); + + dw_edma_debugfs_regs_ch(dw, EDMA_DIR_READ, i, ch_dent); + } +} + +static void dw_edma_debugfs_regs(struct dw_edma *dw) +{ + const struct dw_edma_debugfs_entry debugfs_regs[] = { + REGISTER(dw, ctrl_data_arb_prior), + REGISTER(dw, ctrl), + }; + struct dentry *regs_dent; + int nr_entries; + + regs_dent = debugfs_create_dir(REGISTERS_STR, dw->dma.dbg_dev_root); + + nr_entries = ARRAY_SIZE(debugfs_regs); + dw_edma_debugfs_create_x32(dw, debugfs_regs, nr_entries, regs_dent); + + dw_edma_debugfs_regs_wr(dw, regs_dent); + dw_edma_debugfs_regs_rd(dw, regs_dent); +} + +void dw_edma_v0_debugfs_on(struct dw_edma *dw) +{ + if (!debugfs_initialized()) + return; + + debugfs_create_u32("mf", 0444, dw->dma.dbg_dev_root, &dw->chip->mf); + debugfs_create_u16("wr_ch_cnt", 0444, dw->dma.dbg_dev_root, &dw->wr_ch_cnt); + debugfs_create_u16("rd_ch_cnt", 0444, dw->dma.dbg_dev_root, &dw->rd_ch_cnt); + + dw_edma_debugfs_regs(dw); +} diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.h b/drivers/dma/dw-edma/dw-edma-v0-debugfs.h new file mode 100644 index 0000000000..fb3342d97d --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA v0 core + * + * Author: Gustavo Pimentel + */ + +#ifndef _DW_EDMA_V0_DEBUG_FS_H +#define _DW_EDMA_V0_DEBUG_FS_H + +#include + +#ifdef CONFIG_DEBUG_FS +void dw_edma_v0_debugfs_on(struct dw_edma *dw); +#else +static inline void dw_edma_v0_debugfs_on(struct dw_edma *dw) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +#endif /* _DW_EDMA_V0_DEBUG_FS_H */ diff --git a/drivers/dma/dw-edma/dw-edma-v0-regs.h b/drivers/dma/dw-edma/dw-edma-v0-regs.h new file mode 100644 index 0000000000..e175f7b204 --- /dev/null +++ b/drivers/dma/dw-edma/dw-edma-v0-regs.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare eDMA v0 core + * + * Author: Gustavo Pimentel + */ + +#ifndef _DW_EDMA_V0_REGS_H +#define _DW_EDMA_V0_REGS_H + +#include + +#define EDMA_V0_MAX_NR_CH 8 +#define EDMA_V0_VIEWPORT_MASK GENMASK(2, 0) +#define EDMA_V0_DONE_INT_MASK GENMASK(7, 0) +#define EDMA_V0_ABORT_INT_MASK GENMASK(23, 16) +#define EDMA_V0_WRITE_CH_COUNT_MASK GENMASK(3, 0) +#define EDMA_V0_READ_CH_COUNT_MASK GENMASK(19, 16) +#define EDMA_V0_CH_STATUS_MASK GENMASK(6, 5) +#define EDMA_V0_DOORBELL_CH_MASK GENMASK(2, 0) +#define EDMA_V0_LINKED_LIST_ERR_MASK GENMASK(7, 0) + +#define EDMA_V0_CH_ODD_MSI_DATA_MASK GENMASK(31, 16) +#define EDMA_V0_CH_EVEN_MSI_DATA_MASK GENMASK(15, 0) + +struct dw_edma_v0_ch_regs { + u32 ch_control1; /* 0x0000 */ + u32 ch_control2; /* 0x0004 */ + u32 transfer_size; /* 0x0008 */ + union { + u64 reg; /* 0x000c..0x0010 */ + struct { + u32 lsb; /* 0x000c */ + u32 msb; /* 0x0010 */ + }; + } sar; + union { + u64 reg; /* 0x0014..0x0018 */ + struct { + u32 lsb; /* 0x0014 */ + u32 msb; /* 0x0018 */ + }; + } dar; + union { + u64 reg; /* 0x001c..0x0020 */ + struct { + u32 lsb; /* 0x001c */ + u32 msb; /* 0x0020 */ + }; + } llp; +} __packed; + +struct dw_edma_v0_ch { + struct dw_edma_v0_ch_regs wr; /* 0x0200 */ + u32 padding_1[55]; /* 0x0224..0x02fc */ + struct dw_edma_v0_ch_regs rd; /* 0x0300 */ + u32 padding_2[55]; /* 0x0324..0x03fc */ +} __packed; + +struct dw_edma_v0_unroll { + u32 padding_1; /* 0x00f8 */ + u32 wr_engine_chgroup; /* 0x0100 */ + u32 rd_engine_chgroup; /* 0x0104 */ + union { + u64 reg; /* 0x0108..0x010c */ + struct { + u32 lsb; /* 0x0108 */ + u32 msb; /* 0x010c */ + }; + } wr_engine_hshake_cnt; + u32 padding_2[2]; /* 0x0110..0x0114 */ + union { + u64 reg; /* 0x0120..0x0124 */ + struct { + u32 lsb; /* 0x0120 */ + u32 msb; /* 0x0124 */ + }; + } rd_engine_hshake_cnt; + u32 padding_3[2]; /* 0x0120..0x0124 */ + u32 wr_ch0_pwr_en; /* 0x0128 */ + u32 wr_ch1_pwr_en; /* 0x012c */ + u32 wr_ch2_pwr_en; /* 0x0130 */ + u32 wr_ch3_pwr_en; /* 0x0134 */ + u32 wr_ch4_pwr_en; /* 0x0138 */ + u32 wr_ch5_pwr_en; /* 0x013c */ + u32 wr_ch6_pwr_en; /* 0x0140 */ + u32 wr_ch7_pwr_en; /* 0x0144 */ + u32 padding_4[8]; /* 0x0148..0x0164 */ + u32 rd_ch0_pwr_en; /* 0x0168 */ + u32 rd_ch1_pwr_en; /* 0x016c */ + u32 rd_ch2_pwr_en; /* 0x0170 */ + u32 rd_ch3_pwr_en; /* 0x0174 */ + u32 rd_ch4_pwr_en; /* 0x0178 */ + u32 rd_ch5_pwr_en; /* 0x018c */ + u32 rd_ch6_pwr_en; /* 0x0180 */ + u32 rd_ch7_pwr_en; /* 0x0184 */ + u32 padding_5[30]; /* 0x0188..0x01fc */ + struct dw_edma_v0_ch ch[EDMA_V0_MAX_NR_CH]; /* 0x0200..0x1120 */ +} __packed; + +struct dw_edma_v0_legacy { + u32 viewport_sel; /* 0x00f8 */ + struct dw_edma_v0_ch_regs ch; /* 0x0100..0x0120 */ +} __packed; + +struct dw_edma_v0_regs { + /* eDMA global registers */ + u32 ctrl_data_arb_prior; /* 0x0000 */ + u32 padding_1; /* 0x0004 */ + u32 ctrl; /* 0x0008 */ + u32 wr_engine_en; /* 0x000c */ + u32 wr_doorbell; /* 0x0010 */ + u32 padding_2; /* 0x0014 */ + union { + u64 reg; /* 0x0018..0x001c */ + struct { + u32 lsb; /* 0x0018 */ + u32 msb; /* 0x001c */ + }; + } wr_ch_arb_weight; + u32 padding_3[3]; /* 0x0020..0x0028 */ + u32 rd_engine_en; /* 0x002c */ + u32 rd_doorbell; /* 0x0030 */ + u32 padding_4; /* 0x0034 */ + union { + u64 reg; /* 0x0038..0x003c */ + struct { + u32 lsb; /* 0x0038 */ + u32 msb; /* 0x003c */ + }; + } rd_ch_arb_weight; + u32 padding_5[3]; /* 0x0040..0x0048 */ + /* eDMA interrupts registers */ + u32 wr_int_status; /* 0x004c */ + u32 padding_6; /* 0x0050 */ + u32 wr_int_mask; /* 0x0054 */ + u32 wr_int_clear; /* 0x0058 */ + u32 wr_err_status; /* 0x005c */ + union { + u64 reg; /* 0x0060..0x0064 */ + struct { + u32 lsb; /* 0x0060 */ + u32 msb; /* 0x0064 */ + }; + } wr_done_imwr; + union { + u64 reg; /* 0x0068..0x006c */ + struct { + u32 lsb; /* 0x0068 */ + u32 msb; /* 0x006c */ + }; + } wr_abort_imwr; + u32 wr_ch01_imwr_data; /* 0x0070 */ + u32 wr_ch23_imwr_data; /* 0x0074 */ + u32 wr_ch45_imwr_data; /* 0x0078 */ + u32 wr_ch67_imwr_data; /* 0x007c */ + u32 padding_7[4]; /* 0x0080..0x008c */ + u32 wr_linked_list_err_en; /* 0x0090 */ + u32 padding_8[3]; /* 0x0094..0x009c */ + u32 rd_int_status; /* 0x00a0 */ + u32 padding_9; /* 0x00a4 */ + u32 rd_int_mask; /* 0x00a8 */ + u32 rd_int_clear; /* 0x00ac */ + u32 padding_10; /* 0x00b0 */ + union { + u64 reg; /* 0x00b4..0x00b8 */ + struct { + u32 lsb; /* 0x00b4 */ + u32 msb; /* 0x00b8 */ + }; + } rd_err_status; + u32 padding_11[2]; /* 0x00bc..0x00c0 */ + u32 rd_linked_list_err_en; /* 0x00c4 */ + u32 padding_12; /* 0x00c8 */ + union { + u64 reg; /* 0x00cc..0x00d0 */ + struct { + u32 lsb; /* 0x00cc */ + u32 msb; /* 0x00d0 */ + }; + } rd_done_imwr; + union { + u64 reg; /* 0x00d4..0x00d8 */ + struct { + u32 lsb; /* 0x00d4 */ + u32 msb; /* 0x00d8 */ + }; + } rd_abort_imwr; + u32 rd_ch01_imwr_data; /* 0x00dc */ + u32 rd_ch23_imwr_data; /* 0x00e0 */ + u32 rd_ch45_imwr_data; /* 0x00e4 */ + u32 rd_ch67_imwr_data; /* 0x00e8 */ + u32 padding_13[4]; /* 0x00ec..0x00f8 */ + /* eDMA channel context grouping */ + union dw_edma_v0_type { + struct dw_edma_v0_legacy legacy; /* 0x00f8..0x0120 */ + struct dw_edma_v0_unroll unroll; /* 0x00f8..0x1120 */ + } type; +} __packed; + +struct dw_edma_v0_lli { + u32 control; + u32 transfer_size; + union { + u64 reg; + struct { + u32 lsb; + u32 msb; + }; + } sar; + union { + u64 reg; + struct { + u32 lsb; + u32 msb; + }; + } dar; +} __packed; + +struct dw_edma_v0_llp { + u32 control; + u32 reserved; + union { + u64 reg; + struct { + u32 lsb; + u32 msb; + }; + } llp; +} __packed; + +#endif /* _DW_EDMA_V0_REGS_H */ diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c new file mode 100644 index 0000000000..00b735a020 --- /dev/null +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 Cai Huoqing + * Synopsys DesignWare HDMA v0 core + */ + +#include +#include +#include + +#include "dw-edma-core.h" +#include "dw-hdma-v0-core.h" +#include "dw-hdma-v0-regs.h" +#include "dw-hdma-v0-debugfs.h" + +enum dw_hdma_control { + DW_HDMA_V0_CB = BIT(0), + DW_HDMA_V0_TCB = BIT(1), + DW_HDMA_V0_LLP = BIT(2), + DW_HDMA_V0_LIE = BIT(3), + DW_HDMA_V0_RIE = BIT(4), + DW_HDMA_V0_CCS = BIT(8), + DW_HDMA_V0_LLE = BIT(9), +}; + +static inline struct dw_hdma_v0_regs __iomem *__dw_regs(struct dw_edma *dw) +{ + return dw->chip->reg_base; +} + +static inline struct dw_hdma_v0_ch_regs __iomem * +__dw_ch_regs(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch) +{ + if (dir == EDMA_DIR_WRITE) + return &(__dw_regs(dw)->ch[ch].wr); + else + return &(__dw_regs(dw)->ch[ch].rd); +} + +#define SET_CH_32(dw, dir, ch, name, value) \ + writel(value, &(__dw_ch_regs(dw, dir, ch)->name)) + +#define GET_CH_32(dw, dir, ch, name) \ + readl(&(__dw_ch_regs(dw, dir, ch)->name)) + +#define SET_BOTH_CH_32(dw, ch, name, value) \ + do { \ + writel(value, &(__dw_ch_regs(dw, EDMA_DIR_WRITE, ch)->name)); \ + writel(value, &(__dw_ch_regs(dw, EDMA_DIR_READ, ch)->name)); \ + } while (0) + +/* HDMA management callbacks */ +static void dw_hdma_v0_core_off(struct dw_edma *dw) +{ + int id; + + for (id = 0; id < HDMA_V0_MAX_NR_CH; id++) { + SET_BOTH_CH_32(dw, id, int_setup, + HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK); + SET_BOTH_CH_32(dw, id, int_clear, + HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK); + SET_BOTH_CH_32(dw, id, ch_en, 0); + } +} + +static u16 dw_hdma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir) +{ + u32 num_ch = 0; + int id; + + for (id = 0; id < HDMA_V0_MAX_NR_CH; id++) { + if (GET_CH_32(dw, id, dir, ch_en) & BIT(0)) + num_ch++; + } + + if (num_ch > HDMA_V0_MAX_NR_CH) + num_ch = HDMA_V0_MAX_NR_CH; + + return (u16)num_ch; +} + +static enum dma_status dw_hdma_v0_core_ch_status(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + u32 tmp; + + tmp = FIELD_GET(HDMA_V0_CH_STATUS_MASK, + GET_CH_32(dw, chan->id, chan->dir, ch_stat)); + + if (tmp == 1) + return DMA_IN_PROGRESS; + else if (tmp == 3) + return DMA_COMPLETE; + else + return DMA_ERROR; +} + +static void dw_hdma_v0_core_clear_done_int(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + + SET_CH_32(dw, chan->dir, chan->id, int_clear, HDMA_V0_STOP_INT_MASK); +} + +static void dw_hdma_v0_core_clear_abort_int(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + + SET_CH_32(dw, chan->dir, chan->id, int_clear, HDMA_V0_ABORT_INT_MASK); +} + +static u32 dw_hdma_v0_core_status_int(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + + return GET_CH_32(dw, chan->dir, chan->id, int_stat); +} + +static irqreturn_t +dw_hdma_v0_core_handle_int(struct dw_edma_irq *dw_irq, enum dw_edma_dir dir, + dw_edma_handler_t done, dw_edma_handler_t abort) +{ + struct dw_edma *dw = dw_irq->dw; + unsigned long total, pos, val; + irqreturn_t ret = IRQ_NONE; + struct dw_edma_chan *chan; + unsigned long off, mask; + + if (dir == EDMA_DIR_WRITE) { + total = dw->wr_ch_cnt; + off = 0; + mask = dw_irq->wr_mask; + } else { + total = dw->rd_ch_cnt; + off = dw->wr_ch_cnt; + mask = dw_irq->rd_mask; + } + + for_each_set_bit(pos, &mask, total) { + chan = &dw->chan[pos + off]; + + val = dw_hdma_v0_core_status_int(chan); + if (FIELD_GET(HDMA_V0_STOP_INT_MASK, val)) { + dw_hdma_v0_core_clear_done_int(chan); + done(chan); + + ret = IRQ_HANDLED; + } + + if (FIELD_GET(HDMA_V0_ABORT_INT_MASK, val)) { + dw_hdma_v0_core_clear_abort_int(chan); + abort(chan); + + ret = IRQ_HANDLED; + } + } + + return ret; +} + +static void dw_hdma_v0_write_ll_data(struct dw_edma_chunk *chunk, int i, + u32 control, u32 size, u64 sar, u64 dar) +{ + ptrdiff_t ofs = i * sizeof(struct dw_hdma_v0_lli); + + if (chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL) { + struct dw_hdma_v0_lli *lli = chunk->ll_region.vaddr.mem + ofs; + + lli->control = control; + lli->transfer_size = size; + lli->sar.reg = sar; + lli->dar.reg = dar; + } else { + struct dw_hdma_v0_lli __iomem *lli = chunk->ll_region.vaddr.io + ofs; + + writel(control, &lli->control); + writel(size, &lli->transfer_size); + writeq(sar, &lli->sar.reg); + writeq(dar, &lli->dar.reg); + } +} + +static void dw_hdma_v0_write_ll_link(struct dw_edma_chunk *chunk, + int i, u32 control, u64 pointer) +{ + ptrdiff_t ofs = i * sizeof(struct dw_hdma_v0_lli); + + if (chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL) { + struct dw_hdma_v0_llp *llp = chunk->ll_region.vaddr.mem + ofs; + + llp->control = control; + llp->llp.reg = pointer; + } else { + struct dw_hdma_v0_llp __iomem *llp = chunk->ll_region.vaddr.io + ofs; + + writel(control, &llp->control); + writeq(pointer, &llp->llp.reg); + } +} + +static void dw_hdma_v0_core_write_chunk(struct dw_edma_chunk *chunk) +{ + struct dw_edma_burst *child; + struct dw_edma_chan *chan = chunk->chan; + u32 control = 0, i = 0; + int j; + + if (chunk->cb) + control = DW_HDMA_V0_CB; + + j = chunk->bursts_alloc; + list_for_each_entry(child, &chunk->burst->list, list) { + j--; + if (!j) { + control |= DW_HDMA_V0_LIE; + if (!(chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + control |= DW_HDMA_V0_RIE; + } + + dw_hdma_v0_write_ll_data(chunk, i++, control, child->sz, + child->sar, child->dar); + } + + control = DW_HDMA_V0_LLP | DW_HDMA_V0_TCB; + if (!chunk->cb) + control |= DW_HDMA_V0_CB; + + dw_hdma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr); +} + +static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) +{ + struct dw_edma_chan *chan = chunk->chan; + struct dw_edma *dw = chan->dw; + u32 tmp; + + dw_hdma_v0_core_write_chunk(chunk); + + if (first) { + /* Enable engine */ + SET_CH_32(dw, chan->dir, chan->id, ch_en, BIT(0)); + /* Interrupt enable&unmask - done, abort */ + tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) | + HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK | + HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_STOP_INT_EN; + SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp); + /* Channel control */ + SET_CH_32(dw, chan->dir, chan->id, control1, HDMA_V0_LINKLIST_EN); + /* Linked list */ + /* llp is not aligned on 64bit -> keep 32bit accesses */ + SET_CH_32(dw, chan->dir, chan->id, llp.lsb, + lower_32_bits(chunk->ll_region.paddr)); + SET_CH_32(dw, chan->dir, chan->id, llp.msb, + upper_32_bits(chunk->ll_region.paddr)); + } + /* Set consumer cycle */ + SET_CH_32(dw, chan->dir, chan->id, cycle_sync, + HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT); + /* Doorbell */ + SET_CH_32(dw, chan->dir, chan->id, doorbell, HDMA_V0_DOORBELL_START); +} + +static void dw_hdma_v0_core_ch_config(struct dw_edma_chan *chan) +{ + struct dw_edma *dw = chan->dw; + + /* MSI done addr - low, high */ + SET_CH_32(dw, chan->dir, chan->id, msi_stop.lsb, chan->msi.address_lo); + SET_CH_32(dw, chan->dir, chan->id, msi_stop.msb, chan->msi.address_hi); + /* MSI abort addr - low, high */ + SET_CH_32(dw, chan->dir, chan->id, msi_abort.lsb, chan->msi.address_lo); + SET_CH_32(dw, chan->dir, chan->id, msi_abort.msb, chan->msi.address_hi); + /* config MSI data */ + SET_CH_32(dw, chan->dir, chan->id, msi_msgdata, chan->msi.data); +} + +/* HDMA debugfs callbacks */ +static void dw_hdma_v0_core_debugfs_on(struct dw_edma *dw) +{ + dw_hdma_v0_debugfs_on(dw); +} + +static const struct dw_edma_core_ops dw_hdma_v0_core = { + .off = dw_hdma_v0_core_off, + .ch_count = dw_hdma_v0_core_ch_count, + .ch_status = dw_hdma_v0_core_ch_status, + .handle_int = dw_hdma_v0_core_handle_int, + .start = dw_hdma_v0_core_start, + .ch_config = dw_hdma_v0_core_ch_config, + .debugfs_on = dw_hdma_v0_core_debugfs_on, +}; + +void dw_hdma_v0_core_register(struct dw_edma *dw) +{ + dw->core = &dw_hdma_v0_core; +} diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.h b/drivers/dma/dw-edma/dw-hdma-v0-core.h new file mode 100644 index 0000000000..c373b4f0bd --- /dev/null +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Cai Huoqing + * Synopsys DesignWare HDMA v0 core + * + * Author: Cai Huoqing + */ + +#ifndef _DW_HDMA_V0_CORE_H +#define _DW_HDMA_V0_CORE_H + +#include + +/* HDMA core register */ +void dw_hdma_v0_core_register(struct dw_edma *dw); + +#endif /* _DW_HDMA_V0_CORE_H */ diff --git a/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c new file mode 100644 index 0000000000..520c81978b --- /dev/null +++ b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 Cai Huoqing + * Synopsys DesignWare HDMA v0 debugfs + * + * Author: Cai Huoqing + */ + +#include +#include + +#include "dw-hdma-v0-debugfs.h" +#include "dw-hdma-v0-regs.h" +#include "dw-edma-core.h" + +#define REGS_ADDR(dw, name) \ + ({ \ + struct dw_hdma_v0_regs __iomem *__regs = (dw)->chip->reg_base; \ + \ + (void __iomem *)&__regs->name; \ + }) + +#define REGS_CH_ADDR(dw, name, _dir, _ch) \ + ({ \ + struct dw_hdma_v0_ch_regs __iomem *__ch_regs; \ + \ + if (_dir == EDMA_DIR_READ) \ + __ch_regs = REGS_ADDR(dw, ch[_ch].rd); \ + else \ + __ch_regs = REGS_ADDR(dw, ch[_ch].wr); \ + \ + (void __iomem *)&__ch_regs->name; \ + }) + +#define CTX_REGISTER(dw, name, dir, ch) \ + {#name, REGS_CH_ADDR(dw, name, dir, ch)} + +#define WRITE_STR "write" +#define READ_STR "read" +#define CHANNEL_STR "channel" +#define REGISTERS_STR "registers" + +struct dw_hdma_debugfs_entry { + const char *name; + void __iomem *reg; +}; + +static int dw_hdma_debugfs_u32_get(void *data, u64 *val) +{ + struct dw_hdma_debugfs_entry *entry = data; + void __iomem *reg = entry->reg; + + *val = readl(reg); + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, dw_hdma_debugfs_u32_get, NULL, "0x%08llx\n"); + +static void dw_hdma_debugfs_create_x32(struct dw_edma *dw, + const struct dw_hdma_debugfs_entry ini[], + int nr_entries, struct dentry *dent) +{ + struct dw_hdma_debugfs_entry *entries; + int i; + + entries = devm_kcalloc(dw->chip->dev, nr_entries, sizeof(*entries), + GFP_KERNEL); + if (!entries) + return; + + for (i = 0; i < nr_entries; i++) { + entries[i] = ini[i]; + + debugfs_create_file_unsafe(entries[i].name, 0444, dent, + &entries[i], &fops_x32); + } +} + +static void dw_hdma_debugfs_regs_ch(struct dw_edma *dw, enum dw_edma_dir dir, + u16 ch, struct dentry *dent) +{ + const struct dw_hdma_debugfs_entry debugfs_regs[] = { + CTX_REGISTER(dw, ch_en, dir, ch), + CTX_REGISTER(dw, doorbell, dir, ch), + CTX_REGISTER(dw, prefetch, dir, ch), + CTX_REGISTER(dw, handshake, dir, ch), + CTX_REGISTER(dw, llp.lsb, dir, ch), + CTX_REGISTER(dw, llp.msb, dir, ch), + CTX_REGISTER(dw, cycle_sync, dir, ch), + CTX_REGISTER(dw, transfer_size, dir, ch), + CTX_REGISTER(dw, sar.lsb, dir, ch), + CTX_REGISTER(dw, sar.msb, dir, ch), + CTX_REGISTER(dw, dar.lsb, dir, ch), + CTX_REGISTER(dw, dar.msb, dir, ch), + CTX_REGISTER(dw, watermark_en, dir, ch), + CTX_REGISTER(dw, control1, dir, ch), + CTX_REGISTER(dw, func_num, dir, ch), + CTX_REGISTER(dw, qos, dir, ch), + CTX_REGISTER(dw, ch_stat, dir, ch), + CTX_REGISTER(dw, int_stat, dir, ch), + CTX_REGISTER(dw, int_setup, dir, ch), + CTX_REGISTER(dw, int_clear, dir, ch), + CTX_REGISTER(dw, msi_stop.lsb, dir, ch), + CTX_REGISTER(dw, msi_stop.msb, dir, ch), + CTX_REGISTER(dw, msi_watermark.lsb, dir, ch), + CTX_REGISTER(dw, msi_watermark.msb, dir, ch), + CTX_REGISTER(dw, msi_abort.lsb, dir, ch), + CTX_REGISTER(dw, msi_abort.msb, dir, ch), + CTX_REGISTER(dw, msi_msgdata, dir, ch), + }; + int nr_entries = ARRAY_SIZE(debugfs_regs); + + dw_hdma_debugfs_create_x32(dw, debugfs_regs, nr_entries, dent); +} + +static void dw_hdma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent) +{ + struct dentry *regs_dent, *ch_dent; + char name[16]; + int i; + + regs_dent = debugfs_create_dir(WRITE_STR, dent); + + for (i = 0; i < dw->wr_ch_cnt; i++) { + snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i); + + ch_dent = debugfs_create_dir(name, regs_dent); + + dw_hdma_debugfs_regs_ch(dw, EDMA_DIR_WRITE, i, ch_dent); + } +} + +static void dw_hdma_debugfs_regs_rd(struct dw_edma *dw, struct dentry *dent) +{ + struct dentry *regs_dent, *ch_dent; + char name[16]; + int i; + + regs_dent = debugfs_create_dir(READ_STR, dent); + + for (i = 0; i < dw->rd_ch_cnt; i++) { + snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i); + + ch_dent = debugfs_create_dir(name, regs_dent); + + dw_hdma_debugfs_regs_ch(dw, EDMA_DIR_READ, i, ch_dent); + } +} + +static void dw_hdma_debugfs_regs(struct dw_edma *dw) +{ + struct dentry *regs_dent; + + regs_dent = debugfs_create_dir(REGISTERS_STR, dw->dma.dbg_dev_root); + + dw_hdma_debugfs_regs_wr(dw, regs_dent); + dw_hdma_debugfs_regs_rd(dw, regs_dent); +} + +void dw_hdma_v0_debugfs_on(struct dw_edma *dw) +{ + if (!debugfs_initialized()) + return; + + debugfs_create_u32("mf", 0444, dw->dma.dbg_dev_root, &dw->chip->mf); + debugfs_create_u16("wr_ch_cnt", 0444, dw->dma.dbg_dev_root, &dw->wr_ch_cnt); + debugfs_create_u16("rd_ch_cnt", 0444, dw->dma.dbg_dev_root, &dw->rd_ch_cnt); + + dw_hdma_debugfs_regs(dw); +} diff --git a/drivers/dma/dw-edma/dw-hdma-v0-debugfs.h b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.h new file mode 100644 index 0000000000..e6842c8377 --- /dev/null +++ b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Cai Huoqing + * Synopsys DesignWare HDMA v0 debugfs + * + * Author: Cai Huoqing + */ + +#ifndef _DW_HDMA_V0_DEBUG_FS_H +#define _DW_HDMA_V0_DEBUG_FS_H + +#include + +#ifdef CONFIG_DEBUG_FS +void dw_hdma_v0_debugfs_on(struct dw_edma *dw); +#else +static inline void dw_hdma_v0_debugfs_on(struct dw_edma *dw) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +#endif /* _DW_HDMA_V0_DEBUG_FS_H */ diff --git a/drivers/dma/dw-edma/dw-hdma-v0-regs.h b/drivers/dma/dw-edma/dw-hdma-v0-regs.h new file mode 100644 index 0000000000..a974abdf8a --- /dev/null +++ b/drivers/dma/dw-edma/dw-hdma-v0-regs.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Cai Huoqing + * Synopsys DesignWare HDMA v0 reg + * + * Author: Cai Huoqing + */ + +#ifndef _DW_HDMA_V0_REGS_H +#define _DW_HDMA_V0_REGS_H + +#include + +#define HDMA_V0_MAX_NR_CH 8 +#define HDMA_V0_LOCAL_ABORT_INT_EN BIT(6) +#define HDMA_V0_REMOTE_ABORT_INT_EN BIT(5) +#define HDMA_V0_LOCAL_STOP_INT_EN BIT(4) +#define HDMA_V0_REMOTEL_STOP_INT_EN BIT(3) +#define HDMA_V0_ABORT_INT_MASK BIT(2) +#define HDMA_V0_STOP_INT_MASK BIT(0) +#define HDMA_V0_LINKLIST_EN BIT(0) +#define HDMA_V0_CONSUMER_CYCLE_STAT BIT(1) +#define HDMA_V0_CONSUMER_CYCLE_BIT BIT(0) +#define HDMA_V0_DOORBELL_START BIT(0) +#define HDMA_V0_CH_STATUS_MASK GENMASK(1, 0) + +struct dw_hdma_v0_ch_regs { + u32 ch_en; /* 0x0000 */ + u32 doorbell; /* 0x0004 */ + u32 prefetch; /* 0x0008 */ + u32 handshake; /* 0x000c */ + union { + u64 reg; /* 0x0010..0x0014 */ + struct { + u32 lsb; /* 0x0010 */ + u32 msb; /* 0x0014 */ + }; + } llp; + u32 cycle_sync; /* 0x0018 */ + u32 transfer_size; /* 0x001c */ + union { + u64 reg; /* 0x0020..0x0024 */ + struct { + u32 lsb; /* 0x0020 */ + u32 msb; /* 0x0024 */ + }; + } sar; + union { + u64 reg; /* 0x0028..0x002c */ + struct { + u32 lsb; /* 0x0028 */ + u32 msb; /* 0x002c */ + }; + } dar; + u32 watermark_en; /* 0x0030 */ + u32 control1; /* 0x0034 */ + u32 func_num; /* 0x0038 */ + u32 qos; /* 0x003c */ + u32 padding_1[16]; /* 0x0040..0x007c */ + u32 ch_stat; /* 0x0080 */ + u32 int_stat; /* 0x0084 */ + u32 int_setup; /* 0x0088 */ + u32 int_clear; /* 0x008c */ + union { + u64 reg; /* 0x0090..0x0094 */ + struct { + u32 lsb; /* 0x0090 */ + u32 msb; /* 0x0094 */ + }; + } msi_stop; + union { + u64 reg; /* 0x0098..0x009c */ + struct { + u32 lsb; /* 0x0098 */ + u32 msb; /* 0x009c */ + }; + } msi_watermark; + union { + u64 reg; /* 0x00a0..0x00a4 */ + struct { + u32 lsb; /* 0x00a0 */ + u32 msb; /* 0x00a4 */ + }; + } msi_abort; + u32 msi_msgdata; /* 0x00a8 */ + u32 padding_2[21]; /* 0x00ac..0x00fc */ +} __packed; + +struct dw_hdma_v0_ch { + struct dw_hdma_v0_ch_regs wr; /* 0x0000 */ + struct dw_hdma_v0_ch_regs rd; /* 0x0100 */ +} __packed; + +struct dw_hdma_v0_regs { + struct dw_hdma_v0_ch ch[HDMA_V0_MAX_NR_CH]; /* 0x0000..0x0fa8 */ +} __packed; + +struct dw_hdma_v0_lli { + u32 control; + u32 transfer_size; + union { + u64 reg; + struct { + u32 lsb; + u32 msb; + }; + } sar; + union { + u64 reg; + struct { + u32 lsb; + u32 msb; + }; + } dar; +} __packed; + +struct dw_hdma_v0_llp { + u32 control; + u32 reserved; + union { + u64 reg; + struct { + u32 lsb; + u32 msb; + }; + } llp; +} __packed; + +#endif /* _DW_HDMA_V0_REGS_H */ diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig new file mode 100644 index 0000000000..a9828ddd6d --- /dev/null +++ b/drivers/dma/dw/Kconfig @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0 + +# +# DMA engine configuration for dw +# + +config DW_DMAC_CORE + tristate + select DMA_ENGINE + +config DW_DMAC + tristate "Synopsys DesignWare AHB DMA platform driver" + depends on HAS_IOMEM + select DW_DMAC_CORE + help + Support the Synopsys DesignWare AHB DMA controller. This + can be integrated in chips such as the Intel Cherrytrail. + +config RZN1_DMAMUX + tristate "Renesas RZ/N1 DMAMUX driver" + depends on DW_DMAC + depends on ARCH_RZN1 || COMPILE_TEST + help + Support the Renesas RZ/N1 DMAMUX which is located in front of + the Synopsys DesignWare AHB DMA controller located on Renesas + SoCs. + +config DW_DMAC_PCI + tristate "Synopsys DesignWare AHB DMA PCI driver" + depends on PCI + depends on HAS_IOMEM + select DW_DMAC_CORE + help + Support the Synopsys DesignWare AHB DMA controller on the + platforms that enumerate it as a PCI device. For example, + Intel Medfield has integrated this GPDMA controller. diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile new file mode 100644 index 0000000000..e1796015f2 --- /dev/null +++ b/drivers/dma/dw/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_DW_DMAC_CORE) += dw_dmac_core.o +dw_dmac_core-y := core.o dw.o idma32.o +dw_dmac_core-$(CONFIG_ACPI) += acpi.o + +obj-$(CONFIG_DW_DMAC) += dw_dmac.o +dw_dmac-y := platform.o +dw_dmac-$(CONFIG_OF) += of.o + +obj-$(CONFIG_DW_DMAC_PCI) += dw_dmac_pci.o +dw_dmac_pci-y := pci.o + +obj-$(CONFIG_RZN1_DMAMUX) += rzn1-dmamux.o diff --git a/drivers/dma/dw/acpi.c b/drivers/dma/dw/acpi.c new file mode 100644 index 0000000000..c510c109d2 --- /dev/null +++ b/drivers/dma/dw/acpi.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2013,2019 Intel Corporation + +#include +#include + +#include "internal.h" + +static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param) +{ + struct acpi_dma_spec *dma_spec = param; + struct dw_dma_slave slave = { + .dma_dev = dma_spec->dev, + .src_id = dma_spec->slave_id, + .dst_id = dma_spec->slave_id, + .m_master = 0, + .p_master = 1, + }; + + return dw_dma_filter(chan, &slave); +} + +void dw_dma_acpi_controller_register(struct dw_dma *dw) +{ + struct device *dev = dw->dma.dev; + struct acpi_dma_filter_info *info; + int ret; + + if (!has_acpi_companion(dev)) + return; + + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); + if (!info) + return; + + dma_cap_zero(info->dma_cap); + dma_cap_set(DMA_SLAVE, info->dma_cap); + info->filter_fn = dw_dma_acpi_filter; + + ret = acpi_dma_controller_register(dev, acpi_dma_simple_xlate, info); + if (ret) + dev_err(dev, "could not register acpi_dma_controller\n"); +} +EXPORT_SYMBOL_GPL(dw_dma_acpi_controller_register); + +void dw_dma_acpi_controller_free(struct dw_dma *dw) +{ + struct device *dev = dw->dma.dev; + + if (!has_acpi_companion(dev)) + return; + + acpi_dma_controller_free(dev); +} +EXPORT_SYMBOL_GPL(dw_dma_acpi_controller_free); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c new file mode 100644 index 0000000000..5f7d690e3d --- /dev/null +++ b/drivers/dma/dw/core.c @@ -0,0 +1,1319 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Core driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2007-2008 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * Copyright (C) 2013 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" +#include "internal.h" + +/* + * This supports the Synopsys "DesignWare AHB Central DMA Controller", + * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all + * of which use ARM any more). See the "Databook" from Synopsys for + * information beyond what licensees probably provide. + */ + +/* The set of bus widths supported by the DMA controller */ +#define DW_DMA_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) + +/*----------------------------------------------------------------------*/ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) +{ + return to_dw_desc(dwc->active_list.next); +} + +static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dw_desc *desc = txd_to_dw_desc(tx); + struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + cookie = dma_cookie_assign(tx); + + /* + * REVISIT: We should attempt to chain as many descriptors as + * possible, perhaps even appending to those already submitted + * for DMA. But this is hard to do in a race-free manner. + */ + + list_add_tail(&desc->desc_node, &dwc->queue); + spin_unlock_irqrestore(&dwc->lock, flags); + dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", + __func__, desc->txd.cookie); + + return cookie; +} + +static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_desc *desc; + dma_addr_t phys; + + desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys); + if (!desc) + return NULL; + + dwc->descs_allocated++; + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, &dwc->chan); + desc->txd.tx_submit = dwc_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = phys; + return desc; +} + +static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_desc *child, *_next; + + if (unlikely(!desc)) + return; + + list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) { + list_del(&child->desc_node); + dma_pool_free(dw->desc_pool, child, child->txd.phys); + dwc->descs_allocated--; + } + + dma_pool_free(dw->desc_pool, desc, desc->txd.phys); + dwc->descs_allocated--; +} + +static void dwc_initialize(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + dw->initialize_chan(dwc); + + /* Enable interrupts */ + channel_set_bit(dw, MASK.XFER, dwc->mask); + channel_set_bit(dw, MASK.ERROR, dwc->mask); +} + +/*----------------------------------------------------------------------*/ + +static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc) +{ + dev_err(chan2dev(&dwc->chan), + " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", + channel_readl(dwc, SAR), + channel_readl(dwc, DAR), + channel_readl(dwc, LLP), + channel_readl(dwc, CTL_HI), + channel_readl(dwc, CTL_LO)); +} + +static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); +} + +/*----------------------------------------------------------------------*/ + +/* Perform single block transfer */ +static inline void dwc_do_single_block(struct dw_dma_chan *dwc, + struct dw_desc *desc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u32 ctllo; + + /* + * Software emulation of LLP mode relies on interrupts to continue + * multi block transfer. + */ + ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN; + + channel_writel(dwc, SAR, lli_read(desc, sar)); + channel_writel(dwc, DAR, lli_read(desc, dar)); + channel_writel(dwc, CTL_LO, ctllo); + channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi)); + channel_set_bit(dw, CH_EN, dwc->mask); + + /* Move pointer to next descriptor */ + dwc->tx_node_active = dwc->tx_node_active->next; +} + +/* Called with dwc->lock held and bh disabled */ +static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u8 lms = DWC_LLP_LMS(dwc->dws.m_master); + unsigned long was_soft_llp; + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(chan2dev(&dwc->chan), + "%s: BUG: Attempted to start non-idle channel\n", + __func__); + dwc_dump_chan_regs(dwc); + + /* The tasklet will hopefully advance the queue... */ + return; + } + + if (dwc->nollp) { + was_soft_llp = test_and_set_bit(DW_DMA_IS_SOFT_LLP, + &dwc->flags); + if (was_soft_llp) { + dev_err(chan2dev(&dwc->chan), + "BUG: Attempted to start new LLP transfer inside ongoing one\n"); + return; + } + + dwc_initialize(dwc); + + first->residue = first->total_len; + dwc->tx_node_active = &first->tx_list; + + /* Submit first block */ + dwc_do_single_block(dwc, first); + + return; + } + + dwc_initialize(dwc); + + channel_writel(dwc, LLP, first->txd.phys | lms); + channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, CTL_HI, 0); + channel_set_bit(dw, CH_EN, dwc->mask); +} + +static void dwc_dostart_first_queued(struct dw_dma_chan *dwc) +{ + struct dw_desc *desc; + + if (list_empty(&dwc->queue)) + return; + + list_move(dwc->queue.next, &dwc->active_list); + desc = dwc_first_active(dwc); + dev_vdbg(chan2dev(&dwc->chan), "%s: started %u\n", __func__, desc->txd.cookie); + dwc_dostart(dwc, desc); +} + +/*----------------------------------------------------------------------*/ + +static void +dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, + bool callback_required) +{ + struct dma_async_tx_descriptor *txd = &desc->txd; + struct dw_desc *child; + unsigned long flags; + struct dmaengine_desc_callback cb; + + dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie); + + spin_lock_irqsave(&dwc->lock, flags); + dma_cookie_complete(txd); + if (callback_required) + dmaengine_desc_get_callback(txd, &cb); + else + memset(&cb, 0, sizeof(cb)); + + /* async_tx_ack */ + list_for_each_entry(child, &desc->tx_list, desc_node) + async_tx_ack(&child->txd); + async_tx_ack(&desc->txd); + dwc_desc_put(dwc, desc); + spin_unlock_irqrestore(&dwc->lock, flags); + + dmaengine_desc_callback_invoke(&cb, NULL); +} + +static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + struct dw_desc *desc, *_desc; + LIST_HEAD(list); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(chan2dev(&dwc->chan), + "BUG: XFER bit set, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + dwc_chan_disable(dw, dwc); + } + + /* + * Submit queued descriptors ASAP, i.e. before we go through + * the completed ones. + */ + list_splice_init(&dwc->active_list, &list); + dwc_dostart_first_queued(dwc); + + spin_unlock_irqrestore(&dwc->lock, flags); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc, true); +} + +/* Returns how many bytes were already received from source */ +static inline u32 dwc_get_sent(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u32 ctlhi = channel_readl(dwc, CTL_HI); + u32 ctllo = channel_readl(dwc, CTL_LO); + + return dw->block2bytes(dwc, ctlhi, ctllo >> 4 & 7); +} + +static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + dma_addr_t llp; + struct dw_desc *desc, *_desc; + struct dw_desc *child; + u32 status_xfer; + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + llp = channel_readl(dwc, LLP); + status_xfer = dma_readl(dw, RAW.XFER); + + if (status_xfer & dwc->mask) { + /* Everything we've submitted is done */ + dma_writel(dw, CLEAR.XFER, dwc->mask); + + if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags)) { + struct list_head *head, *active = dwc->tx_node_active; + + /* + * We are inside first active descriptor. + * Otherwise something is really wrong. + */ + desc = dwc_first_active(dwc); + + head = &desc->tx_list; + if (active != head) { + /* Update residue to reflect last sent descriptor */ + if (active == head->next) + desc->residue -= desc->len; + else + desc->residue -= to_dw_desc(active->prev)->len; + + child = to_dw_desc(active); + + /* Submit next block */ + dwc_do_single_block(dwc, child); + + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + /* We are done here */ + clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags); + } + + spin_unlock_irqrestore(&dwc->lock, flags); + + dwc_complete_all(dw, dwc); + return; + } + + if (list_empty(&dwc->active_list)) { + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags)) { + dev_vdbg(chan2dev(&dwc->chan), "%s: soft LLP mode\n", __func__); + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + dev_vdbg(chan2dev(&dwc->chan), "%s: llp=%pad\n", __func__, &llp); + + list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + /* Initial residue value */ + desc->residue = desc->total_len; + + /* Check first descriptors addr */ + if (desc->txd.phys == DWC_LLP_LOC(llp)) { + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + /* Check first descriptors llp */ + if (lli_read(desc, llp) == llp) { + /* This one is currently in progress */ + desc->residue -= dwc_get_sent(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + desc->residue -= desc->len; + list_for_each_entry(child, &desc->tx_list, desc_node) { + if (lli_read(child, llp) == llp) { + /* Currently in progress */ + desc->residue -= dwc_get_sent(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + desc->residue -= child->len; + } + + /* + * No descriptors so far seem to be in progress, i.e. + * this one must be done. + */ + spin_unlock_irqrestore(&dwc->lock, flags); + dwc_descriptor_complete(dwc, desc, true); + spin_lock_irqsave(&dwc->lock, flags); + } + + dev_err(chan2dev(&dwc->chan), + "BUG: All descriptors done, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + dwc_chan_disable(dw, dwc); + + dwc_dostart_first_queued(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); +} + +static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", + lli_read(desc, sar), + lli_read(desc, dar), + lli_read(desc, llp), + lli_read(desc, ctlhi), + lli_read(desc, ctllo)); +} + +static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + struct dw_desc *bad_desc; + struct dw_desc *child; + unsigned long flags; + + dwc_scan_descriptors(dw, dwc); + + spin_lock_irqsave(&dwc->lock, flags); + + /* + * The descriptor currently at the head of the active list is + * borked. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to carry on. + */ + bad_desc = dwc_first_active(dwc); + list_del_init(&bad_desc->desc_node); + list_move(dwc->queue.next, dwc->active_list.prev); + + /* Clear the error flag and try to restart the controller */ + dma_writel(dw, CLEAR.ERROR, dwc->mask); + if (!list_empty(&dwc->active_list)) + dwc_dostart(dwc, dwc_first_active(dwc)); + + /* + * WARN may seem harsh, but since this only happens + * when someone submits a bad physical address in a + * descriptor, we should consider ourselves lucky that the + * controller flagged an error instead of scribbling over + * random memory locations. + */ + dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n" + " cookie: %d\n", bad_desc->txd.cookie); + dwc_dump_lli(dwc, bad_desc); + list_for_each_entry(child, &bad_desc->tx_list, desc_node) + dwc_dump_lli(dwc, child); + + spin_unlock_irqrestore(&dwc->lock, flags); + + /* Pretend the descriptor completed successfully */ + dwc_descriptor_complete(dwc, bad_desc, true); +} + +static void dw_dma_tasklet(struct tasklet_struct *t) +{ + struct dw_dma *dw = from_tasklet(dw, t, tasklet); + struct dw_dma_chan *dwc; + u32 status_xfer; + u32 status_err; + unsigned int i; + + status_xfer = dma_readl(dw, RAW.XFER); + status_err = dma_readl(dw, RAW.ERROR); + + dev_vdbg(dw->dma.dev, "%s: status_err=%x\n", __func__, status_err); + + for (i = 0; i < dw->dma.chancnt; i++) { + dwc = &dw->chan[i]; + if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) + dev_vdbg(dw->dma.dev, "Cyclic xfer is not implemented\n"); + else if (status_err & (1 << i)) + dwc_handle_error(dw, dwc); + else if (status_xfer & (1 << i)) + dwc_scan_descriptors(dw, dwc); + } + + /* Re-enable interrupts */ + channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); +} + +static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) +{ + struct dw_dma *dw = dev_id; + u32 status; + + /* Check if we have any interrupt from the DMAC which is not in use */ + if (!dw->in_use) + return IRQ_NONE; + + status = dma_readl(dw, STATUS_INT); + dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status); + + /* Check if we have any interrupt from the DMAC */ + if (!status) + return IRQ_NONE; + + /* + * Just disable the interrupts. We'll turn them back on in the + * softirq handler. + */ + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + status = dma_readl(dw, STATUS_INT); + if (status) { + dev_err(dw->dma.dev, + "BUG: Unexpected interrupts pending: 0x%x\n", + status); + + /* Try to recover */ + channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1); + channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); + channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); + channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); + } + + tasklet_schedule(&dw->tasklet); + + return IRQ_HANDLED; +} + +/*----------------------------------------------------------------------*/ + +static struct dma_async_tx_descriptor * +dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc; + struct dw_desc *first; + struct dw_desc *prev; + size_t xfer_count; + size_t offset; + u8 m_master = dwc->dws.m_master; + unsigned int src_width; + unsigned int dst_width; + unsigned int data_width = dw->pdata->data_width[m_master]; + u32 ctllo, ctlhi; + u8 lms = DWC_LLP_LMS(m_master); + + dev_vdbg(chan2dev(chan), + "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__, + &dest, &src, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__); + return NULL; + } + + dwc->direction = DMA_MEM_TO_MEM; + + src_width = dst_width = __ffs(data_width | src | dest | len); + + ctllo = dw->prepare_ctllo(dwc) + | DWC_CTLL_DST_WIDTH(dst_width) + | DWC_CTLL_SRC_WIDTH(src_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_INC + | DWC_CTLL_FC_M2M; + prev = first = NULL; + + for (offset = 0; offset < len; offset += xfer_count) { + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + + ctlhi = dw->bytes2block(dwc, len - offset, src_width, &xfer_count); + + lli_write(desc, sar, src + offset); + lli_write(desc, dar, dest + offset); + lli_write(desc, ctllo, ctllo); + lli_write(desc, ctlhi, ctlhi); + desc->len = xfer_count; + + if (!first) { + first = desc; + } else { + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + } + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ + lli_set(prev, ctllo, DWC_CTLL_INT_EN); + + prev->lli.llp = 0; + lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + first->txd.flags = flags; + first->total_len = len; + + return &first->txd; + +err_desc_get: + dwc_desc_put(dwc, first); + return NULL; +} + +static struct dma_async_tx_descriptor * +dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dma_slave_config *sconfig = &dwc->dma_sconfig; + struct dw_desc *prev; + struct dw_desc *first; + u32 ctllo, ctlhi; + u8 m_master = dwc->dws.m_master; + u8 lms = DWC_LLP_LMS(m_master); + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; + unsigned int data_width = dw->pdata->data_width[m_master]; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; + + dev_vdbg(chan2dev(chan), "%s\n", __func__); + + if (unlikely(!is_slave_direction(direction) || !sg_len)) + return NULL; + + dwc->direction = direction; + + prev = first = NULL; + + switch (direction) { + case DMA_MEM_TO_DEV: + reg_width = __ffs(sconfig->dst_addr_width); + reg = sconfig->dst_addr; + ctllo = dw->prepare_ctllo(dwc) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_DST_FIX + | DWC_CTLL_SRC_INC; + + ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) : + DWC_CTLL_FC(DW_DMA_FC_D_M2P); + + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len, mem; + size_t dlen; + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + + mem_width = __ffs(data_width | mem | len); + +slave_sg_todev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + + ctlhi = dw->bytes2block(dwc, len, mem_width, &dlen); + + lli_write(desc, sar, mem); + lli_write(desc, dar, reg); + lli_write(desc, ctlhi, ctlhi); + lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width)); + desc->len = dlen; + + if (!first) { + first = desc; + } else { + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + + mem += dlen; + len -= dlen; + total_len += dlen; + + if (len) + goto slave_sg_todev_fill_desc; + } + break; + case DMA_DEV_TO_MEM: + reg_width = __ffs(sconfig->src_addr_width); + reg = sconfig->src_addr; + ctllo = dw->prepare_ctllo(dwc) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_FIX; + + ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) : + DWC_CTLL_FC(DW_DMA_FC_D_P2M); + + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len, mem; + size_t dlen; + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + +slave_sg_fromdev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + + ctlhi = dw->bytes2block(dwc, len, reg_width, &dlen); + + lli_write(desc, sar, reg); + lli_write(desc, dar, mem); + lli_write(desc, ctlhi, ctlhi); + mem_width = __ffs(data_width | mem); + lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); + desc->len = dlen; + + if (!first) { + first = desc; + } else { + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + + mem += dlen; + len -= dlen; + total_len += dlen; + + if (len) + goto slave_sg_fromdev_fill_desc; + } + break; + default: + return NULL; + } + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ + lli_set(prev, ctllo, DWC_CTLL_INT_EN); + + prev->lli.llp = 0; + lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + first->total_len = total_len; + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), + "not enough descriptors available. Direction %d\n", direction); + dwc_desc_put(dwc, first); + return NULL; +} + +bool dw_dma_filter(struct dma_chan *chan, void *param) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma_slave *dws = param; + + if (dws->dma_dev != chan->device->dev) + return false; + + /* permit channels in accordance with the channels mask */ + if (dws->channels && !(dws->channels & dwc->mask)) + return false; + + /* We have to copy data since dws can be temporary storage */ + memcpy(&dwc->dws, dws, sizeof(struct dw_dma_slave)); + + return true; +} +EXPORT_SYMBOL_GPL(dw_dma_filter); + +static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + + memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig)); + + dwc->dma_sconfig.src_maxburst = + clamp(dwc->dma_sconfig.src_maxburst, 0U, dwc->max_burst); + dwc->dma_sconfig.dst_maxburst = + clamp(dwc->dma_sconfig.dst_maxburst, 0U, dwc->max_burst); + + dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst); + dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst); + + return 0; +} + +static void dwc_chan_pause(struct dw_dma_chan *dwc, bool drain) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + unsigned int count = 20; /* timeout iterations */ + + dw->suspend_chan(dwc, drain); + + while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--) + udelay(2); + + set_bit(DW_DMA_IS_PAUSED, &dwc->flags); +} + +static int dwc_pause(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + dwc_chan_pause(dwc, false); + spin_unlock_irqrestore(&dwc->lock, flags); + + return 0; +} + +static inline void dwc_chan_resume(struct dw_dma_chan *dwc, bool drain) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + dw->resume_chan(dwc, drain); + + clear_bit(DW_DMA_IS_PAUSED, &dwc->flags); +} + +static int dwc_resume(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + + if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags)) + dwc_chan_resume(dwc, false); + + spin_unlock_irqrestore(&dwc->lock, flags); + + return 0; +} + +static int dwc_terminate_all(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc, *_desc; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&dwc->lock, flags); + + clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags); + + dwc_chan_pause(dwc, true); + + dwc_chan_disable(dw, dwc); + + dwc_chan_resume(dwc, true); + + /* active_list entries will end up before queued entries */ + list_splice_init(&dwc->queue, &list); + list_splice_init(&dwc->active_list, &list); + + spin_unlock_irqrestore(&dwc->lock, flags); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc, false); + + return 0; +} + +static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c) +{ + struct dw_desc *desc; + + list_for_each_entry(desc, &dwc->active_list, desc_node) + if (desc->txd.cookie == c) + return desc; + + return NULL; +} + +static u32 dwc_get_residue_and_status(struct dw_dma_chan *dwc, dma_cookie_t cookie, + enum dma_status *status) +{ + struct dw_desc *desc; + unsigned long flags; + u32 residue; + + spin_lock_irqsave(&dwc->lock, flags); + + desc = dwc_find_desc(dwc, cookie); + if (desc) { + if (desc == dwc_first_active(dwc)) { + residue = desc->residue; + if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) + residue -= dwc_get_sent(dwc); + if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags)) + *status = DMA_PAUSED; + } else { + residue = desc->total_len; + } + } else { + residue = 0; + } + + spin_unlock_irqrestore(&dwc->lock, flags); + return residue; +} + +static enum dma_status +dwc_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + dma_set_residue(txstate, dwc_get_residue_and_status(dwc, cookie, &ret)); + return ret; +} + +static void dwc_issue_pending(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + if (list_empty(&dwc->active_list)) + dwc_dostart_first_queued(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); +} + +/*----------------------------------------------------------------------*/ + +void do_dw_dma_off(struct dw_dma *dw) +{ + dma_writel(dw, CFG, 0); + + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + while (dma_readl(dw, CFG) & DW_CFG_DMA_EN) + cpu_relax(); +} + +void do_dw_dma_on(struct dw_dma *dw) +{ + dma_writel(dw, CFG, DW_CFG_DMA_EN); +} + +static int dwc_alloc_chan_resources(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + + dev_vdbg(chan2dev(chan), "%s\n", __func__); + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_dbg(chan2dev(chan), "DMA channel not idle?\n"); + return -EIO; + } + + dma_cookie_init(chan); + + /* + * NOTE: some controllers may have additional features that we + * need to initialize here, like "scatter-gather" (which + * doesn't mean what you think it means), and status writeback. + */ + + /* + * We need controller-specific data to set up slave transfers. + */ + if (chan->private && !dw_dma_filter(chan, chan->private)) { + dev_warn(chan2dev(chan), "Wrong controller-specific data\n"); + return -EINVAL; + } + + /* Enable controller here if needed */ + if (!dw->in_use) + do_dw_dma_on(dw); + dw->in_use |= dwc->mask; + + return 0; +} + +static void dwc_free_chan_resources(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + unsigned long flags; + + dev_dbg(chan2dev(chan), "%s: descs allocated=%u\n", __func__, + dwc->descs_allocated); + + /* ASSERT: channel is idle */ + BUG_ON(!list_empty(&dwc->active_list)); + BUG_ON(!list_empty(&dwc->queue)); + BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); + + spin_lock_irqsave(&dwc->lock, flags); + + /* Clear custom channel configuration */ + memset(&dwc->dws, 0, sizeof(struct dw_dma_slave)); + + /* Disable interrupts */ + channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.BLOCK, dwc->mask); + channel_clear_bit(dw, MASK.ERROR, dwc->mask); + + spin_unlock_irqrestore(&dwc->lock, flags); + + /* Disable controller in case it was a last user */ + dw->in_use &= ~dwc->mask; + if (!dw->in_use) + do_dw_dma_off(dw); + + dev_vdbg(chan2dev(chan), "%s: done\n", __func__); +} + +static void dwc_caps(struct dma_chan *chan, struct dma_slave_caps *caps) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + caps->max_burst = dwc->max_burst; + + /* + * It might be crucial for some devices to have the hardware + * accelerated multi-block transfers supported, aka LLPs in DW DMAC + * notation. So if LLPs are supported then max_sg_burst is set to + * zero which means unlimited number of SG entries can be handled in a + * single DMA transaction, otherwise it's just one SG entry. + */ + if (dwc->nollp) + caps->max_sg_burst = 1; + else + caps->max_sg_burst = 0; +} + +int do_dma_probe(struct dw_dma_chip *chip) +{ + struct dw_dma *dw = chip->dw; + struct dw_dma_platform_data *pdata; + bool autocfg = false; + unsigned int dw_params; + unsigned int i; + int err; + + dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); + if (!dw->pdata) + return -ENOMEM; + + dw->regs = chip->regs; + + pm_runtime_get_sync(chip->dev); + + if (!chip->pdata) { + dw_params = dma_readl(dw, DW_PARAMS); + dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); + + autocfg = dw_params >> DW_PARAMS_EN & 1; + if (!autocfg) { + err = -EINVAL; + goto err_pdata; + } + + /* Reassign the platform data pointer */ + pdata = dw->pdata; + + /* Get hardware configuration parameters */ + pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1; + pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; + for (i = 0; i < pdata->nr_masters; i++) { + pdata->data_width[i] = + 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3); + } + pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); + + /* Fill platform data with the default values */ + pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; + pdata->chan_priority = CHAN_PRIORITY_ASCENDING; + } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { + err = -EINVAL; + goto err_pdata; + } else { + memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata)); + + /* Reassign the platform data pointer */ + pdata = dw->pdata; + } + + dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), + GFP_KERNEL); + if (!dw->chan) { + err = -ENOMEM; + goto err_pdata; + } + + /* Calculate all channel mask before DMA setup */ + dw->all_chan_mask = (1 << pdata->nr_channels) - 1; + + /* Force dma off, just in case */ + dw->disable(dw); + + /* Device and instance ID for IRQ and DMA pool */ + dw->set_device_name(dw, chip->id); + + /* Create a pool of consistent memory blocks for hardware descriptors */ + dw->desc_pool = dmam_pool_create(dw->name, chip->dev, + sizeof(struct dw_desc), 4, 0); + if (!dw->desc_pool) { + dev_err(chip->dev, "No memory for descriptors dma pool\n"); + err = -ENOMEM; + goto err_pdata; + } + + tasklet_setup(&dw->tasklet, dw_dma_tasklet); + + err = request_irq(chip->irq, dw_dma_interrupt, IRQF_SHARED, + dw->name, dw); + if (err) + goto err_pdata; + + INIT_LIST_HEAD(&dw->dma.channels); + for (i = 0; i < pdata->nr_channels; i++) { + struct dw_dma_chan *dwc = &dw->chan[i]; + + dwc->chan.device = &dw->dma; + dma_cookie_init(&dwc->chan); + if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING) + list_add_tail(&dwc->chan.device_node, + &dw->dma.channels); + else + list_add(&dwc->chan.device_node, &dw->dma.channels); + + /* 7 is highest priority & 0 is lowest. */ + if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) + dwc->priority = pdata->nr_channels - i - 1; + else + dwc->priority = i; + + dwc->ch_regs = &__dw_regs(dw)->CHAN[i]; + spin_lock_init(&dwc->lock); + dwc->mask = 1 << i; + + INIT_LIST_HEAD(&dwc->active_list); + INIT_LIST_HEAD(&dwc->queue); + + channel_clear_bit(dw, CH_EN, dwc->mask); + + dwc->direction = DMA_TRANS_NONE; + + /* Hardware configuration */ + if (autocfg) { + unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; + void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r]; + unsigned int dwc_params = readl(addr); + + dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, + dwc_params); + + /* + * Decode maximum block size for given channel. The + * stored 4 bit value represents blocks from 0x00 for 3 + * up to 0x0a for 4095. + */ + dwc->block_size = + (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1; + + /* + * According to the DW DMA databook the true scatter- + * gether LLPs aren't available if either multi-block + * config is disabled (CHx_MULTI_BLK_EN == 0) or the + * LLP register is hard-coded to zeros + * (CHx_HC_LLP == 1). + */ + dwc->nollp = + (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0 || + (dwc_params >> DWC_PARAMS_HC_LLP & 0x1) == 1; + dwc->max_burst = + (0x4 << (dwc_params >> DWC_PARAMS_MSIZE & 0x7)); + } else { + dwc->block_size = pdata->block_size; + dwc->nollp = !pdata->multi_block[i]; + dwc->max_burst = pdata->max_burst[i] ?: DW_DMA_MAX_BURST; + } + } + + /* Clear all interrupts on all channels. */ + dma_writel(dw, CLEAR.XFER, dw->all_chan_mask); + dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask); + dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask); + dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask); + dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask); + + /* Set capabilities */ + dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + + dw->dma.dev = chip->dev; + dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; + dw->dma.device_free_chan_resources = dwc_free_chan_resources; + + dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy; + dw->dma.device_prep_slave_sg = dwc_prep_slave_sg; + + dw->dma.device_caps = dwc_caps; + dw->dma.device_config = dwc_config; + dw->dma.device_pause = dwc_pause; + dw->dma.device_resume = dwc_resume; + dw->dma.device_terminate_all = dwc_terminate_all; + + dw->dma.device_tx_status = dwc_tx_status; + dw->dma.device_issue_pending = dwc_issue_pending; + + /* DMA capabilities */ + dw->dma.min_burst = DW_DMA_MIN_BURST; + dw->dma.max_burst = DW_DMA_MAX_BURST; + dw->dma.src_addr_widths = DW_DMA_BUSWIDTHS; + dw->dma.dst_addr_widths = DW_DMA_BUSWIDTHS; + dw->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | + BIT(DMA_MEM_TO_MEM); + dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + /* + * For now there is no hardware with non uniform maximum block size + * across all of the device channels, so we set the maximum segment + * size as the block size found for the very first channel. + */ + dma_set_max_seg_size(dw->dma.dev, dw->chan[0].block_size); + + err = dma_async_device_register(&dw->dma); + if (err) + goto err_dma_register; + + dev_info(chip->dev, "DesignWare DMA Controller, %d channels\n", + pdata->nr_channels); + + pm_runtime_put_sync_suspend(chip->dev); + + return 0; + +err_dma_register: + free_irq(chip->irq, dw); +err_pdata: + pm_runtime_put_sync_suspend(chip->dev); + return err; +} + +int do_dma_remove(struct dw_dma_chip *chip) +{ + struct dw_dma *dw = chip->dw; + struct dw_dma_chan *dwc, *_dwc; + + pm_runtime_get_sync(chip->dev); + + do_dw_dma_off(dw); + dma_async_device_unregister(&dw->dma); + + free_irq(chip->irq, dw); + tasklet_kill(&dw->tasklet); + + list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels, + chan.device_node) { + list_del(&dwc->chan.device_node); + channel_clear_bit(dw, CH_EN, dwc->mask); + } + + pm_runtime_put_sync_suspend(chip->dev); + return 0; +} + +int do_dw_dma_disable(struct dw_dma_chip *chip) +{ + struct dw_dma *dw = chip->dw; + + dw->disable(dw); + return 0; +} +EXPORT_SYMBOL_GPL(do_dw_dma_disable); + +int do_dw_dma_enable(struct dw_dma_chip *chip) +{ + struct dw_dma *dw = chip->dw; + + dw->enable(dw); + return 0; +} +EXPORT_SYMBOL_GPL(do_dw_dma_enable); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver"); +MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); +MODULE_AUTHOR("Viresh Kumar "); diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c new file mode 100644 index 0000000000..a4862263ff --- /dev/null +++ b/drivers/dma/dw/dw.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2007-2008 Atmel Corporation +// Copyright (C) 2010-2011 ST Microelectronics +// Copyright (C) 2013,2018 Intel Corporation + +#include +#include +#include +#include +#include + +#include "internal.h" + +static void dw_dma_initialize_chan(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u32 cfghi = is_slave_direction(dwc->direction) ? 0 : DWC_CFGH_FIFO_MODE; + u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); + bool hs_polarity = dwc->dws.hs_polarity; + + cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id); + cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id); + cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl); + + /* Set polarity of handshake interface */ + cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0; + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); +} + +static void dw_dma_suspend_chan(struct dw_dma_chan *dwc, bool drain) +{ + u32 cfglo = channel_readl(dwc, CFG_LO); + + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); +} + +static void dw_dma_resume_chan(struct dw_dma_chan *dwc, bool drain) +{ + u32 cfglo = channel_readl(dwc, CFG_LO); + + channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); +} + +static u32 dw_dma_bytes2block(struct dw_dma_chan *dwc, + size_t bytes, unsigned int width, size_t *len) +{ + u32 block; + + if ((bytes >> width) > dwc->block_size) { + block = dwc->block_size; + *len = dwc->block_size << width; + } else { + block = bytes >> width; + *len = bytes; + } + + return block; +} + +static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) +{ + return DWC_CTLH_BLOCK_TS(block) << width; +} + +static u32 dw_dma_prepare_ctllo(struct dw_dma_chan *dwc) +{ + struct dma_slave_config *sconfig = &dwc->dma_sconfig; + u8 smsize = (dwc->direction == DMA_DEV_TO_MEM) ? sconfig->src_maxburst : 0; + u8 dmsize = (dwc->direction == DMA_MEM_TO_DEV) ? sconfig->dst_maxburst : 0; + u8 p_master = dwc->dws.p_master; + u8 m_master = dwc->dws.m_master; + u8 dms = (dwc->direction == DMA_MEM_TO_DEV) ? p_master : m_master; + u8 sms = (dwc->direction == DMA_DEV_TO_MEM) ? p_master : m_master; + + return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN | + DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize) | + DWC_CTLL_DMS(dms) | DWC_CTLL_SMS(sms); +} + +static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +{ + /* + * Fix burst size according to dw_dmac. We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. + */ + *maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0; +} + +static void dw_dma_set_device_name(struct dw_dma *dw, int id) +{ + snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", id); +} + +static void dw_dma_disable(struct dw_dma *dw) +{ + do_dw_dma_off(dw); +} + +static void dw_dma_enable(struct dw_dma *dw) +{ + do_dw_dma_on(dw); +} + +int dw_dma_probe(struct dw_dma_chip *chip) +{ + struct dw_dma *dw; + + dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + /* Channel operations */ + dw->initialize_chan = dw_dma_initialize_chan; + dw->suspend_chan = dw_dma_suspend_chan; + dw->resume_chan = dw_dma_resume_chan; + dw->prepare_ctllo = dw_dma_prepare_ctllo; + dw->encode_maxburst = dw_dma_encode_maxburst; + dw->bytes2block = dw_dma_bytes2block; + dw->block2bytes = dw_dma_block2bytes; + + /* Device operations */ + dw->set_device_name = dw_dma_set_device_name; + dw->disable = dw_dma_disable; + dw->enable = dw_dma_enable; + + chip->dw = dw; + return do_dma_probe(chip); +} +EXPORT_SYMBOL_GPL(dw_dma_probe); + +int dw_dma_remove(struct dw_dma_chip *chip) +{ + return do_dma_remove(chip); +} +EXPORT_SYMBOL_GPL(dw_dma_remove); diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c new file mode 100644 index 0000000000..58f4078d83 --- /dev/null +++ b/drivers/dma/dw/idma32.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2013,2018,2020-2021 Intel Corporation + +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#define DMA_CTL_CH(x) (0x1000 + (x) * 4) +#define DMA_SRC_ADDR_FILLIN(x) (0x1100 + (x) * 4) +#define DMA_DST_ADDR_FILLIN(x) (0x1200 + (x) * 4) +#define DMA_XBAR_SEL(x) (0x1300 + (x) * 4) +#define DMA_REGACCESS_CHID_CFG (0x1400) + +#define CTL_CH_TRANSFER_MODE_MASK GENMASK(1, 0) +#define CTL_CH_TRANSFER_MODE_S2S 0 +#define CTL_CH_TRANSFER_MODE_S2D 1 +#define CTL_CH_TRANSFER_MODE_D2S 2 +#define CTL_CH_TRANSFER_MODE_D2D 3 +#define CTL_CH_RD_RS_MASK GENMASK(4, 3) +#define CTL_CH_WR_RS_MASK GENMASK(6, 5) +#define CTL_CH_RD_NON_SNOOP_BIT BIT(8) +#define CTL_CH_WR_NON_SNOOP_BIT BIT(9) + +#define XBAR_SEL_DEVID_MASK GENMASK(15, 0) +#define XBAR_SEL_RX_TX_BIT BIT(16) +#define XBAR_SEL_RX_TX_SHIFT 16 + +#define REGACCESS_CHID_MASK GENMASK(2, 0) + +static unsigned int idma32_get_slave_devfn(struct dw_dma_chan *dwc) +{ + struct device *slave = dwc->chan.slave; + + if (!slave || !dev_is_pci(slave)) + return 0; + + return to_pci_dev(slave)->devfn; +} + +static void idma32_initialize_chan_xbar(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + void __iomem *misc = __dw_regs(dw); + u32 cfghi = 0, cfglo = 0; + u8 dst_id, src_id; + u32 value; + + /* DMA Channel ID Configuration register must be programmed first */ + value = readl(misc + DMA_REGACCESS_CHID_CFG); + + value &= ~REGACCESS_CHID_MASK; + value |= dwc->chan.chan_id; + + writel(value, misc + DMA_REGACCESS_CHID_CFG); + + /* Configure channel attributes */ + value = readl(misc + DMA_CTL_CH(dwc->chan.chan_id)); + + value &= ~(CTL_CH_RD_NON_SNOOP_BIT | CTL_CH_WR_NON_SNOOP_BIT); + value &= ~(CTL_CH_RD_RS_MASK | CTL_CH_WR_RS_MASK); + value &= ~CTL_CH_TRANSFER_MODE_MASK; + + switch (dwc->direction) { + case DMA_MEM_TO_DEV: + value |= CTL_CH_TRANSFER_MODE_D2S; + value |= CTL_CH_WR_NON_SNOOP_BIT; + break; + case DMA_DEV_TO_MEM: + value |= CTL_CH_TRANSFER_MODE_S2D; + value |= CTL_CH_RD_NON_SNOOP_BIT; + break; + default: + /* + * Memory-to-Memory and Device-to-Device are ignored for now. + * + * For Memory-to-Memory transfers we would need to set mode + * and disable snooping on both sides. + */ + return; + } + + writel(value, misc + DMA_CTL_CH(dwc->chan.chan_id)); + + /* Configure crossbar selection */ + value = readl(misc + DMA_XBAR_SEL(dwc->chan.chan_id)); + + /* DEVFN selection */ + value &= ~XBAR_SEL_DEVID_MASK; + value |= idma32_get_slave_devfn(dwc); + + switch (dwc->direction) { + case DMA_MEM_TO_DEV: + value |= XBAR_SEL_RX_TX_BIT; + break; + case DMA_DEV_TO_MEM: + value &= ~XBAR_SEL_RX_TX_BIT; + break; + default: + /* Memory-to-Memory and Device-to-Device are ignored for now */ + return; + } + + writel(value, misc + DMA_XBAR_SEL(dwc->chan.chan_id)); + + /* Configure DMA channel low and high registers */ + switch (dwc->direction) { + case DMA_MEM_TO_DEV: + dst_id = dwc->chan.chan_id; + src_id = dwc->dws.src_id; + break; + case DMA_DEV_TO_MEM: + dst_id = dwc->dws.dst_id; + src_id = dwc->chan.chan_id; + break; + default: + /* Memory-to-Memory and Device-to-Device are ignored for now */ + return; + } + + /* Set default burst alignment */ + cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN; + + /* Low 4 bits of the request lines */ + cfghi |= IDMA32C_CFGH_DST_PER(dst_id & 0xf); + cfghi |= IDMA32C_CFGH_SRC_PER(src_id & 0xf); + + /* Request line extension (2 bits) */ + cfghi |= IDMA32C_CFGH_DST_PER_EXT(dst_id >> 4 & 0x3); + cfghi |= IDMA32C_CFGH_SRC_PER_EXT(src_id >> 4 & 0x3); + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); +} + +static void idma32_initialize_chan_generic(struct dw_dma_chan *dwc) +{ + u32 cfghi = 0; + u32 cfglo = 0; + + /* Set default burst alignment */ + cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN; + + /* Low 4 bits of the request lines */ + cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf); + cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf); + + /* Request line extension (2 bits) */ + cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3); + cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3); + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); +} + +static void idma32_suspend_chan(struct dw_dma_chan *dwc, bool drain) +{ + u32 cfglo = channel_readl(dwc, CFG_LO); + + if (drain) + cfglo |= IDMA32C_CFGL_CH_DRAIN; + + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); +} + +static void idma32_resume_chan(struct dw_dma_chan *dwc, bool drain) +{ + u32 cfglo = channel_readl(dwc, CFG_LO); + + if (drain) + cfglo &= ~IDMA32C_CFGL_CH_DRAIN; + + channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); +} + +static u32 idma32_bytes2block(struct dw_dma_chan *dwc, + size_t bytes, unsigned int width, size_t *len) +{ + u32 block; + + if (bytes > dwc->block_size) { + block = dwc->block_size; + *len = dwc->block_size; + } else { + block = bytes; + *len = bytes; + } + + return block; +} + +static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) +{ + return IDMA32C_CTLH_BLOCK_TS(block); +} + +static u32 idma32_prepare_ctllo(struct dw_dma_chan *dwc) +{ + struct dma_slave_config *sconfig = &dwc->dma_sconfig; + u8 smsize = (dwc->direction == DMA_DEV_TO_MEM) ? sconfig->src_maxburst : 0; + u8 dmsize = (dwc->direction == DMA_MEM_TO_DEV) ? sconfig->dst_maxburst : 0; + + return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN | + DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize); +} + +static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +{ + *maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0; +} + +static void idma32_set_device_name(struct dw_dma *dw, int id) +{ + snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", id); +} + +/* + * Program FIFO size of channels. + * + * By default full FIFO (512 bytes) is assigned to channel 0. Here we + * slice FIFO on equal parts between channels. + */ +static void idma32_fifo_partition(struct dw_dma *dw) +{ + u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) | + IDMA32C_FP_UPDATE; + u64 fifo_partition = 0; + + /* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */ + fifo_partition |= value << 0; + + /* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */ + fifo_partition |= value << 32; + + /* Program FIFO Partition registers - 64 bytes per channel */ + idma32_writeq(dw, FIFO_PARTITION1, fifo_partition); + idma32_writeq(dw, FIFO_PARTITION0, fifo_partition); +} + +static void idma32_disable(struct dw_dma *dw) +{ + do_dw_dma_off(dw); + idma32_fifo_partition(dw); +} + +static void idma32_enable(struct dw_dma *dw) +{ + idma32_fifo_partition(dw); + do_dw_dma_on(dw); +} + +int idma32_dma_probe(struct dw_dma_chip *chip) +{ + struct dw_dma *dw; + + dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + /* Channel operations */ + if (chip->pdata->quirks & DW_DMA_QUIRK_XBAR_PRESENT) + dw->initialize_chan = idma32_initialize_chan_xbar; + else + dw->initialize_chan = idma32_initialize_chan_generic; + dw->suspend_chan = idma32_suspend_chan; + dw->resume_chan = idma32_resume_chan; + dw->prepare_ctllo = idma32_prepare_ctllo; + dw->encode_maxburst = idma32_encode_maxburst; + dw->bytes2block = idma32_bytes2block; + dw->block2bytes = idma32_block2bytes; + + /* Device operations */ + dw->set_device_name = idma32_set_device_name; + dw->disable = idma32_disable; + dw->enable = idma32_enable; + + chip->dw = dw; + return do_dma_probe(chip); +} +EXPORT_SYMBOL_GPL(idma32_dma_probe); + +int idma32_dma_remove(struct dw_dma_chip *chip) +{ + return do_dma_remove(chip); +} +EXPORT_SYMBOL_GPL(idma32_dma_remove); diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h new file mode 100644 index 0000000000..563ce73488 --- /dev/null +++ b/drivers/dma/dw/internal.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2013 Intel Corporation + */ + +#ifndef _DMA_DW_INTERNAL_H +#define _DMA_DW_INTERNAL_H + +#include + +#include "regs.h" + +int do_dma_probe(struct dw_dma_chip *chip); +int do_dma_remove(struct dw_dma_chip *chip); + +void do_dw_dma_on(struct dw_dma *dw); +void do_dw_dma_off(struct dw_dma *dw); + +int do_dw_dma_disable(struct dw_dma_chip *chip); +int do_dw_dma_enable(struct dw_dma_chip *chip); + +extern bool dw_dma_filter(struct dma_chan *chan, void *param); + +#ifdef CONFIG_ACPI +void dw_dma_acpi_controller_register(struct dw_dma *dw); +void dw_dma_acpi_controller_free(struct dw_dma *dw); +#else /* !CONFIG_ACPI */ +static inline void dw_dma_acpi_controller_register(struct dw_dma *dw) {} +static inline void dw_dma_acpi_controller_free(struct dw_dma *dw) {} +#endif /* !CONFIG_ACPI */ + +struct platform_device; + +#ifdef CONFIG_OF +struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev); +void dw_dma_of_controller_register(struct dw_dma *dw); +void dw_dma_of_controller_free(struct dw_dma *dw); +#else +static inline struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev) +{ + return NULL; +} +static inline void dw_dma_of_controller_register(struct dw_dma *dw) {} +static inline void dw_dma_of_controller_free(struct dw_dma *dw) {} +#endif + +struct dw_dma_chip_pdata { + const struct dw_dma_platform_data *pdata; + int (*probe)(struct dw_dma_chip *chip); + int (*remove)(struct dw_dma_chip *chip); + struct dw_dma_chip *chip; +}; + +static __maybe_unused const struct dw_dma_chip_pdata dw_dma_chip_pdata = { + .probe = dw_dma_probe, + .remove = dw_dma_remove, +}; + +static const struct dw_dma_platform_data idma32_pdata = { + .nr_channels = 8, + .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, + .chan_priority = CHAN_PRIORITY_ASCENDING, + .block_size = 131071, + .nr_masters = 1, + .data_width = {4}, + .multi_block = {1, 1, 1, 1, 1, 1, 1, 1}, +}; + +static __maybe_unused const struct dw_dma_chip_pdata idma32_chip_pdata = { + .pdata = &idma32_pdata, + .probe = idma32_dma_probe, + .remove = idma32_dma_remove, +}; + +static const struct dw_dma_platform_data xbar_pdata = { + .nr_channels = 8, + .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, + .chan_priority = CHAN_PRIORITY_ASCENDING, + .block_size = 131071, + .nr_masters = 1, + .data_width = {4}, + .quirks = DW_DMA_QUIRK_XBAR_PRESENT, +}; + +static __maybe_unused const struct dw_dma_chip_pdata xbar_chip_pdata = { + .pdata = &xbar_pdata, + .probe = idma32_dma_probe, + .remove = idma32_dma_remove, +}; + +#endif /* _DMA_DW_INTERNAL_H */ diff --git a/drivers/dma/dw/of.c b/drivers/dma/dw/of.c new file mode 100644 index 0000000000..523ca80683 --- /dev/null +++ b/drivers/dma/dw/of.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Platform driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2007-2008 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * Copyright (C) 2013 Intel Corporation + */ + +#include +#include +#include + +#include "internal.h" + +static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dw_dma *dw = ofdma->of_dma_data; + struct dw_dma_slave slave = { + .dma_dev = dw->dma.dev, + }; + dma_cap_mask_t cap; + + if (dma_spec->args_count < 3 || dma_spec->args_count > 4) + return NULL; + + slave.src_id = dma_spec->args[0]; + slave.dst_id = dma_spec->args[0]; + slave.m_master = dma_spec->args[1]; + slave.p_master = dma_spec->args[2]; + if (dma_spec->args_count >= 4) + slave.channels = dma_spec->args[3]; + + if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS || + slave.dst_id >= DW_DMA_MAX_NR_REQUESTS || + slave.m_master >= dw->pdata->nr_masters || + slave.p_master >= dw->pdata->nr_masters || + slave.channels >= BIT(dw->pdata->nr_channels))) + return NULL; + + dma_cap_zero(cap); + dma_cap_set(DMA_SLAVE, cap); + + /* TODO: there should be a simpler way to do this */ + return dma_request_channel(cap, dw_dma_filter, &slave); +} + +struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct dw_dma_platform_data *pdata; + u32 tmp, arr[DW_DMA_MAX_NR_MASTERS]; + u32 nr_masters; + u32 nr_channels; + + if (of_property_read_u32(np, "dma-masters", &nr_masters)) + return NULL; + if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS) + return NULL; + + if (of_property_read_u32(np, "dma-channels", &nr_channels)) + return NULL; + if (nr_channels > DW_DMA_MAX_NR_CHANNELS) + return NULL; + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return NULL; + + pdata->nr_masters = nr_masters; + pdata->nr_channels = nr_channels; + + of_property_read_u32(np, "chan_allocation_order", &pdata->chan_allocation_order); + of_property_read_u32(np, "chan_priority", &pdata->chan_priority); + + of_property_read_u32(np, "block_size", &pdata->block_size); + + /* Try deprecated property first */ + if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) { + for (tmp = 0; tmp < nr_masters; tmp++) + pdata->data_width[tmp] = BIT(arr[tmp] & 0x07); + } + + /* If "data_width" and "data-width" both provided use the latter one */ + of_property_read_u32_array(np, "data-width", pdata->data_width, nr_masters); + + memset32(pdata->multi_block, 1, nr_channels); + of_property_read_u32_array(np, "multi-block", pdata->multi_block, nr_channels); + + memset32(pdata->max_burst, DW_DMA_MAX_BURST, nr_channels); + of_property_read_u32_array(np, "snps,max-burst-len", pdata->max_burst, nr_channels); + + of_property_read_u32(np, "snps,dma-protection-control", &pdata->protctl); + if (pdata->protctl > CHAN_PROTCTL_MASK) + return NULL; + + return pdata; +} + +void dw_dma_of_controller_register(struct dw_dma *dw) +{ + struct device *dev = dw->dma.dev; + int ret; + + if (!dev->of_node) + return; + + ret = of_dma_controller_register(dev->of_node, dw_dma_of_xlate, dw); + if (ret) + dev_err(dev, "could not register of_dma_controller\n"); +} + +void dw_dma_of_controller_free(struct dw_dma *dw) +{ + struct device *dev = dw->dma.dev; + + if (!dev->of_node) + return; + + of_dma_controller_free(dev->of_node); +} diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c new file mode 100644 index 0000000000..ad2d4d012c --- /dev/null +++ b/drivers/dma/dw/pci.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2013 Intel Corporation + * Author: Andy Shevchenko + */ + +#include +#include +#include + +#include "internal.h" + +static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) +{ + const struct dw_dma_chip_pdata *drv_data = (void *)pid->driver_data; + struct dw_dma_chip_pdata *data; + struct dw_dma_chip *chip; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev)); + if (ret) { + dev_err(&pdev->dev, "I/O memory remapping failed\n"); + return ret; + } + + pci_set_master(pdev); + pci_try_set_mwi(pdev); + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + data = devm_kmemdup(&pdev->dev, drv_data, sizeof(*drv_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->dev = &pdev->dev; + chip->id = pdev->devfn; + chip->regs = pcim_iomap_table(pdev)[0]; + chip->irq = pdev->irq; + chip->pdata = data->pdata; + + data->chip = chip; + + ret = data->probe(chip); + if (ret) + return ret; + + dw_dma_acpi_controller_register(chip->dw); + + pci_set_drvdata(pdev, data); + + return 0; +} + +static void dw_pci_remove(struct pci_dev *pdev) +{ + struct dw_dma_chip_pdata *data = pci_get_drvdata(pdev); + struct dw_dma_chip *chip = data->chip; + int ret; + + dw_dma_acpi_controller_free(chip->dw); + + ret = data->remove(chip); + if (ret) + dev_warn(&pdev->dev, "can't remove device properly: %d\n", ret); +} + +#ifdef CONFIG_PM_SLEEP + +static int dw_pci_suspend_late(struct device *dev) +{ + struct dw_dma_chip_pdata *data = dev_get_drvdata(dev); + struct dw_dma_chip *chip = data->chip; + + return do_dw_dma_disable(chip); +}; + +static int dw_pci_resume_early(struct device *dev) +{ + struct dw_dma_chip_pdata *data = dev_get_drvdata(dev); + struct dw_dma_chip *chip = data->chip; + + return do_dw_dma_enable(chip); +}; + +#endif /* CONFIG_PM_SLEEP */ + +static const struct dev_pm_ops dw_pci_dev_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_pci_suspend_late, dw_pci_resume_early) +}; + +static const struct pci_device_id dw_pci_id_table[] = { + /* Medfield (GPDMA) */ + { PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_dma_chip_pdata }, + + /* BayTrail */ + { PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_dma_chip_pdata }, + { PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_dma_chip_pdata }, + + /* Merrifield */ + { PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&idma32_chip_pdata }, + + /* Braswell */ + { PCI_VDEVICE(INTEL, 0x2286), (kernel_ulong_t)&dw_dma_chip_pdata }, + { PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_dma_chip_pdata }, + + /* Elkhart Lake iDMA 32-bit (PSE DMA) */ + { PCI_VDEVICE(INTEL, 0x4bb4), (kernel_ulong_t)&xbar_chip_pdata }, + { PCI_VDEVICE(INTEL, 0x4bb5), (kernel_ulong_t)&xbar_chip_pdata }, + { PCI_VDEVICE(INTEL, 0x4bb6), (kernel_ulong_t)&xbar_chip_pdata }, + + /* Haswell */ + { PCI_VDEVICE(INTEL, 0x9c60), (kernel_ulong_t)&dw_dma_chip_pdata }, + + /* Broadwell */ + { PCI_VDEVICE(INTEL, 0x9ce0), (kernel_ulong_t)&dw_dma_chip_pdata }, + + { } +}; +MODULE_DEVICE_TABLE(pci, dw_pci_id_table); + +static struct pci_driver dw_pci_driver = { + .name = "dw_dmac_pci", + .id_table = dw_pci_id_table, + .probe = dw_pci_probe, + .remove = dw_pci_remove, + .driver = { + .pm = &dw_pci_dev_pm_ops, + }, +}; + +module_pci_driver(dw_pci_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller PCI driver"); +MODULE_AUTHOR("Andy Shevchenko "); diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c new file mode 100644 index 0000000000..47f2292dba --- /dev/null +++ b/drivers/dma/dw/platform.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Platform driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2007-2008 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * Copyright (C) 2013 Intel Corporation + * + * Some parts of this driver are derived from the original dw_dmac. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#define DRV_NAME "dw_dmac" + +static int dw_probe(struct platform_device *pdev) +{ + const struct dw_dma_chip_pdata *match; + struct dw_dma_chip_pdata *data; + struct dw_dma_chip *chip; + struct device *dev = &pdev->dev; + int err; + + match = device_get_match_data(dev); + if (!match) + return -ENODEV; + + data = devm_kmemdup(&pdev->dev, match, sizeof(*match), GFP_KERNEL); + if (!data) + return -ENOMEM; + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->irq = platform_get_irq(pdev, 0); + if (chip->irq < 0) + return chip->irq; + + chip->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(chip->regs)) + return PTR_ERR(chip->regs); + + err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + return err; + + if (!data->pdata) + data->pdata = dev_get_platdata(dev); + if (!data->pdata) + data->pdata = dw_dma_parse_dt(pdev); + + chip->dev = dev; + chip->id = pdev->id; + chip->pdata = data->pdata; + + data->chip = chip; + + chip->clk = devm_clk_get_optional(chip->dev, "hclk"); + if (IS_ERR(chip->clk)) + return PTR_ERR(chip->clk); + err = clk_prepare_enable(chip->clk); + if (err) + return err; + + pm_runtime_enable(&pdev->dev); + + err = data->probe(chip); + if (err) + goto err_dw_dma_probe; + + platform_set_drvdata(pdev, data); + + dw_dma_of_controller_register(chip->dw); + + dw_dma_acpi_controller_register(chip->dw); + + return 0; + +err_dw_dma_probe: + pm_runtime_disable(&pdev->dev); + clk_disable_unprepare(chip->clk); + return err; +} + +static int dw_remove(struct platform_device *pdev) +{ + struct dw_dma_chip_pdata *data = platform_get_drvdata(pdev); + struct dw_dma_chip *chip = data->chip; + int ret; + + dw_dma_acpi_controller_free(chip->dw); + + dw_dma_of_controller_free(chip->dw); + + ret = data->remove(chip); + if (ret) + dev_warn(chip->dev, "can't remove device properly: %d\n", ret); + + pm_runtime_disable(&pdev->dev); + clk_disable_unprepare(chip->clk); + + return 0; +} + +static void dw_shutdown(struct platform_device *pdev) +{ + struct dw_dma_chip_pdata *data = platform_get_drvdata(pdev); + struct dw_dma_chip *chip = data->chip; + + /* + * We have to call do_dw_dma_disable() to stop any ongoing transfer. On + * some platforms we can't do that since DMA device is powered off. + * Moreover we have no possibility to check if the platform is affected + * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put() + * unconditionally. On the other hand we can't use + * pm_runtime_suspended() because runtime PM framework is not fully + * used by the driver. + */ + pm_runtime_get_sync(chip->dev); + do_dw_dma_disable(chip); + pm_runtime_put_sync_suspend(chip->dev); + + clk_disable_unprepare(chip->clk); +} + +#ifdef CONFIG_OF +static const struct of_device_id dw_dma_of_id_table[] = { + { .compatible = "snps,dma-spear1340", .data = &dw_dma_chip_pdata }, + { .compatible = "renesas,rzn1-dma", .data = &dw_dma_chip_pdata }, + {} +}; +MODULE_DEVICE_TABLE(of, dw_dma_of_id_table); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id dw_dma_acpi_id_table[] = { + { "INTL9C60", (kernel_ulong_t)&dw_dma_chip_pdata }, + { "80862286", (kernel_ulong_t)&dw_dma_chip_pdata }, + { "808622C0", (kernel_ulong_t)&dw_dma_chip_pdata }, + + /* Elkhart Lake iDMA 32-bit (PSE DMA) */ + { "80864BB4", (kernel_ulong_t)&xbar_chip_pdata }, + { "80864BB5", (kernel_ulong_t)&xbar_chip_pdata }, + { "80864BB6", (kernel_ulong_t)&xbar_chip_pdata }, + + { } +}; +MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table); +#endif + +#ifdef CONFIG_PM_SLEEP + +static int dw_suspend_late(struct device *dev) +{ + struct dw_dma_chip_pdata *data = dev_get_drvdata(dev); + struct dw_dma_chip *chip = data->chip; + + do_dw_dma_disable(chip); + clk_disable_unprepare(chip->clk); + + return 0; +} + +static int dw_resume_early(struct device *dev) +{ + struct dw_dma_chip_pdata *data = dev_get_drvdata(dev); + struct dw_dma_chip *chip = data->chip; + int ret; + + ret = clk_prepare_enable(chip->clk); + if (ret) + return ret; + + return do_dw_dma_enable(chip); +} + +#endif /* CONFIG_PM_SLEEP */ + +static const struct dev_pm_ops dw_dev_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_suspend_late, dw_resume_early) +}; + +static struct platform_driver dw_driver = { + .probe = dw_probe, + .remove = dw_remove, + .shutdown = dw_shutdown, + .driver = { + .name = DRV_NAME, + .pm = &dw_dev_pm_ops, + .of_match_table = of_match_ptr(dw_dma_of_id_table), + .acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table), + }, +}; + +static int __init dw_init(void) +{ + return platform_driver_register(&dw_driver); +} +subsys_initcall(dw_init); + +static void __exit dw_exit(void) +{ + platform_driver_unregister(&dw_driver); +} +module_exit(dw_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller platform driver"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h new file mode 100644 index 0000000000..76654bd13c --- /dev/null +++ b/drivers/dma/dw/regs.h @@ -0,0 +1,409 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Driver for the Synopsys DesignWare AHB DMA Controller + * + * Copyright (C) 2005-2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * Copyright (C) 2016 Intel Corporation + */ + +#include +#include +#include + +#include + +#include "internal.h" + +#define DW_DMA_MAX_NR_REQUESTS 16 + +/* flow controller */ +enum dw_dma_fc { + DW_DMA_FC_D_M2M, + DW_DMA_FC_D_M2P, + DW_DMA_FC_D_P2M, + DW_DMA_FC_D_P2P, + DW_DMA_FC_P_P2M, + DW_DMA_FC_SP_P2P, + DW_DMA_FC_P_M2P, + DW_DMA_FC_DP_P2P, +}; + +/* + * Redefine this macro to handle differences between 32- and 64-bit + * addressing, big vs. little endian, etc. + */ +#define DW_REG(name) u32 name; u32 __pad_##name + +/* Hardware register definitions. */ +struct dw_dma_chan_regs { + DW_REG(SAR); /* Source Address Register */ + DW_REG(DAR); /* Destination Address Register */ + DW_REG(LLP); /* Linked List Pointer */ + u32 CTL_LO; /* Control Register Low */ + u32 CTL_HI; /* Control Register High */ + DW_REG(SSTAT); + DW_REG(DSTAT); + DW_REG(SSTATAR); + DW_REG(DSTATAR); + u32 CFG_LO; /* Configuration Register Low */ + u32 CFG_HI; /* Configuration Register High */ + DW_REG(SGR); + DW_REG(DSR); +}; + +struct dw_dma_irq_regs { + DW_REG(XFER); + DW_REG(BLOCK); + DW_REG(SRC_TRAN); + DW_REG(DST_TRAN); + DW_REG(ERROR); +}; + +struct dw_dma_regs { + /* per-channel registers */ + struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS]; + + /* irq handling */ + struct dw_dma_irq_regs RAW; /* r */ + struct dw_dma_irq_regs STATUS; /* r (raw & mask) */ + struct dw_dma_irq_regs MASK; /* rw (set = irq enabled) */ + struct dw_dma_irq_regs CLEAR; /* w (ack, affects "raw") */ + + DW_REG(STATUS_INT); /* r */ + + /* software handshaking */ + DW_REG(REQ_SRC); + DW_REG(REQ_DST); + DW_REG(SGL_REQ_SRC); + DW_REG(SGL_REQ_DST); + DW_REG(LAST_SRC); + DW_REG(LAST_DST); + + /* miscellaneous */ + DW_REG(CFG); + DW_REG(CH_EN); + DW_REG(ID); + DW_REG(TEST); + + /* iDMA 32-bit support */ + DW_REG(CLASS_PRIORITY0); + DW_REG(CLASS_PRIORITY1); + + /* optional encoded params, 0x3c8..0x3f7 */ + u32 __reserved; + + /* per-channel configuration registers */ + u32 DWC_PARAMS[DW_DMA_MAX_NR_CHANNELS]; + u32 MULTI_BLK_TYPE; + u32 MAX_BLK_SIZE; + + /* top-level parameters */ + u32 DW_PARAMS; + + /* component ID */ + u32 COMP_TYPE; + u32 COMP_VERSION; + + /* iDMA 32-bit support */ + DW_REG(FIFO_PARTITION0); + DW_REG(FIFO_PARTITION1); + + DW_REG(SAI_ERR); + DW_REG(GLOBAL_CFG); +}; + +/* Bitfields in DW_PARAMS */ +#define DW_PARAMS_NR_CHAN 8 /* number of channels */ +#define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */ +#define DW_PARAMS_DATA_WIDTH(n) (15 + 2 * (n)) +#define DW_PARAMS_DATA_WIDTH1 15 /* master 1 data width */ +#define DW_PARAMS_DATA_WIDTH2 17 /* master 2 data width */ +#define DW_PARAMS_DATA_WIDTH3 19 /* master 3 data width */ +#define DW_PARAMS_DATA_WIDTH4 21 /* master 4 data width */ +#define DW_PARAMS_EN 28 /* encoded parameters */ + +/* Bitfields in DWC_PARAMS */ +#define DWC_PARAMS_MBLK_EN 11 /* multi block transfer */ +#define DWC_PARAMS_HC_LLP 13 /* set LLP register to zero */ +#define DWC_PARAMS_MSIZE 16 /* max group transaction size */ + +/* bursts size */ +enum dw_dma_msize { + DW_DMA_MSIZE_1, + DW_DMA_MSIZE_4, + DW_DMA_MSIZE_8, + DW_DMA_MSIZE_16, + DW_DMA_MSIZE_32, + DW_DMA_MSIZE_64, + DW_DMA_MSIZE_128, + DW_DMA_MSIZE_256, +}; + +/* Bitfields in LLP */ +#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */ +#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */ + +/* Bitfields in CTL_LO */ +#define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */ +#define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ +#define DWC_CTLL_SRC_WIDTH(n) ((n)<<4) +#define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */ +#define DWC_CTLL_DST_DEC (1<<7) +#define DWC_CTLL_DST_FIX (2<<7) +#define DWC_CTLL_SRC_INC (0<<9) /* SAR update/not */ +#define DWC_CTLL_SRC_DEC (1<<9) +#define DWC_CTLL_SRC_FIX (2<<9) +#define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */ +#define DWC_CTLL_SRC_MSIZE(n) ((n)<<14) +#define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */ +#define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */ +#define DWC_CTLL_FC(n) ((n) << 20) +#define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */ +#define DWC_CTLL_FC_M2P (1 << 20) /* mem-to-periph */ +#define DWC_CTLL_FC_P2M (2 << 20) /* periph-to-mem */ +#define DWC_CTLL_FC_P2P (3 << 20) /* periph-to-periph */ +/* plus 4 transfer types for peripheral-as-flow-controller */ +#define DWC_CTLL_DMS(n) ((n)<<23) /* dst master select */ +#define DWC_CTLL_SMS(n) ((n)<<25) /* src master select */ +#define DWC_CTLL_LLP_D_EN (1 << 27) /* dest block chain */ +#define DWC_CTLL_LLP_S_EN (1 << 28) /* src block chain */ + +/* Bitfields in CTL_HI */ +#define DWC_CTLH_BLOCK_TS_MASK GENMASK(11, 0) +#define DWC_CTLH_BLOCK_TS(x) ((x) & DWC_CTLH_BLOCK_TS_MASK) +#define DWC_CTLH_DONE (1 << 12) + +/* Bitfields in CFG_LO */ +#define DWC_CFGL_CH_PRIOR_MASK (0x7 << 5) /* priority mask */ +#define DWC_CFGL_CH_PRIOR(x) ((x) << 5) /* priority */ +#define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ +#define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */ +#define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */ +#define DWC_CFGL_HS_SRC (1 << 11) /* handshake w/src */ +#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ +#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) +#define DWC_CFGL_LOCK_CH_XACT (2 << 12) +#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ +#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) +#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) +#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ +#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ +#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ +#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ +#define DWC_CFGL_MAX_BURST(x) ((x) << 20) +#define DWC_CFGL_RELOAD_SAR (1 << 30) +#define DWC_CFGL_RELOAD_DAR (1 << 31) + +/* Bitfields in CFG_HI */ +#define DWC_CFGH_FCMODE (1 << 0) +#define DWC_CFGH_FIFO_MODE (1 << 1) +#define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_PROTCTL_DATA (0 << 2) /* data access - always set */ +#define DWC_CFGH_PROTCTL_PRIV (1 << 2) /* privileged -> AHB HPROT[1] */ +#define DWC_CFGH_PROTCTL_BUFFER (2 << 2) /* bufferable -> AHB HPROT[2] */ +#define DWC_CFGH_PROTCTL_CACHE (4 << 2) /* cacheable -> AHB HPROT[3] */ +#define DWC_CFGH_DS_UPD_EN (1 << 5) +#define DWC_CFGH_SS_UPD_EN (1 << 6) +#define DWC_CFGH_SRC_PER(x) ((x) << 7) +#define DWC_CFGH_DST_PER(x) ((x) << 11) + +/* Bitfields in SGR */ +#define DWC_SGR_SGI(x) ((x) << 0) +#define DWC_SGR_SGC(x) ((x) << 20) + +/* Bitfields in DSR */ +#define DWC_DSR_DSI(x) ((x) << 0) +#define DWC_DSR_DSC(x) ((x) << 20) + +/* Bitfields in CFG */ +#define DW_CFG_DMA_EN (1 << 0) + +/* iDMA 32-bit support */ + +/* bursts size */ +enum idma32_msize { + IDMA32_MSIZE_1, + IDMA32_MSIZE_2, + IDMA32_MSIZE_4, + IDMA32_MSIZE_8, + IDMA32_MSIZE_16, + IDMA32_MSIZE_32, +}; + +/* Bitfields in CTL_HI */ +#define IDMA32C_CTLH_BLOCK_TS_MASK GENMASK(16, 0) +#define IDMA32C_CTLH_BLOCK_TS(x) ((x) & IDMA32C_CTLH_BLOCK_TS_MASK) +#define IDMA32C_CTLH_DONE (1 << 17) + +/* Bitfields in CFG_LO */ +#define IDMA32C_CFGL_DST_BURST_ALIGN (1 << 0) /* dst burst align */ +#define IDMA32C_CFGL_SRC_BURST_ALIGN (1 << 1) /* src burst align */ +#define IDMA32C_CFGL_CH_DRAIN (1 << 10) /* drain FIFO */ +#define IDMA32C_CFGL_DST_OPT_BL (1 << 20) /* optimize dst burst length */ +#define IDMA32C_CFGL_SRC_OPT_BL (1 << 21) /* optimize src burst length */ + +/* Bitfields in CFG_HI */ +#define IDMA32C_CFGH_SRC_PER(x) ((x) << 0) +#define IDMA32C_CFGH_DST_PER(x) ((x) << 4) +#define IDMA32C_CFGH_RD_ISSUE_THD(x) ((x) << 8) +#define IDMA32C_CFGH_RW_ISSUE_THD(x) ((x) << 18) +#define IDMA32C_CFGH_SRC_PER_EXT(x) ((x) << 28) /* src peripheral extension */ +#define IDMA32C_CFGH_DST_PER_EXT(x) ((x) << 30) /* dst peripheral extension */ + +/* Bitfields in FIFO_PARTITION */ +#define IDMA32C_FP_PSIZE_CH0(x) ((x) << 0) +#define IDMA32C_FP_PSIZE_CH1(x) ((x) << 13) +#define IDMA32C_FP_UPDATE (1 << 26) + +enum dw_dmac_flags { + DW_DMA_IS_CYCLIC = 0, + DW_DMA_IS_SOFT_LLP = 1, + DW_DMA_IS_PAUSED = 2, + DW_DMA_IS_INITIALIZED = 3, +}; + +struct dw_dma_chan { + struct dma_chan chan; + void __iomem *ch_regs; + u8 mask; + u8 priority; + enum dma_transfer_direction direction; + + /* software emulation of the LLP transfers */ + struct list_head *tx_node_active; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + unsigned long flags; + struct list_head active_list; + struct list_head queue; + + unsigned int descs_allocated; + + /* hardware configuration */ + unsigned int block_size; + bool nollp; + u32 max_burst; + + /* custom slave configuration */ + struct dw_dma_slave dws; + + /* configuration passed via .device_config */ + struct dma_slave_config dma_sconfig; +}; + +static inline struct dw_dma_chan_regs __iomem * +__dwc_regs(struct dw_dma_chan *dwc) +{ + return dwc->ch_regs; +} + +#define channel_readl(dwc, name) \ + readl(&(__dwc_regs(dwc)->name)) +#define channel_writel(dwc, name, val) \ + writel((val), &(__dwc_regs(dwc)->name)) + +static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct dw_dma_chan, chan); +} + +struct dw_dma { + struct dma_device dma; + char name[20]; + void __iomem *regs; + struct dma_pool *desc_pool; + struct tasklet_struct tasklet; + + /* channels */ + struct dw_dma_chan *chan; + u8 all_chan_mask; + u8 in_use; + + /* Channel operations */ + void (*initialize_chan)(struct dw_dma_chan *dwc); + void (*suspend_chan)(struct dw_dma_chan *dwc, bool drain); + void (*resume_chan)(struct dw_dma_chan *dwc, bool drain); + u32 (*prepare_ctllo)(struct dw_dma_chan *dwc); + void (*encode_maxburst)(struct dw_dma_chan *dwc, u32 *maxburst); + u32 (*bytes2block)(struct dw_dma_chan *dwc, size_t bytes, + unsigned int width, size_t *len); + size_t (*block2bytes)(struct dw_dma_chan *dwc, u32 block, u32 width); + + /* Device operations */ + void (*set_device_name)(struct dw_dma *dw, int id); + void (*disable)(struct dw_dma *dw); + void (*enable)(struct dw_dma *dw); + + /* platform data */ + struct dw_dma_platform_data *pdata; +}; + +static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) +{ + return dw->regs; +} + +#define dma_readl(dw, name) \ + readl(&(__dw_regs(dw)->name)) +#define dma_writel(dw, name, val) \ + writel((val), &(__dw_regs(dw)->name)) + +#define idma32_readq(dw, name) \ + hi_lo_readq(&(__dw_regs(dw)->name)) +#define idma32_writeq(dw, name, val) \ + hi_lo_writeq((val), &(__dw_regs(dw)->name)) + +#define channel_set_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | (mask)) +#define channel_clear_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | 0) + +static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct dw_dma, dma); +} + +/* LLI == Linked List Item; a.k.a. DMA block descriptor */ +struct dw_lli { + /* values that are not changed by hardware */ + __le32 sar; + __le32 dar; + __le32 llp; /* chain to next lli */ + __le32 ctllo; + /* values that may get written back: */ + __le32 ctlhi; + /* sstat and dstat can snapshot peripheral register state. + * silicon config may discard either or both... + */ + __le32 sstat; + __le32 dstat; +}; + +struct dw_desc { + /* FIRST values the hardware uses */ + struct dw_lli lli; + +#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v)) +#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v)) +#define lli_read(d, reg) le32_to_cpu((d)->lli.reg) +#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v)) + + /* THEN values for driver housekeeping */ + struct list_head desc_node; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + size_t len; + size_t total_len; + u32 residue; +}; + +#define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node) + +static inline struct dw_desc * +txd_to_dw_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct dw_desc, txd); +} diff --git a/drivers/dma/dw/rzn1-dmamux.c b/drivers/dma/dw/rzn1-dmamux.c new file mode 100644 index 0000000000..4fb8508419 --- /dev/null +++ b/drivers/dma/dw/rzn1-dmamux.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 Schneider-Electric + * Author: Miquel Raynal + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define RNZ1_DMAMUX_NCELLS 6 +#define RZN1_DMAMUX_MAX_LINES 64 +#define RZN1_DMAMUX_LINES_PER_CTLR 16 + +struct rzn1_dmamux_data { + struct dma_router dmarouter; + DECLARE_BITMAP(used_chans, 2 * RZN1_DMAMUX_LINES_PER_CTLR); +}; + +struct rzn1_dmamux_map { + unsigned int req_idx; +}; + +static void rzn1_dmamux_free(struct device *dev, void *route_data) +{ + struct rzn1_dmamux_data *dmamux = dev_get_drvdata(dev); + struct rzn1_dmamux_map *map = route_data; + + dev_dbg(dev, "Unmapping DMAMUX request %u\n", map->req_idx); + + clear_bit(map->req_idx, dmamux->used_chans); + + kfree(map); +} + +static void *rzn1_dmamux_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct rzn1_dmamux_data *dmamux = platform_get_drvdata(pdev); + struct rzn1_dmamux_map *map; + unsigned int dmac_idx, chan, val; + u32 mask; + int ret; + + if (dma_spec->args_count != RNZ1_DMAMUX_NCELLS) + return ERR_PTR(-EINVAL); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return ERR_PTR(-ENOMEM); + + chan = dma_spec->args[0]; + map->req_idx = dma_spec->args[4]; + val = dma_spec->args[5]; + dma_spec->args_count -= 2; + + if (chan >= RZN1_DMAMUX_LINES_PER_CTLR) { + dev_err(&pdev->dev, "Invalid DMA request line: %u\n", chan); + ret = -EINVAL; + goto free_map; + } + + if (map->req_idx >= RZN1_DMAMUX_MAX_LINES || + (map->req_idx % RZN1_DMAMUX_LINES_PER_CTLR) != chan) { + dev_err(&pdev->dev, "Invalid MUX request line: %u\n", map->req_idx); + ret = -EINVAL; + goto free_map; + } + + dmac_idx = map->req_idx >= RZN1_DMAMUX_LINES_PER_CTLR ? 1 : 0; + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", dmac_idx); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + ret = -EINVAL; + goto free_map; + } + + dev_dbg(&pdev->dev, "Mapping DMAMUX request %u to DMAC%u request %u\n", + map->req_idx, dmac_idx, chan); + + if (test_and_set_bit(map->req_idx, dmamux->used_chans)) { + ret = -EBUSY; + goto free_map; + } + + mask = BIT(map->req_idx); + ret = r9a06g032_sysctrl_set_dmamux(mask, val ? mask : 0); + if (ret) + goto clear_bitmap; + + return map; + +clear_bitmap: + clear_bit(map->req_idx, dmamux->used_chans); +free_map: + kfree(map); + + return ERR_PTR(ret); +} + +#ifdef CONFIG_OF +static const struct of_device_id rzn1_dmac_match[] = { + { .compatible = "renesas,rzn1-dma" }, + {} +}; +#endif + +static int rzn1_dmamux_probe(struct platform_device *pdev) +{ + struct device_node *mux_node = pdev->dev.of_node; + const struct of_device_id *match; + struct device_node *dmac_node; + struct rzn1_dmamux_data *dmamux; + + dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL); + if (!dmamux) + return -ENOMEM; + + dmac_node = of_parse_phandle(mux_node, "dma-masters", 0); + if (!dmac_node) + return dev_err_probe(&pdev->dev, -ENODEV, "Can't get DMA master node\n"); + + match = of_match_node(rzn1_dmac_match, dmac_node); + of_node_put(dmac_node); + if (!match) + return dev_err_probe(&pdev->dev, -EINVAL, "DMA master is not supported\n"); + + dmamux->dmarouter.dev = &pdev->dev; + dmamux->dmarouter.route_free = rzn1_dmamux_free; + + platform_set_drvdata(pdev, dmamux); + + return of_dma_router_register(mux_node, rzn1_dmamux_route_allocate, + &dmamux->dmarouter); +} + +static const struct of_device_id rzn1_dmamux_match[] = { + { .compatible = "renesas,rzn1-dmamux" }, + {} +}; +MODULE_DEVICE_TABLE(of, rzn1_dmamux_match); + +static struct platform_driver rzn1_dmamux_driver = { + .driver = { + .name = "renesas,rzn1-dmamux", + .of_match_table = rzn1_dmamux_match, + }, + .probe = rzn1_dmamux_probe, +}; +module_platform_driver(rzn1_dmamux_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Miquel Raynal + * Copyright (C) 2006 Applied Data Systems + * Copyright (C) 2009 Ryan Mallon + * + * This driver is based on dw_dmac and amba-pl08x drivers. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dmaengine.h" + +/* M2P registers */ +#define M2P_CONTROL 0x0000 +#define M2P_CONTROL_STALLINT BIT(0) +#define M2P_CONTROL_NFBINT BIT(1) +#define M2P_CONTROL_CH_ERROR_INT BIT(3) +#define M2P_CONTROL_ENABLE BIT(4) +#define M2P_CONTROL_ICE BIT(6) + +#define M2P_INTERRUPT 0x0004 +#define M2P_INTERRUPT_STALL BIT(0) +#define M2P_INTERRUPT_NFB BIT(1) +#define M2P_INTERRUPT_ERROR BIT(3) + +#define M2P_PPALLOC 0x0008 +#define M2P_STATUS 0x000c + +#define M2P_MAXCNT0 0x0020 +#define M2P_BASE0 0x0024 +#define M2P_MAXCNT1 0x0030 +#define M2P_BASE1 0x0034 + +#define M2P_STATE_IDLE 0 +#define M2P_STATE_STALL 1 +#define M2P_STATE_ON 2 +#define M2P_STATE_NEXT 3 + +/* M2M registers */ +#define M2M_CONTROL 0x0000 +#define M2M_CONTROL_DONEINT BIT(2) +#define M2M_CONTROL_ENABLE BIT(3) +#define M2M_CONTROL_START BIT(4) +#define M2M_CONTROL_DAH BIT(11) +#define M2M_CONTROL_SAH BIT(12) +#define M2M_CONTROL_PW_SHIFT 9 +#define M2M_CONTROL_PW_8 (0 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_16 (1 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_32 (2 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_MASK (3 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_TM_SHIFT 13 +#define M2M_CONTROL_TM_TX (1 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_TM_RX (2 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_NFBINT BIT(21) +#define M2M_CONTROL_RSS_SHIFT 22 +#define M2M_CONTROL_RSS_SSPRX (1 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_SSPTX (2 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_IDE (3 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_NO_HDSK BIT(24) +#define M2M_CONTROL_PWSC_SHIFT 25 + +#define M2M_INTERRUPT 0x0004 +#define M2M_INTERRUPT_MASK 6 + +#define M2M_STATUS 0x000c +#define M2M_STATUS_CTL_SHIFT 1 +#define M2M_STATUS_CTL_IDLE (0 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_STALL (1 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_MEMRD (2 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_MEMWR (3 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_BWCWAIT (4 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_MASK (7 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_BUF_SHIFT 4 +#define M2M_STATUS_BUF_NO (0 << M2M_STATUS_BUF_SHIFT) +#define M2M_STATUS_BUF_ON (1 << M2M_STATUS_BUF_SHIFT) +#define M2M_STATUS_BUF_NEXT (2 << M2M_STATUS_BUF_SHIFT) +#define M2M_STATUS_BUF_MASK (3 << M2M_STATUS_BUF_SHIFT) +#define M2M_STATUS_DONE BIT(6) + +#define M2M_BCR0 0x0010 +#define M2M_BCR1 0x0014 +#define M2M_SAR_BASE0 0x0018 +#define M2M_SAR_BASE1 0x001c +#define M2M_DAR_BASE0 0x002c +#define M2M_DAR_BASE1 0x0030 + +#define DMA_MAX_CHAN_BYTES 0xffff +#define DMA_MAX_CHAN_DESCRIPTORS 32 + +struct ep93xx_dma_engine; +static int ep93xx_dma_slave_config_write(struct dma_chan *chan, + enum dma_transfer_direction dir, + struct dma_slave_config *config); + +/** + * struct ep93xx_dma_desc - EP93xx specific transaction descriptor + * @src_addr: source address of the transaction + * @dst_addr: destination address of the transaction + * @size: size of the transaction (in bytes) + * @complete: this descriptor is completed + * @txd: dmaengine API descriptor + * @tx_list: list of linked descriptors + * @node: link used for putting this into a channel queue + */ +struct ep93xx_dma_desc { + u32 src_addr; + u32 dst_addr; + size_t size; + bool complete; + struct dma_async_tx_descriptor txd; + struct list_head tx_list; + struct list_head node; +}; + +/** + * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel + * @chan: dmaengine API channel + * @edma: pointer to the engine device + * @regs: memory mapped registers + * @irq: interrupt number of the channel + * @clk: clock used by this channel + * @tasklet: channel specific tasklet used for callbacks + * @lock: lock protecting the fields following + * @flags: flags for the channel + * @buffer: which buffer to use next (0/1) + * @active: flattened chain of descriptors currently being processed + * @queue: pending descriptors which are handled next + * @free_list: list of free descriptors which can be used + * @runtime_addr: physical address currently used as dest/src (M2M only). This + * is set via .device_config before slave operation is + * prepared + * @runtime_ctrl: M2M runtime values for the control register. + * @slave_config: slave configuration + * + * As EP93xx DMA controller doesn't support real chained DMA descriptors we + * will have slightly different scheme here: @active points to a head of + * flattened DMA descriptor chain. + * + * @queue holds pending transactions. These are linked through the first + * descriptor in the chain. When a descriptor is moved to the @active queue, + * the first and chained descriptors are flattened into a single list. + * + * @chan.private holds pointer to &struct ep93xx_dma_data which contains + * necessary channel configuration information. For memcpy channels this must + * be %NULL. + */ +struct ep93xx_dma_chan { + struct dma_chan chan; + const struct ep93xx_dma_engine *edma; + void __iomem *regs; + int irq; + struct clk *clk; + struct tasklet_struct tasklet; + /* protects the fields following */ + spinlock_t lock; + unsigned long flags; +/* Channel is configured for cyclic transfers */ +#define EP93XX_DMA_IS_CYCLIC 0 + + int buffer; + struct list_head active; + struct list_head queue; + struct list_head free_list; + u32 runtime_addr; + u32 runtime_ctrl; + struct dma_slave_config slave_config; +}; + +/** + * struct ep93xx_dma_engine - the EP93xx DMA engine instance + * @dma_dev: holds the dmaengine device + * @m2m: is this an M2M or M2P device + * @hw_setup: method which sets the channel up for operation + * @hw_synchronize: synchronizes DMA channel termination to current context + * @hw_shutdown: shuts the channel down and flushes whatever is left + * @hw_submit: pushes active descriptor(s) to the hardware + * @hw_interrupt: handle the interrupt + * @num_channels: number of channels for this instance + * @channels: array of channels + * + * There is one instance of this struct for the M2P channels and one for the + * M2M channels. hw_xxx() methods are used to perform operations which are + * different on M2M and M2P channels. These methods are called with channel + * lock held and interrupts disabled so they cannot sleep. + */ +struct ep93xx_dma_engine { + struct dma_device dma_dev; + bool m2m; + int (*hw_setup)(struct ep93xx_dma_chan *); + void (*hw_synchronize)(struct ep93xx_dma_chan *); + void (*hw_shutdown)(struct ep93xx_dma_chan *); + void (*hw_submit)(struct ep93xx_dma_chan *); + int (*hw_interrupt)(struct ep93xx_dma_chan *); +#define INTERRUPT_UNKNOWN 0 +#define INTERRUPT_DONE 1 +#define INTERRUPT_NEXT_BUFFER 2 + + size_t num_channels; + struct ep93xx_dma_chan channels[]; +}; + +static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac) +{ + return &edmac->chan.dev->device; +} + +static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct ep93xx_dma_chan, chan); +} + +/** + * ep93xx_dma_set_active - set new active descriptor chain + * @edmac: channel + * @desc: head of the new active descriptor chain + * + * Sets @desc to be the head of the new active descriptor chain. This is the + * chain which is processed next. The active list must be empty before calling + * this function. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + BUG_ON(!list_empty(&edmac->active)); + + list_add_tail(&desc->node, &edmac->active); + + /* Flatten the @desc->tx_list chain into @edmac->active list */ + while (!list_empty(&desc->tx_list)) { + struct ep93xx_dma_desc *d = list_first_entry(&desc->tx_list, + struct ep93xx_dma_desc, node); + + /* + * We copy the callback parameters from the first descriptor + * to all the chained descriptors. This way we can call the + * callback without having to find out the first descriptor in + * the chain. Useful for cyclic transfers. + */ + d->txd.callback = desc->txd.callback; + d->txd.callback_param = desc->txd.callback_param; + + list_move_tail(&d->node, &edmac->active); + } +} + +/* Called with @edmac->lock held and interrupts disabled */ +static struct ep93xx_dma_desc * +ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac) +{ + return list_first_entry_or_null(&edmac->active, + struct ep93xx_dma_desc, node); +} + +/** + * ep93xx_dma_advance_active - advances to the next active descriptor + * @edmac: channel + * + * Function advances active descriptor to the next in the @edmac->active and + * returns %true if we still have descriptors in the chain to process. + * Otherwise returns %false. + * + * When the channel is in cyclic mode always returns %true. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc; + + list_rotate_left(&edmac->active); + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + return true; + + desc = ep93xx_dma_get_active(edmac); + if (!desc) + return false; + + /* + * If txd.cookie is set it means that we are back in the first + * descriptor in the chain and hence done with it. + */ + return !desc->txd.cookie; +} + +/* + * M2P DMA implementation + */ + +static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control) +{ + writel(control, edmac->regs + M2P_CONTROL); + /* + * EP93xx User's Guide states that we must perform a dummy read after + * write to the control register. + */ + readl(edmac->regs + M2P_CONTROL); +} + +static int m2p_hw_setup(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control; + + writel(data->port & 0xf, edmac->regs + M2P_PPALLOC); + + control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE + | M2P_CONTROL_ENABLE; + m2p_set_control(edmac, control); + + edmac->buffer = 0; + + return 0; +} + +static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac) +{ + return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3; +} + +static void m2p_hw_synchronize(struct ep93xx_dma_chan *edmac) +{ + unsigned long flags; + u32 control; + + spin_lock_irqsave(&edmac->lock, flags); + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + spin_unlock_irqrestore(&edmac->lock, flags); + + while (m2p_channel_state(edmac) >= M2P_STATE_ON) + schedule(); +} + +static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + m2p_set_control(edmac, 0); + + while (m2p_channel_state(edmac) != M2P_STATE_IDLE) + dev_warn(chan2dev(edmac), "M2P: Not yet IDLE\n"); +} + +static void m2p_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc; + u32 bus_addr; + + desc = ep93xx_dma_get_active(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "M2P: empty descriptor list\n"); + return; + } + + if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV) + bus_addr = desc->src_addr; + else + bus_addr = desc->dst_addr; + + if (edmac->buffer == 0) { + writel(desc->size, edmac->regs + M2P_MAXCNT0); + writel(bus_addr, edmac->regs + M2P_BASE0); + } else { + writel(desc->size, edmac->regs + M2P_MAXCNT1); + writel(bus_addr, edmac->regs + M2P_BASE1); + } + + edmac->buffer ^= 1; +} + +static void m2p_hw_submit(struct ep93xx_dma_chan *edmac) +{ + u32 control = readl(edmac->regs + M2P_CONTROL); + + m2p_fill_desc(edmac); + control |= M2P_CONTROL_STALLINT; + + if (ep93xx_dma_advance_active(edmac)) { + m2p_fill_desc(edmac); + control |= M2P_CONTROL_NFBINT; + } + + m2p_set_control(edmac, control); +} + +static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 irq_status = readl(edmac->regs + M2P_INTERRUPT); + u32 control; + + if (irq_status & M2P_INTERRUPT_ERROR) { + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + + /* Clear the error interrupt */ + writel(1, edmac->regs + M2P_INTERRUPT); + + /* + * It seems that there is no easy way of reporting errors back + * to client so we just report the error here and continue as + * usual. + * + * Revisit this when there is a mechanism to report back the + * errors. + */ + dev_err(chan2dev(edmac), + "DMA transfer failed! Details:\n" + "\tcookie : %d\n" + "\tsrc_addr : 0x%08x\n" + "\tdst_addr : 0x%08x\n" + "\tsize : %zu\n", + desc->txd.cookie, desc->src_addr, desc->dst_addr, + desc->size); + } + + /* + * Even latest E2 silicon revision sometimes assert STALL interrupt + * instead of NFB. Therefore we treat them equally, basing on the + * amount of data we still have to transfer. + */ + if (!(irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB))) + return INTERRUPT_UNKNOWN; + + if (ep93xx_dma_advance_active(edmac)) { + m2p_fill_desc(edmac); + return INTERRUPT_NEXT_BUFFER; + } + + /* Disable interrupts */ + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + return INTERRUPT_DONE; +} + +/* + * M2M DMA implementation + */ + +static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) +{ + const struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = 0; + + if (!data) { + /* This is memcpy channel, nothing to configure */ + writel(control, edmac->regs + M2M_CONTROL); + return 0; + } + + switch (data->port) { + case EP93XX_DMA_SSP: + /* + * This was found via experimenting - anything less than 5 + * causes the channel to perform only a partial transfer which + * leads to problems since we don't get DONE interrupt then. + */ + control = (5 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_NO_HDSK; + + if (data->direction == DMA_MEM_TO_DEV) { + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + control |= M2M_CONTROL_RSS_SSPTX; + } else { + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + control |= M2M_CONTROL_RSS_SSPRX; + } + break; + + case EP93XX_DMA_IDE: + /* + * This IDE part is totally untested. Values below are taken + * from the EP93xx Users's Guide and might not be correct. + */ + if (data->direction == DMA_MEM_TO_DEV) { + /* Worst case from the UG */ + control = (3 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + } else { + control = (2 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + } + + control |= M2M_CONTROL_NO_HDSK; + control |= M2M_CONTROL_RSS_IDE; + control |= M2M_CONTROL_PW_16; + break; + + default: + return -EINVAL; + } + + writel(control, edmac->regs + M2M_CONTROL); + return 0; +} + +static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + /* Just disable the channel */ + writel(0, edmac->regs + M2M_CONTROL); +} + +static void m2m_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc; + + desc = ep93xx_dma_get_active(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "M2M: empty descriptor list\n"); + return; + } + + if (edmac->buffer == 0) { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE0); + writel(desc->size, edmac->regs + M2M_BCR0); + } else { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE1); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE1); + writel(desc->size, edmac->regs + M2M_BCR1); + } + + edmac->buffer ^= 1; +} + +static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = readl(edmac->regs + M2M_CONTROL); + + /* + * Since we allow clients to configure PW (peripheral width) we always + * clear PW bits here and then set them according what is given in + * the runtime configuration. + */ + control &= ~M2M_CONTROL_PW_MASK; + control |= edmac->runtime_ctrl; + + m2m_fill_desc(edmac); + control |= M2M_CONTROL_DONEINT; + + if (ep93xx_dma_advance_active(edmac)) { + m2m_fill_desc(edmac); + control |= M2M_CONTROL_NFBINT; + } + + /* + * Now we can finally enable the channel. For M2M channel this must be + * done _after_ the BCRx registers are programmed. + */ + control |= M2M_CONTROL_ENABLE; + writel(control, edmac->regs + M2M_CONTROL); + + if (!data) { + /* + * For memcpy channels the software trigger must be asserted + * in order to start the memcpy operation. + */ + control |= M2M_CONTROL_START; + writel(control, edmac->regs + M2M_CONTROL); + } +} + +/* + * According to EP93xx User's Guide, we should receive DONE interrupt when all + * M2M DMA controller transactions complete normally. This is not always the + * case - sometimes EP93xx M2M DMA asserts DONE interrupt when the DMA channel + * is still running (channel Buffer FSM in DMA_BUF_ON state, and channel + * Control FSM in DMA_MEM_RD state, observed at least in IDE-DMA operation). + * In effect, disabling the channel when only DONE bit is set could stop + * currently running DMA transfer. To avoid this, we use Buffer FSM and + * Control FSM to check current state of DMA channel. + */ +static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 status = readl(edmac->regs + M2M_STATUS); + u32 ctl_fsm = status & M2M_STATUS_CTL_MASK; + u32 buf_fsm = status & M2M_STATUS_BUF_MASK; + bool done = status & M2M_STATUS_DONE; + bool last_done; + u32 control; + struct ep93xx_dma_desc *desc; + + /* Accept only DONE and NFB interrupts */ + if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_MASK)) + return INTERRUPT_UNKNOWN; + + if (done) { + /* Clear the DONE bit */ + writel(0, edmac->regs + M2M_INTERRUPT); + } + + /* + * Check whether we are done with descriptors or not. This, together + * with DMA channel state, determines action to take in interrupt. + */ + desc = ep93xx_dma_get_active(edmac); + last_done = !desc || desc->txd.cookie; + + /* + * Use M2M DMA Buffer FSM and Control FSM to check current state of + * DMA channel. Using DONE and NFB bits from channel status register + * or bits from channel interrupt register is not reliable. + */ + if (!last_done && + (buf_fsm == M2M_STATUS_BUF_NO || + buf_fsm == M2M_STATUS_BUF_ON)) { + /* + * Two buffers are ready for update when Buffer FSM is in + * DMA_NO_BUF state. Only one buffer can be prepared without + * disabling the channel or polling the DONE bit. + * To simplify things, always prepare only one buffer. + */ + if (ep93xx_dma_advance_active(edmac)) { + m2m_fill_desc(edmac); + if (done && !edmac->chan.private) { + /* Software trigger for memcpy channel */ + control = readl(edmac->regs + M2M_CONTROL); + control |= M2M_CONTROL_START; + writel(control, edmac->regs + M2M_CONTROL); + } + return INTERRUPT_NEXT_BUFFER; + } else { + last_done = true; + } + } + + /* + * Disable the channel only when Buffer FSM is in DMA_NO_BUF state + * and Control FSM is in DMA_STALL state. + */ + if (last_done && + buf_fsm == M2M_STATUS_BUF_NO && + ctl_fsm == M2M_STATUS_CTL_STALL) { + /* Disable interrupts and the channel */ + control = readl(edmac->regs + M2M_CONTROL); + control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_NFBINT + | M2M_CONTROL_ENABLE); + writel(control, edmac->regs + M2M_CONTROL); + return INTERRUPT_DONE; + } + + /* + * Nothing to do this time. + */ + return INTERRUPT_NEXT_BUFFER; +} + +/* + * DMA engine API implementation + */ + +static struct ep93xx_dma_desc * +ep93xx_dma_desc_get(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_desc; + struct ep93xx_dma_desc *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_for_each_entry_safe(desc, _desc, &edmac->free_list, node) { + if (async_tx_test_ack(&desc->txd)) { + list_del_init(&desc->node); + + /* Re-initialize the descriptor */ + desc->src_addr = 0; + desc->dst_addr = 0; + desc->size = 0; + desc->complete = false; + desc->txd.cookie = 0; + desc->txd.callback = NULL; + desc->txd.callback_param = NULL; + + ret = desc; + break; + } + } + spin_unlock_irqrestore(&edmac->lock, flags); + return ret; +} + +static void ep93xx_dma_desc_put(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + if (desc) { + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_splice_init(&desc->tx_list, &edmac->free_list); + list_add(&desc->node, &edmac->free_list); + spin_unlock_irqrestore(&edmac->lock, flags); + } +} + +/** + * ep93xx_dma_advance_work - start processing the next pending transaction + * @edmac: channel + * + * If we have pending transactions queued and we are currently idling, this + * function takes the next queued transaction from the @edmac->queue and + * pushes it to the hardware for execution. + */ +static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *new; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + if (!list_empty(&edmac->active) || list_empty(&edmac->queue)) { + spin_unlock_irqrestore(&edmac->lock, flags); + return; + } + + /* Take the next descriptor from the pending queue */ + new = list_first_entry(&edmac->queue, struct ep93xx_dma_desc, node); + list_del_init(&new->node); + + ep93xx_dma_set_active(edmac, new); + + /* Push it to the hardware */ + edmac->edma->hw_submit(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); +} + +static void ep93xx_dma_tasklet(struct tasklet_struct *t) +{ + struct ep93xx_dma_chan *edmac = from_tasklet(edmac, t, tasklet); + struct ep93xx_dma_desc *desc, *d; + struct dmaengine_desc_callback cb; + LIST_HEAD(list); + + memset(&cb, 0, sizeof(cb)); + spin_lock_irq(&edmac->lock); + /* + * If dma_terminate_all() was called before we get to run, the active + * list has become empty. If that happens we aren't supposed to do + * anything more than call ep93xx_dma_advance_work(). + */ + desc = ep93xx_dma_get_active(edmac); + if (desc) { + if (desc->complete) { + /* mark descriptor complete for non cyclic case only */ + if (!test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + dma_cookie_complete(&desc->txd); + list_splice_init(&edmac->active, &list); + } + dmaengine_desc_get_callback(&desc->txd, &cb); + } + spin_unlock_irq(&edmac->lock); + + /* Pick up the next descriptor from the queue */ + ep93xx_dma_advance_work(edmac); + + /* Now we can release all the chained descriptors */ + list_for_each_entry_safe(desc, d, &list, node) { + dma_descriptor_unmap(&desc->txd); + ep93xx_dma_desc_put(edmac, desc); + } + + dmaengine_desc_callback_invoke(&cb, NULL); +} + +static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id) +{ + struct ep93xx_dma_chan *edmac = dev_id; + struct ep93xx_dma_desc *desc; + irqreturn_t ret = IRQ_HANDLED; + + spin_lock(&edmac->lock); + + desc = ep93xx_dma_get_active(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), + "got interrupt while active list is empty\n"); + spin_unlock(&edmac->lock); + return IRQ_NONE; + } + + switch (edmac->edma->hw_interrupt(edmac)) { + case INTERRUPT_DONE: + desc->complete = true; + tasklet_schedule(&edmac->tasklet); + break; + + case INTERRUPT_NEXT_BUFFER: + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + tasklet_schedule(&edmac->tasklet); + break; + + default: + dev_warn(chan2dev(edmac), "unknown interrupt!\n"); + ret = IRQ_NONE; + break; + } + + spin_unlock(&edmac->lock); + return ret; +} + +/** + * ep93xx_dma_tx_submit - set the prepared descriptor(s) to be executed + * @tx: descriptor to be executed + * + * Function will execute given descriptor on the hardware or if the hardware + * is busy, queue the descriptor to be executed later on. Returns cookie which + * can be used to poll the status of the descriptor. + */ +static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(tx->chan); + struct ep93xx_dma_desc *desc; + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + cookie = dma_cookie_assign(tx); + + desc = container_of(tx, struct ep93xx_dma_desc, txd); + + /* + * If nothing is currently prosessed, we push this descriptor + * directly to the hardware. Otherwise we put the descriptor + * to the pending queue. + */ + if (list_empty(&edmac->active)) { + ep93xx_dma_set_active(edmac, desc); + edmac->edma->hw_submit(edmac); + } else { + list_add_tail(&desc->node, &edmac->queue); + } + + spin_unlock_irqrestore(&edmac->lock, flags); + return cookie; +} + +/** + * ep93xx_dma_alloc_chan_resources - allocate resources for the channel + * @chan: channel to allocate resources + * + * Function allocates necessary resources for the given DMA channel and + * returns number of allocated descriptors for the channel. Negative errno + * is returned in case of failure. + */ +static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_data *data = chan->private; + const char *name = dma_chan_name(chan); + int ret, i; + + /* Sanity check the channel parameters */ + if (!edmac->edma->m2m) { + if (!data) + return -EINVAL; + if (data->port < EP93XX_DMA_I2S1 || + data->port > EP93XX_DMA_IRDA) + return -EINVAL; + if (data->direction != ep93xx_dma_chan_direction(chan)) + return -EINVAL; + } else { + if (data) { + switch (data->port) { + case EP93XX_DMA_SSP: + case EP93XX_DMA_IDE: + if (!is_slave_direction(data->direction)) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + } + + if (data && data->name) + name = data->name; + + ret = clk_prepare_enable(edmac->clk); + if (ret) + return ret; + + ret = request_irq(edmac->irq, ep93xx_dma_interrupt, 0, name, edmac); + if (ret) + goto fail_clk_disable; + + spin_lock_irq(&edmac->lock); + dma_cookie_init(&edmac->chan); + ret = edmac->edma->hw_setup(edmac); + spin_unlock_irq(&edmac->lock); + + if (ret) + goto fail_free_irq; + + for (i = 0; i < DMA_MAX_CHAN_DESCRIPTORS; i++) { + struct ep93xx_dma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + dev_warn(chan2dev(edmac), "not enough descriptors\n"); + break; + } + + INIT_LIST_HEAD(&desc->tx_list); + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.tx_submit = ep93xx_dma_tx_submit; + + ep93xx_dma_desc_put(edmac, desc); + } + + return i; + +fail_free_irq: + free_irq(edmac->irq, edmac); +fail_clk_disable: + clk_disable_unprepare(edmac->clk); + + return ret; +} + +/** + * ep93xx_dma_free_chan_resources - release resources for the channel + * @chan: channel + * + * Function releases all the resources allocated for the given channel. + * The channel must be idle when this is called. + */ +static void ep93xx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *d; + unsigned long flags; + LIST_HEAD(list); + + BUG_ON(!list_empty(&edmac->active)); + BUG_ON(!list_empty(&edmac->queue)); + + spin_lock_irqsave(&edmac->lock, flags); + edmac->edma->hw_shutdown(edmac); + edmac->runtime_addr = 0; + edmac->runtime_ctrl = 0; + edmac->buffer = 0; + list_splice_init(&edmac->free_list, &list); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, d, &list, node) + kfree(desc); + + clk_disable_unprepare(edmac->clk); + free_irq(edmac->irq, edmac); +} + +/** + * ep93xx_dma_prep_dma_memcpy - prepare a memcpy DMA operation + * @chan: channel + * @dest: destination bus address + * @src: source bus address + * @len: size of the transaction + * @flags: flags for the descriptor + * + * Returns a valid DMA descriptor or %NULL in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + size_t bytes, offset; + + first = NULL; + for (offset = 0; offset < len; offset += bytes) { + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couldn't get descriptor\n"); + goto fail; + } + + bytes = min_t(size_t, len - offset, DMA_MAX_CHAN_BYTES); + + desc->src_addr = src + offset; + desc->dst_addr = dest + offset; + desc->size = bytes; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + first->txd.flags = flags; + + return &first->txd; +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_prep_slave_sg - prepare a slave DMA operation + * @chan: channel + * @sgl: list of buffers to transfer + * @sg_len: number of entries in @sgl + * @dir: direction of tha DMA transfer + * @flags: flags for the descriptor + * @context: operation context (ignored) + * + * Returns a valid DMA descriptor or %NULL in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + struct scatterlist *sg; + int i; + + if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) { + dev_warn(chan2dev(edmac), + "channel was configured with different direction\n"); + return NULL; + } + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) { + dev_warn(chan2dev(edmac), + "channel is already used for cyclic transfers\n"); + return NULL; + } + + ep93xx_dma_slave_config_write(chan, dir, &edmac->slave_config); + + first = NULL; + for_each_sg(sgl, sg, sg_len, i) { + size_t len = sg_dma_len(sg); + + if (len > DMA_MAX_CHAN_BYTES) { + dev_warn(chan2dev(edmac), "too big transfer size %zu\n", + len); + goto fail; + } + + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couldn't get descriptor\n"); + goto fail; + } + + if (dir == DMA_MEM_TO_DEV) { + desc->src_addr = sg_dma_address(sg); + desc->dst_addr = edmac->runtime_addr; + } else { + desc->src_addr = edmac->runtime_addr; + desc->dst_addr = sg_dma_address(sg); + } + desc->size = len; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + first->txd.flags = flags; + + return &first->txd; + +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_prep_dma_cyclic - prepare a cyclic DMA operation + * @chan: channel + * @dma_addr: DMA mapped address of the buffer + * @buf_len: length of the buffer (in bytes) + * @period_len: length of a single period + * @dir: direction of the operation + * @flags: tx descriptor status flags + * + * Prepares a descriptor for cyclic DMA operation. This means that once the + * descriptor is submitted, we will be submitting in a @period_len sized + * buffers and calling callback once the period has been elapsed. Transfer + * terminates only when client calls dmaengine_terminate_all() for this + * channel. + * + * Returns a valid DMA descriptor or %NULL in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + size_t offset = 0; + + if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) { + dev_warn(chan2dev(edmac), + "channel was configured with different direction\n"); + return NULL; + } + + if (test_and_set_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) { + dev_warn(chan2dev(edmac), + "channel is already used for cyclic transfers\n"); + return NULL; + } + + if (period_len > DMA_MAX_CHAN_BYTES) { + dev_warn(chan2dev(edmac), "too big period length %zu\n", + period_len); + return NULL; + } + + ep93xx_dma_slave_config_write(chan, dir, &edmac->slave_config); + + /* Split the buffer into period size chunks */ + first = NULL; + for (offset = 0; offset < buf_len; offset += period_len) { + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couldn't get descriptor\n"); + goto fail; + } + + if (dir == DMA_MEM_TO_DEV) { + desc->src_addr = dma_addr + offset; + desc->dst_addr = edmac->runtime_addr; + } else { + desc->src_addr = edmac->runtime_addr; + desc->dst_addr = dma_addr + offset; + } + + desc->size = period_len; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + + return &first->txd; + +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_synchronize - Synchronizes the termination of transfers to the + * current context. + * @chan: channel + * + * Synchronizes the DMA channel termination to the current context. When this + * function returns it is guaranteed that all transfers for previously issued + * descriptors have stopped and it is safe to free the memory associated + * with them. Furthermore it is guaranteed that all complete callback functions + * for a previously submitted descriptor have finished running and it is safe to + * free resources accessed from within the complete callbacks. + */ +static void ep93xx_dma_synchronize(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + + if (edmac->edma->hw_synchronize) + edmac->edma->hw_synchronize(edmac); +} + +/** + * ep93xx_dma_terminate_all - terminate all transactions + * @chan: channel + * + * Stops all DMA transactions. All descriptors are put back to the + * @edmac->free_list and callbacks are _not_ called. + */ +static int ep93xx_dma_terminate_all(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *_d; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&edmac->lock, flags); + /* First we disable and flush the DMA channel */ + edmac->edma->hw_shutdown(edmac); + clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags); + list_splice_init(&edmac->active, &list); + list_splice_init(&edmac->queue, &list); + /* + * We then re-enable the channel. This way we can continue submitting + * the descriptors by just calling ->hw_submit() again. + */ + edmac->edma->hw_setup(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, _d, &list, node) + ep93xx_dma_desc_put(edmac, desc); + + return 0; +} + +static int ep93xx_dma_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + + memcpy(&edmac->slave_config, config, sizeof(*config)); + + return 0; +} + +static int ep93xx_dma_slave_config_write(struct dma_chan *chan, + enum dma_transfer_direction dir, + struct dma_slave_config *config) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + enum dma_slave_buswidth width; + unsigned long flags; + u32 addr, ctrl; + + if (!edmac->edma->m2m) + return -EINVAL; + + switch (dir) { + case DMA_DEV_TO_MEM: + width = config->src_addr_width; + addr = config->src_addr; + break; + + case DMA_MEM_TO_DEV: + width = config->dst_addr_width; + addr = config->dst_addr; + break; + + default: + return -EINVAL; + } + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + ctrl = 0; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + ctrl = M2M_CONTROL_PW_16; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + ctrl = M2M_CONTROL_PW_32; + break; + default: + return -EINVAL; + } + + spin_lock_irqsave(&edmac->lock, flags); + edmac->runtime_addr = addr; + edmac->runtime_ctrl = ctrl; + spin_unlock_irqrestore(&edmac->lock, flags); + + return 0; +} + +/** + * ep93xx_dma_tx_status - check if a transaction is completed + * @chan: channel + * @cookie: transaction specific cookie + * @state: state of the transaction is stored here if given + * + * This function can be used to query state of a given transaction. + */ +static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + return dma_cookie_status(chan, cookie, state); +} + +/** + * ep93xx_dma_issue_pending - push pending transactions to the hardware + * @chan: channel + * + * When this function is called, all pending transactions are pushed to the + * hardware and executed. + */ +static void ep93xx_dma_issue_pending(struct dma_chan *chan) +{ + ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan)); +} + +static int __init ep93xx_dma_probe(struct platform_device *pdev) +{ + struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct ep93xx_dma_engine *edma; + struct dma_device *dma_dev; + int ret, i; + + edma = kzalloc(struct_size(edma, channels, pdata->num_channels), GFP_KERNEL); + if (!edma) + return -ENOMEM; + + dma_dev = &edma->dma_dev; + edma->m2m = platform_get_device_id(pdev)->driver_data; + edma->num_channels = pdata->num_channels; + + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < pdata->num_channels; i++) { + const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i]; + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + + edmac->chan.device = dma_dev; + edmac->regs = cdata->base; + edmac->irq = cdata->irq; + edmac->edma = edma; + + edmac->clk = clk_get(NULL, cdata->name); + if (IS_ERR(edmac->clk)) { + dev_warn(&pdev->dev, "failed to get clock for %s\n", + cdata->name); + continue; + } + + spin_lock_init(&edmac->lock); + INIT_LIST_HEAD(&edmac->active); + INIT_LIST_HEAD(&edmac->queue); + INIT_LIST_HEAD(&edmac->free_list); + tasklet_setup(&edmac->tasklet, ep93xx_dma_tasklet); + + list_add_tail(&edmac->chan.device_node, + &dma_dev->channels); + } + + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + + dma_dev->dev = &pdev->dev; + dma_dev->device_alloc_chan_resources = ep93xx_dma_alloc_chan_resources; + dma_dev->device_free_chan_resources = ep93xx_dma_free_chan_resources; + dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic; + dma_dev->device_config = ep93xx_dma_slave_config; + dma_dev->device_synchronize = ep93xx_dma_synchronize; + dma_dev->device_terminate_all = ep93xx_dma_terminate_all; + dma_dev->device_issue_pending = ep93xx_dma_issue_pending; + dma_dev->device_tx_status = ep93xx_dma_tx_status; + + dma_set_max_seg_size(dma_dev->dev, DMA_MAX_CHAN_BYTES); + + if (edma->m2m) { + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_dev->device_prep_dma_memcpy = ep93xx_dma_prep_dma_memcpy; + + edma->hw_setup = m2m_hw_setup; + edma->hw_shutdown = m2m_hw_shutdown; + edma->hw_submit = m2m_hw_submit; + edma->hw_interrupt = m2m_hw_interrupt; + } else { + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + + edma->hw_synchronize = m2p_hw_synchronize; + edma->hw_setup = m2p_hw_setup; + edma->hw_shutdown = m2p_hw_shutdown; + edma->hw_submit = m2p_hw_submit; + edma->hw_interrupt = m2p_hw_interrupt; + } + + ret = dma_async_device_register(dma_dev); + if (unlikely(ret)) { + for (i = 0; i < edma->num_channels; i++) { + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + if (!IS_ERR_OR_NULL(edmac->clk)) + clk_put(edmac->clk); + } + kfree(edma); + } else { + dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n", + edma->m2m ? "M" : "P"); + } + + return ret; +} + +static const struct platform_device_id ep93xx_dma_driver_ids[] = { + { "ep93xx-dma-m2p", 0 }, + { "ep93xx-dma-m2m", 1 }, + { }, +}; + +static struct platform_driver ep93xx_dma_driver = { + .driver = { + .name = "ep93xx-dma", + }, + .id_table = ep93xx_dma_driver_ids, +}; + +static int __init ep93xx_dma_module_init(void) +{ + return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe); +} +subsys_initcall(ep93xx_dma_module_init); + +MODULE_AUTHOR("Mika Westerberg "); +MODULE_DESCRIPTION("EP93xx DMA driver"); diff --git a/drivers/dma/fsl-dpaa2-qdma/Kconfig b/drivers/dma/fsl-dpaa2-qdma/Kconfig new file mode 100644 index 0000000000..258ed6be93 --- /dev/null +++ b/drivers/dma/fsl-dpaa2-qdma/Kconfig @@ -0,0 +1,9 @@ +menuconfig FSL_DPAA2_QDMA + tristate "NXP DPAA2 QDMA" + depends on ARM64 + depends on FSL_MC_BUS && FSL_MC_DPIO + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + NXP Data Path Acceleration Architecture 2 QDMA driver, + using the NXP MC bus driver. diff --git a/drivers/dma/fsl-dpaa2-qdma/Makefile b/drivers/dma/fsl-dpaa2-qdma/Makefile new file mode 100644 index 0000000000..c1d0226f2b --- /dev/null +++ b/drivers/dma/fsl-dpaa2-qdma/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for the NXP DPAA2 qDMA controllers +obj-$(CONFIG_FSL_DPAA2_QDMA) += dpaa2-qdma.o dpdmai.o diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c new file mode 100644 index 0000000000..a42a376348 --- /dev/null +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -0,0 +1,839 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright 2019 NXP + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../virt-dma.h" +#include "dpdmai.h" +#include "dpaa2-qdma.h" + +static bool smmu_disable = true; + +static struct dpaa2_qdma_chan *to_dpaa2_qdma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct dpaa2_qdma_chan, vchan.chan); +} + +static struct dpaa2_qdma_comp *to_fsl_qdma_comp(struct virt_dma_desc *vd) +{ + return container_of(vd, struct dpaa2_qdma_comp, vdesc); +} + +static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan); + struct dpaa2_qdma_engine *dpaa2_qdma = dpaa2_chan->qdma; + struct device *dev = &dpaa2_qdma->priv->dpdmai_dev->dev; + + dpaa2_chan->fd_pool = dma_pool_create("fd_pool", dev, + sizeof(struct dpaa2_fd), + sizeof(struct dpaa2_fd), 0); + if (!dpaa2_chan->fd_pool) + goto err; + + dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev, + sizeof(struct dpaa2_fl_entry), + sizeof(struct dpaa2_fl_entry), 0); + if (!dpaa2_chan->fl_pool) + goto err_fd; + + dpaa2_chan->sdd_pool = + dma_pool_create("sdd_pool", dev, + sizeof(struct dpaa2_qdma_sd_d), + sizeof(struct dpaa2_qdma_sd_d), 0); + if (!dpaa2_chan->sdd_pool) + goto err_fl; + + return dpaa2_qdma->desc_allocated++; +err_fl: + dma_pool_destroy(dpaa2_chan->fl_pool); +err_fd: + dma_pool_destroy(dpaa2_chan->fd_pool); +err: + return -ENOMEM; +} + +static void dpaa2_qdma_free_chan_resources(struct dma_chan *chan) +{ + struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan); + struct dpaa2_qdma_engine *dpaa2_qdma = dpaa2_chan->qdma; + unsigned long flags; + + LIST_HEAD(head); + + spin_lock_irqsave(&dpaa2_chan->vchan.lock, flags); + vchan_get_all_descriptors(&dpaa2_chan->vchan, &head); + spin_unlock_irqrestore(&dpaa2_chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&dpaa2_chan->vchan, &head); + + dpaa2_dpdmai_free_comp(dpaa2_chan, &dpaa2_chan->comp_used); + dpaa2_dpdmai_free_comp(dpaa2_chan, &dpaa2_chan->comp_free); + + dma_pool_destroy(dpaa2_chan->fd_pool); + dma_pool_destroy(dpaa2_chan->fl_pool); + dma_pool_destroy(dpaa2_chan->sdd_pool); + dpaa2_qdma->desc_allocated--; +} + +/* + * Request a command descriptor for enqueue. + */ +static struct dpaa2_qdma_comp * +dpaa2_qdma_request_desc(struct dpaa2_qdma_chan *dpaa2_chan) +{ + struct dpaa2_qdma_priv *qdma_priv = dpaa2_chan->qdma->priv; + struct device *dev = &qdma_priv->dpdmai_dev->dev; + struct dpaa2_qdma_comp *comp_temp = NULL; + unsigned long flags; + + spin_lock_irqsave(&dpaa2_chan->queue_lock, flags); + if (list_empty(&dpaa2_chan->comp_free)) { + spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags); + comp_temp = kzalloc(sizeof(*comp_temp), GFP_NOWAIT); + if (!comp_temp) + goto err; + comp_temp->fd_virt_addr = + dma_pool_alloc(dpaa2_chan->fd_pool, GFP_NOWAIT, + &comp_temp->fd_bus_addr); + if (!comp_temp->fd_virt_addr) + goto err_comp; + + comp_temp->fl_virt_addr = + dma_pool_alloc(dpaa2_chan->fl_pool, GFP_NOWAIT, + &comp_temp->fl_bus_addr); + if (!comp_temp->fl_virt_addr) + goto err_fd_virt; + + comp_temp->desc_virt_addr = + dma_pool_alloc(dpaa2_chan->sdd_pool, GFP_NOWAIT, + &comp_temp->desc_bus_addr); + if (!comp_temp->desc_virt_addr) + goto err_fl_virt; + + comp_temp->qchan = dpaa2_chan; + return comp_temp; + } + + comp_temp = list_first_entry(&dpaa2_chan->comp_free, + struct dpaa2_qdma_comp, list); + list_del(&comp_temp->list); + spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags); + + comp_temp->qchan = dpaa2_chan; + + return comp_temp; + +err_fl_virt: + dma_pool_free(dpaa2_chan->fl_pool, + comp_temp->fl_virt_addr, + comp_temp->fl_bus_addr); +err_fd_virt: + dma_pool_free(dpaa2_chan->fd_pool, + comp_temp->fd_virt_addr, + comp_temp->fd_bus_addr); +err_comp: + kfree(comp_temp); +err: + dev_err(dev, "Failed to request descriptor\n"); + return NULL; +} + +static void +dpaa2_qdma_populate_fd(u32 format, struct dpaa2_qdma_comp *dpaa2_comp) +{ + struct dpaa2_fd *fd; + + fd = dpaa2_comp->fd_virt_addr; + memset(fd, 0, sizeof(struct dpaa2_fd)); + + /* fd populated */ + dpaa2_fd_set_addr(fd, dpaa2_comp->fl_bus_addr); + + /* + * Bypass memory translation, Frame list format, short length disable + * we need to disable BMT if fsl-mc use iova addr + */ + if (smmu_disable) + dpaa2_fd_set_bpid(fd, QMAN_FD_BMT_ENABLE); + dpaa2_fd_set_format(fd, QMAN_FD_FMT_ENABLE | QMAN_FD_SL_DISABLE); + + dpaa2_fd_set_frc(fd, format | QDMA_SER_CTX); +} + +/* first frame list for descriptor buffer */ +static void +dpaa2_qdma_populate_first_framel(struct dpaa2_fl_entry *f_list, + struct dpaa2_qdma_comp *dpaa2_comp, + bool wrt_changed) +{ + struct dpaa2_qdma_sd_d *sdd; + + sdd = dpaa2_comp->desc_virt_addr; + memset(sdd, 0, 2 * (sizeof(*sdd))); + + /* source descriptor CMD */ + sdd->cmd = cpu_to_le32(QDMA_SD_CMD_RDTTYPE_COHERENT); + sdd++; + + /* dest descriptor CMD */ + if (wrt_changed) + sdd->cmd = cpu_to_le32(LX2160_QDMA_DD_CMD_WRTTYPE_COHERENT); + else + sdd->cmd = cpu_to_le32(QDMA_DD_CMD_WRTTYPE_COHERENT); + + memset(f_list, 0, sizeof(struct dpaa2_fl_entry)); + + /* first frame list to source descriptor */ + dpaa2_fl_set_addr(f_list, dpaa2_comp->desc_bus_addr); + dpaa2_fl_set_len(f_list, 0x20); + dpaa2_fl_set_format(f_list, QDMA_FL_FMT_SBF | QDMA_FL_SL_LONG); + + /* bypass memory translation */ + if (smmu_disable) + f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE); +} + +/* source and destination frame list */ +static void +dpaa2_qdma_populate_frames(struct dpaa2_fl_entry *f_list, + dma_addr_t dst, dma_addr_t src, + size_t len, uint8_t fmt) +{ + /* source frame list to source buffer */ + memset(f_list, 0, sizeof(struct dpaa2_fl_entry)); + + dpaa2_fl_set_addr(f_list, src); + dpaa2_fl_set_len(f_list, len); + + /* single buffer frame or scatter gather frame */ + dpaa2_fl_set_format(f_list, (fmt | QDMA_FL_SL_LONG)); + + /* bypass memory translation */ + if (smmu_disable) + f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE); + + f_list++; + + /* destination frame list to destination buffer */ + memset(f_list, 0, sizeof(struct dpaa2_fl_entry)); + + dpaa2_fl_set_addr(f_list, dst); + dpaa2_fl_set_len(f_list, len); + dpaa2_fl_set_format(f_list, (fmt | QDMA_FL_SL_LONG)); + /* single buffer frame or scatter gather frame */ + dpaa2_fl_set_final(f_list, QDMA_FL_F); + /* bypass memory translation */ + if (smmu_disable) + f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE); +} + +static struct dma_async_tx_descriptor +*dpaa2_qdma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, + dma_addr_t src, size_t len, ulong flags) +{ + struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan); + struct dpaa2_qdma_engine *dpaa2_qdma; + struct dpaa2_qdma_comp *dpaa2_comp; + struct dpaa2_fl_entry *f_list; + bool wrt_changed; + + dpaa2_qdma = dpaa2_chan->qdma; + dpaa2_comp = dpaa2_qdma_request_desc(dpaa2_chan); + if (!dpaa2_comp) + return NULL; + + wrt_changed = (bool)dpaa2_qdma->qdma_wrtype_fixup; + + /* populate Frame descriptor */ + dpaa2_qdma_populate_fd(QDMA_FD_LONG_FORMAT, dpaa2_comp); + + f_list = dpaa2_comp->fl_virt_addr; + + /* first frame list for descriptor buffer (logn format) */ + dpaa2_qdma_populate_first_framel(f_list, dpaa2_comp, wrt_changed); + + f_list++; + + dpaa2_qdma_populate_frames(f_list, dst, src, len, QDMA_FL_FMT_SBF); + + return vchan_tx_prep(&dpaa2_chan->vchan, &dpaa2_comp->vdesc, flags); +} + +static void dpaa2_qdma_issue_pending(struct dma_chan *chan) +{ + struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan); + struct dpaa2_qdma_comp *dpaa2_comp; + struct virt_dma_desc *vdesc; + struct dpaa2_fd *fd; + unsigned long flags; + int err; + + spin_lock_irqsave(&dpaa2_chan->queue_lock, flags); + spin_lock(&dpaa2_chan->vchan.lock); + if (vchan_issue_pending(&dpaa2_chan->vchan)) { + vdesc = vchan_next_desc(&dpaa2_chan->vchan); + if (!vdesc) + goto err_enqueue; + dpaa2_comp = to_fsl_qdma_comp(vdesc); + + fd = dpaa2_comp->fd_virt_addr; + + list_del(&vdesc->node); + list_add_tail(&dpaa2_comp->list, &dpaa2_chan->comp_used); + + err = dpaa2_io_service_enqueue_fq(NULL, dpaa2_chan->fqid, fd); + if (err) { + list_move_tail(&dpaa2_comp->list, + &dpaa2_chan->comp_free); + } + } +err_enqueue: + spin_unlock(&dpaa2_chan->vchan.lock); + spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags); +} + +static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev) +{ + struct dpaa2_qdma_priv_per_prio *ppriv; + struct device *dev = &ls_dev->dev; + struct dpaa2_qdma_priv *priv; + u8 prio_def = DPDMAI_PRIO_NUM; + int err = -EINVAL; + int i; + + priv = dev_get_drvdata(dev); + + priv->dev = dev; + priv->dpqdma_id = ls_dev->obj_desc.id; + + /* Get the handle for the DPDMAI this interface is associate with */ + err = dpdmai_open(priv->mc_io, 0, priv->dpqdma_id, &ls_dev->mc_handle); + if (err) { + dev_err(dev, "dpdmai_open() failed\n"); + return err; + } + + dev_dbg(dev, "Opened dpdmai object successfully\n"); + + err = dpdmai_get_attributes(priv->mc_io, 0, ls_dev->mc_handle, + &priv->dpdmai_attr); + if (err) { + dev_err(dev, "dpdmai_get_attributes() failed\n"); + goto exit; + } + + if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) { + err = -EINVAL; + dev_err(dev, "DPDMAI major version mismatch\n" + "Found %u.%u, supported version is %u.%u\n", + priv->dpdmai_attr.version.major, + priv->dpdmai_attr.version.minor, + DPDMAI_VER_MAJOR, DPDMAI_VER_MINOR); + goto exit; + } + + if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) { + err = -EINVAL; + dev_err(dev, "DPDMAI minor version mismatch\n" + "Found %u.%u, supported version is %u.%u\n", + priv->dpdmai_attr.version.major, + priv->dpdmai_attr.version.minor, + DPDMAI_VER_MAJOR, DPDMAI_VER_MINOR); + goto exit; + } + + priv->num_pairs = min(priv->dpdmai_attr.num_of_priorities, prio_def); + ppriv = kcalloc(priv->num_pairs, sizeof(*ppriv), GFP_KERNEL); + if (!ppriv) { + err = -ENOMEM; + goto exit; + } + priv->ppriv = ppriv; + + for (i = 0; i < priv->num_pairs; i++) { + err = dpdmai_get_rx_queue(priv->mc_io, 0, ls_dev->mc_handle, + i, &priv->rx_queue_attr[i]); + if (err) { + dev_err(dev, "dpdmai_get_rx_queue() failed\n"); + goto exit; + } + ppriv->rsp_fqid = priv->rx_queue_attr[i].fqid; + + err = dpdmai_get_tx_queue(priv->mc_io, 0, ls_dev->mc_handle, + i, &priv->tx_fqid[i]); + if (err) { + dev_err(dev, "dpdmai_get_tx_queue() failed\n"); + goto exit; + } + ppriv->req_fqid = priv->tx_fqid[i]; + ppriv->prio = i; + ppriv->priv = priv; + ppriv++; + } + + return 0; +exit: + dpdmai_close(priv->mc_io, 0, ls_dev->mc_handle); + return err; +} + +static void dpaa2_qdma_fqdan_cb(struct dpaa2_io_notification_ctx *ctx) +{ + struct dpaa2_qdma_priv_per_prio *ppriv = container_of(ctx, + struct dpaa2_qdma_priv_per_prio, nctx); + struct dpaa2_qdma_comp *dpaa2_comp, *_comp_tmp; + struct dpaa2_qdma_priv *priv = ppriv->priv; + u32 n_chans = priv->dpaa2_qdma->n_chans; + struct dpaa2_qdma_chan *qchan; + const struct dpaa2_fd *fd_eq; + const struct dpaa2_fd *fd; + struct dpaa2_dq *dq; + int is_last = 0; + int found; + u8 status; + int err; + int i; + + do { + err = dpaa2_io_service_pull_fq(NULL, ppriv->rsp_fqid, + ppriv->store); + } while (err); + + while (!is_last) { + do { + dq = dpaa2_io_store_next(ppriv->store, &is_last); + } while (!is_last && !dq); + if (!dq) { + dev_err(priv->dev, "FQID returned no valid frames!\n"); + continue; + } + + /* obtain FD and process the error */ + fd = dpaa2_dq_fd(dq); + + status = dpaa2_fd_get_ctrl(fd) & 0xff; + if (status) + dev_err(priv->dev, "FD error occurred\n"); + found = 0; + for (i = 0; i < n_chans; i++) { + qchan = &priv->dpaa2_qdma->chans[i]; + spin_lock(&qchan->queue_lock); + if (list_empty(&qchan->comp_used)) { + spin_unlock(&qchan->queue_lock); + continue; + } + list_for_each_entry_safe(dpaa2_comp, _comp_tmp, + &qchan->comp_used, list) { + fd_eq = dpaa2_comp->fd_virt_addr; + + if (le64_to_cpu(fd_eq->simple.addr) == + le64_to_cpu(fd->simple.addr)) { + spin_lock(&qchan->vchan.lock); + vchan_cookie_complete(& + dpaa2_comp->vdesc); + spin_unlock(&qchan->vchan.lock); + found = 1; + break; + } + } + spin_unlock(&qchan->queue_lock); + if (found) + break; + } + } + + dpaa2_io_service_rearm(NULL, ctx); +} + +static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv) +{ + struct dpaa2_qdma_priv_per_prio *ppriv; + struct device *dev = priv->dev; + int err = -EINVAL; + int i, num; + + num = priv->num_pairs; + ppriv = priv->ppriv; + for (i = 0; i < num; i++) { + ppriv->nctx.is_cdan = 0; + ppriv->nctx.desired_cpu = DPAA2_IO_ANY_CPU; + ppriv->nctx.id = ppriv->rsp_fqid; + ppriv->nctx.cb = dpaa2_qdma_fqdan_cb; + err = dpaa2_io_service_register(NULL, &ppriv->nctx, dev); + if (err) { + dev_err(dev, "Notification register failed\n"); + goto err_service; + } + + ppriv->store = + dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev); + if (!ppriv->store) { + err = -ENOMEM; + dev_err(dev, "dpaa2_io_store_create() failed\n"); + goto err_store; + } + + ppriv++; + } + return 0; + +err_store: + dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev); +err_service: + ppriv--; + while (ppriv >= priv->ppriv) { + dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev); + dpaa2_io_store_destroy(ppriv->store); + ppriv--; + } + return err; +} + +static void dpaa2_dpmai_store_free(struct dpaa2_qdma_priv *priv) +{ + struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv; + int i; + + for (i = 0; i < priv->num_pairs; i++) { + dpaa2_io_store_destroy(ppriv->store); + ppriv++; + } +} + +static void dpaa2_dpdmai_dpio_free(struct dpaa2_qdma_priv *priv) +{ + struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv; + struct device *dev = priv->dev; + int i; + + for (i = 0; i < priv->num_pairs; i++) { + dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev); + ppriv++; + } +} + +static int __cold dpaa2_dpdmai_bind(struct dpaa2_qdma_priv *priv) +{ + struct dpdmai_rx_queue_cfg rx_queue_cfg; + struct dpaa2_qdma_priv_per_prio *ppriv; + struct device *dev = priv->dev; + struct fsl_mc_device *ls_dev; + int i, num; + int err; + + ls_dev = to_fsl_mc_device(dev); + num = priv->num_pairs; + ppriv = priv->ppriv; + for (i = 0; i < num; i++) { + rx_queue_cfg.options = DPDMAI_QUEUE_OPT_USER_CTX | + DPDMAI_QUEUE_OPT_DEST; + rx_queue_cfg.user_ctx = ppriv->nctx.qman64; + rx_queue_cfg.dest_cfg.dest_type = DPDMAI_DEST_DPIO; + rx_queue_cfg.dest_cfg.dest_id = ppriv->nctx.dpio_id; + rx_queue_cfg.dest_cfg.priority = ppriv->prio; + err = dpdmai_set_rx_queue(priv->mc_io, 0, ls_dev->mc_handle, + rx_queue_cfg.dest_cfg.priority, + &rx_queue_cfg); + if (err) { + dev_err(dev, "dpdmai_set_rx_queue() failed\n"); + return err; + } + + ppriv++; + } + + return 0; +} + +static int __cold dpaa2_dpdmai_dpio_unbind(struct dpaa2_qdma_priv *priv) +{ + struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv; + struct device *dev = priv->dev; + struct fsl_mc_device *ls_dev; + int err = 0; + int i; + + ls_dev = to_fsl_mc_device(dev); + + for (i = 0; i < priv->num_pairs; i++) { + ppriv->nctx.qman64 = 0; + ppriv->nctx.dpio_id = 0; + ppriv++; + } + + err = dpdmai_reset(priv->mc_io, 0, ls_dev->mc_handle); + if (err) + dev_err(dev, "dpdmai_reset() failed\n"); + + return err; +} + +static void dpaa2_dpdmai_free_comp(struct dpaa2_qdma_chan *qchan, + struct list_head *head) +{ + struct dpaa2_qdma_comp *comp_tmp, *_comp_tmp; + unsigned long flags; + + list_for_each_entry_safe(comp_tmp, _comp_tmp, + head, list) { + spin_lock_irqsave(&qchan->queue_lock, flags); + list_del(&comp_tmp->list); + spin_unlock_irqrestore(&qchan->queue_lock, flags); + dma_pool_free(qchan->fd_pool, + comp_tmp->fd_virt_addr, + comp_tmp->fd_bus_addr); + dma_pool_free(qchan->fl_pool, + comp_tmp->fl_virt_addr, + comp_tmp->fl_bus_addr); + dma_pool_free(qchan->sdd_pool, + comp_tmp->desc_virt_addr, + comp_tmp->desc_bus_addr); + kfree(comp_tmp); + } +} + +static void dpaa2_dpdmai_free_channels(struct dpaa2_qdma_engine *dpaa2_qdma) +{ + struct dpaa2_qdma_chan *qchan; + int num, i; + + num = dpaa2_qdma->n_chans; + for (i = 0; i < num; i++) { + qchan = &dpaa2_qdma->chans[i]; + dpaa2_dpdmai_free_comp(qchan, &qchan->comp_used); + dpaa2_dpdmai_free_comp(qchan, &qchan->comp_free); + dma_pool_destroy(qchan->fd_pool); + dma_pool_destroy(qchan->fl_pool); + dma_pool_destroy(qchan->sdd_pool); + } +} + +static void dpaa2_qdma_free_desc(struct virt_dma_desc *vdesc) +{ + struct dpaa2_qdma_comp *dpaa2_comp; + struct dpaa2_qdma_chan *qchan; + unsigned long flags; + + dpaa2_comp = to_fsl_qdma_comp(vdesc); + qchan = dpaa2_comp->qchan; + spin_lock_irqsave(&qchan->queue_lock, flags); + list_move_tail(&dpaa2_comp->list, &qchan->comp_free); + spin_unlock_irqrestore(&qchan->queue_lock, flags); +} + +static int dpaa2_dpdmai_init_channels(struct dpaa2_qdma_engine *dpaa2_qdma) +{ + struct dpaa2_qdma_priv *priv = dpaa2_qdma->priv; + struct dpaa2_qdma_chan *dpaa2_chan; + int num = priv->num_pairs; + int i; + + INIT_LIST_HEAD(&dpaa2_qdma->dma_dev.channels); + for (i = 0; i < dpaa2_qdma->n_chans; i++) { + dpaa2_chan = &dpaa2_qdma->chans[i]; + dpaa2_chan->qdma = dpaa2_qdma; + dpaa2_chan->fqid = priv->tx_fqid[i % num]; + dpaa2_chan->vchan.desc_free = dpaa2_qdma_free_desc; + vchan_init(&dpaa2_chan->vchan, &dpaa2_qdma->dma_dev); + spin_lock_init(&dpaa2_chan->queue_lock); + INIT_LIST_HEAD(&dpaa2_chan->comp_used); + INIT_LIST_HEAD(&dpaa2_chan->comp_free); + } + return 0; +} + +static int dpaa2_qdma_probe(struct fsl_mc_device *dpdmai_dev) +{ + struct device *dev = &dpdmai_dev->dev; + struct dpaa2_qdma_engine *dpaa2_qdma; + struct dpaa2_qdma_priv *priv; + int err; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + dev_set_drvdata(dev, priv); + priv->dpdmai_dev = dpdmai_dev; + + priv->iommu_domain = iommu_get_domain_for_dev(dev); + if (priv->iommu_domain) + smmu_disable = false; + + /* obtain a MC portal */ + err = fsl_mc_portal_allocate(dpdmai_dev, 0, &priv->mc_io); + if (err) { + if (err == -ENXIO) + err = -EPROBE_DEFER; + else + dev_err(dev, "MC portal allocation failed\n"); + goto err_mcportal; + } + + /* DPDMAI initialization */ + err = dpaa2_qdma_setup(dpdmai_dev); + if (err) { + dev_err(dev, "dpaa2_dpdmai_setup() failed\n"); + goto err_dpdmai_setup; + } + + /* DPIO */ + err = dpaa2_qdma_dpio_setup(priv); + if (err) { + dev_err(dev, "dpaa2_dpdmai_dpio_setup() failed\n"); + goto err_dpio_setup; + } + + /* DPDMAI binding to DPIO */ + err = dpaa2_dpdmai_bind(priv); + if (err) { + dev_err(dev, "dpaa2_dpdmai_bind() failed\n"); + goto err_bind; + } + + /* DPDMAI enable */ + err = dpdmai_enable(priv->mc_io, 0, dpdmai_dev->mc_handle); + if (err) { + dev_err(dev, "dpdmai_enable() failed\n"); + goto err_enable; + } + + dpaa2_qdma = kzalloc(sizeof(*dpaa2_qdma), GFP_KERNEL); + if (!dpaa2_qdma) { + err = -ENOMEM; + goto err_eng; + } + + priv->dpaa2_qdma = dpaa2_qdma; + dpaa2_qdma->priv = priv; + + dpaa2_qdma->desc_allocated = 0; + dpaa2_qdma->n_chans = NUM_CH; + + dpaa2_dpdmai_init_channels(dpaa2_qdma); + + if (soc_device_match(soc_fixup_tuning)) + dpaa2_qdma->qdma_wrtype_fixup = true; + else + dpaa2_qdma->qdma_wrtype_fixup = false; + + dma_cap_set(DMA_PRIVATE, dpaa2_qdma->dma_dev.cap_mask); + dma_cap_set(DMA_SLAVE, dpaa2_qdma->dma_dev.cap_mask); + dma_cap_set(DMA_MEMCPY, dpaa2_qdma->dma_dev.cap_mask); + + dpaa2_qdma->dma_dev.dev = dev; + dpaa2_qdma->dma_dev.device_alloc_chan_resources = + dpaa2_qdma_alloc_chan_resources; + dpaa2_qdma->dma_dev.device_free_chan_resources = + dpaa2_qdma_free_chan_resources; + dpaa2_qdma->dma_dev.device_tx_status = dma_cookie_status; + dpaa2_qdma->dma_dev.device_prep_dma_memcpy = dpaa2_qdma_prep_memcpy; + dpaa2_qdma->dma_dev.device_issue_pending = dpaa2_qdma_issue_pending; + + err = dma_async_device_register(&dpaa2_qdma->dma_dev); + if (err) { + dev_err(dev, "Can't register NXP QDMA engine.\n"); + goto err_dpaa2_qdma; + } + + return 0; + +err_dpaa2_qdma: + kfree(dpaa2_qdma); +err_eng: + dpdmai_disable(priv->mc_io, 0, dpdmai_dev->mc_handle); +err_enable: + dpaa2_dpdmai_dpio_unbind(priv); +err_bind: + dpaa2_dpmai_store_free(priv); + dpaa2_dpdmai_dpio_free(priv); +err_dpio_setup: + kfree(priv->ppriv); + dpdmai_close(priv->mc_io, 0, dpdmai_dev->mc_handle); +err_dpdmai_setup: + fsl_mc_portal_free(priv->mc_io); +err_mcportal: + kfree(priv); + dev_set_drvdata(dev, NULL); + return err; +} + +static void dpaa2_qdma_remove(struct fsl_mc_device *ls_dev) +{ + struct dpaa2_qdma_engine *dpaa2_qdma; + struct dpaa2_qdma_priv *priv; + struct device *dev; + + dev = &ls_dev->dev; + priv = dev_get_drvdata(dev); + dpaa2_qdma = priv->dpaa2_qdma; + + dpdmai_disable(priv->mc_io, 0, ls_dev->mc_handle); + dpaa2_dpdmai_dpio_unbind(priv); + dpaa2_dpmai_store_free(priv); + dpaa2_dpdmai_dpio_free(priv); + dpdmai_close(priv->mc_io, 0, ls_dev->mc_handle); + fsl_mc_portal_free(priv->mc_io); + dev_set_drvdata(dev, NULL); + dpaa2_dpdmai_free_channels(dpaa2_qdma); + + dma_async_device_unregister(&dpaa2_qdma->dma_dev); + kfree(priv); + kfree(dpaa2_qdma); +} + +static void dpaa2_qdma_shutdown(struct fsl_mc_device *ls_dev) +{ + struct dpaa2_qdma_priv *priv; + struct device *dev; + + dev = &ls_dev->dev; + priv = dev_get_drvdata(dev); + + dpdmai_disable(priv->mc_io, 0, ls_dev->mc_handle); + dpaa2_dpdmai_dpio_unbind(priv); + dpdmai_close(priv->mc_io, 0, ls_dev->mc_handle); + dpdmai_destroy(priv->mc_io, 0, ls_dev->mc_handle); +} + +static const struct fsl_mc_device_id dpaa2_qdma_id_table[] = { + { + .vendor = FSL_MC_VENDOR_FREESCALE, + .obj_type = "dpdmai", + }, + { .vendor = 0x0 } +}; + +static struct fsl_mc_driver dpaa2_qdma_driver = { + .driver = { + .name = "dpaa2-qdma", + .owner = THIS_MODULE, + }, + .probe = dpaa2_qdma_probe, + .remove = dpaa2_qdma_remove, + .shutdown = dpaa2_qdma_shutdown, + .match_id_table = dpaa2_qdma_id_table +}; + +static int __init dpaa2_qdma_driver_init(void) +{ + return fsl_mc_driver_register(&(dpaa2_qdma_driver)); +} +late_initcall(dpaa2_qdma_driver_init); + +static void __exit fsl_qdma_exit(void) +{ + fsl_mc_driver_unregister(&(dpaa2_qdma_driver)); +} +module_exit(fsl_qdma_exit); + +MODULE_ALIAS("platform:fsl-dpaa2-qdma"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("NXP Layerscape DPAA2 qDMA engine driver"); diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h new file mode 100644 index 0000000000..03e2f4e0ba --- /dev/null +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 NXP */ + +#ifndef __DPAA2_QDMA_H +#define __DPAA2_QDMA_H + +#define DPAA2_QDMA_STORE_SIZE 16 +#define NUM_CH 8 + +struct dpaa2_qdma_sd_d { + u32 rsv:32; + union { + struct { + u32 ssd:12; /* souce stride distance */ + u32 sss:12; /* souce stride size */ + u32 rsv1:8; + } sdf; + struct { + u32 dsd:12; /* Destination stride distance */ + u32 dss:12; /* Destination stride size */ + u32 rsv2:8; + } ddf; + } df; + u32 rbpcmd; /* Route-by-port command */ + u32 cmd; +} __attribute__((__packed__)); + +/* Source descriptor command read transaction type for RBP=0: */ +/* coherent copy of cacheable memory */ +#define QDMA_SD_CMD_RDTTYPE_COHERENT (0xb << 28) +/* Destination descriptor command write transaction type for RBP=0: */ +/* coherent copy of cacheable memory */ +#define QDMA_DD_CMD_WRTTYPE_COHERENT (0x6 << 28) +#define LX2160_QDMA_DD_CMD_WRTTYPE_COHERENT (0xb << 28) + +#define QMAN_FD_FMT_ENABLE BIT(0) /* frame list table enable */ +#define QMAN_FD_BMT_ENABLE BIT(15) /* bypass memory translation */ +#define QMAN_FD_BMT_DISABLE (0) /* bypass memory translation */ +#define QMAN_FD_SL_DISABLE (0) /* short lengthe disabled */ +#define QMAN_FD_SL_ENABLE BIT(14) /* short lengthe enabled */ + +#define QDMA_FINAL_BIT_DISABLE (0) /* final bit disable */ +#define QDMA_FINAL_BIT_ENABLE BIT(31) /* final bit enable */ + +#define QDMA_FD_SHORT_FORMAT BIT(11) /* short format */ +#define QDMA_FD_LONG_FORMAT (0) /* long format */ +#define QDMA_SER_DISABLE (8) /* no notification */ +#define QDMA_SER_CTX BIT(8) /* notification by FQD_CTX[fqid] */ +#define QDMA_SER_DEST (2 << 8) /* notification by destination desc */ +#define QDMA_SER_BOTH (3 << 8) /* soruce and dest notification */ +#define QDMA_FD_SPF_ENALBE BIT(30) /* source prefetch enable */ + +#define QMAN_FD_VA_ENABLE BIT(14) /* Address used is virtual address */ +#define QMAN_FD_VA_DISABLE (0)/* Address used is a real address */ +/* Flow Context: 49bit physical address */ +#define QMAN_FD_CBMT_ENABLE BIT(15) +#define QMAN_FD_CBMT_DISABLE (0) /* Flow Context: 64bit virtual address */ +#define QMAN_FD_SC_DISABLE (0) /* stashing control */ + +#define QDMA_FL_FMT_SBF (0x0) /* Single buffer frame */ +#define QDMA_FL_FMT_SGE (0x2) /* Scatter gather frame */ +#define QDMA_FL_BMT_ENABLE BIT(15) /* enable bypass memory translation */ +#define QDMA_FL_BMT_DISABLE (0x0) /* enable bypass memory translation */ +#define QDMA_FL_SL_LONG (0x0)/* long length */ +#define QDMA_FL_SL_SHORT (0x1) /* short length */ +#define QDMA_FL_F (0x1)/* last frame list bit */ + +/*Description of Frame list table structure*/ +struct dpaa2_qdma_chan { + struct dpaa2_qdma_engine *qdma; + struct virt_dma_chan vchan; + struct virt_dma_desc vdesc; + enum dma_status status; + u32 fqid; + + /* spinlock used by dpaa2 qdma driver */ + spinlock_t queue_lock; + struct dma_pool *fd_pool; + struct dma_pool *fl_pool; + struct dma_pool *sdd_pool; + + struct list_head comp_used; + struct list_head comp_free; + +}; + +struct dpaa2_qdma_comp { + dma_addr_t fd_bus_addr; + dma_addr_t fl_bus_addr; + dma_addr_t desc_bus_addr; + struct dpaa2_fd *fd_virt_addr; + struct dpaa2_fl_entry *fl_virt_addr; + struct dpaa2_qdma_sd_d *desc_virt_addr; + struct dpaa2_qdma_chan *qchan; + struct virt_dma_desc vdesc; + struct list_head list; +}; + +struct dpaa2_qdma_engine { + struct dma_device dma_dev; + u32 n_chans; + struct dpaa2_qdma_chan chans[NUM_CH]; + int qdma_wrtype_fixup; + int desc_allocated; + + struct dpaa2_qdma_priv *priv; +}; + +/* + * dpaa2_qdma_priv - driver private data + */ +struct dpaa2_qdma_priv { + int dpqdma_id; + + struct iommu_domain *iommu_domain; + struct dpdmai_attr dpdmai_attr; + struct device *dev; + struct fsl_mc_io *mc_io; + struct fsl_mc_device *dpdmai_dev; + u8 num_pairs; + + struct dpaa2_qdma_engine *dpaa2_qdma; + struct dpaa2_qdma_priv_per_prio *ppriv; + + struct dpdmai_rx_queue_attr rx_queue_attr[DPDMAI_PRIO_NUM]; + u32 tx_fqid[DPDMAI_PRIO_NUM]; +}; + +struct dpaa2_qdma_priv_per_prio { + int req_fqid; + int rsp_fqid; + int prio; + + struct dpaa2_io_store *store; + struct dpaa2_io_notification_ctx nctx; + + struct dpaa2_qdma_priv *priv; +}; + +static struct soc_device_attribute soc_fixup_tuning[] = { + { .family = "QorIQ LX2160A"}, + { /* sentinel */ } +}; + +/* FD pool size: one FD + 3 Frame list + 2 source/destination descriptor */ +#define FD_POOL_SIZE (sizeof(struct dpaa2_fd) + \ + sizeof(struct dpaa2_fl_entry) * 3 + \ + sizeof(struct dpaa2_qdma_sd_d) * 2) + +static void dpaa2_dpdmai_free_channels(struct dpaa2_qdma_engine *dpaa2_qdma); +static void dpaa2_dpdmai_free_comp(struct dpaa2_qdma_chan *qchan, + struct list_head *head); +#endif /* __DPAA2_QDMA_H */ diff --git a/drivers/dma/fsl-dpaa2-qdma/dpdmai.c b/drivers/dma/fsl-dpaa2-qdma/dpdmai.c new file mode 100644 index 0000000000..878662aaa1 --- /dev/null +++ b/drivers/dma/fsl-dpaa2-qdma/dpdmai.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright 2019 NXP + +#include +#include +#include +#include +#include "dpdmai.h" + +struct dpdmai_rsp_get_attributes { + __le32 id; + u8 num_of_priorities; + u8 pad0[3]; + __le16 major; + __le16 minor; +}; + +struct dpdmai_cmd_queue { + __le32 dest_id; + u8 priority; + u8 queue; + u8 dest_type; + u8 pad; + __le64 user_ctx; + union { + __le32 options; + __le32 fqid; + }; +}; + +struct dpdmai_rsp_get_tx_queue { + __le64 pad; + __le32 fqid; +}; + +#define MC_CMD_OP(_cmd, _param, _offset, _width, _type, _arg) \ + ((_cmd).params[_param] |= mc_enc((_offset), (_width), _arg)) + +/* cmd, param, offset, width, type, arg_name */ +#define DPDMAI_CMD_CREATE(cmd, cfg) \ +do { \ + MC_CMD_OP(cmd, 0, 8, 8, u8, (cfg)->priorities[0]);\ + MC_CMD_OP(cmd, 0, 16, 8, u8, (cfg)->priorities[1]);\ +} while (0) + +static inline u64 mc_enc(int lsoffset, int width, u64 val) +{ + return (val & MAKE_UMASK64(width)) << lsoffset; +} + +/** + * dpdmai_open() - Open a control session for the specified object + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @dpdmai_id: DPDMAI unique ID + * @token: Returned token; use in subsequent API calls + * + * This function can be used to open a control session for an + * already created object; an object may have been declared in + * the DPL or by calling the dpdmai_create() function. + * This function returns a unique authentication token, + * associated with the specific object ID and the specific MC + * portal; this token must be used in all subsequent commands for + * this specific object. + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_open(struct fsl_mc_io *mc_io, u32 cmd_flags, + int dpdmai_id, u16 *token) +{ + struct fsl_mc_command cmd = { 0 }; + __le64 *cmd_dpdmai_id; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_OPEN, + cmd_flags, 0); + + cmd_dpdmai_id = cmd.params; + *cmd_dpdmai_id = cpu_to_le32(dpdmai_id); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + *token = mc_cmd_hdr_read_token(&cmd); + + return 0; +} +EXPORT_SYMBOL_GPL(dpdmai_open); + +/** + * dpdmai_close() - Close the control session of the object + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * + * After this function is called, no further operations are + * allowed on the object without opening a new control session. + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CLOSE, + cmd_flags, token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} +EXPORT_SYMBOL_GPL(dpdmai_close); + +/** + * dpdmai_create() - Create the DPDMAI object + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @cfg: Configuration structure + * @token: Returned token; use in subsequent API calls + * + * Create the DPDMAI object, allocate required resources and + * perform required initialization. + * + * The object can be created either by declaring it in the + * DPL file, or by calling this function. + * + * This function returns a unique authentication token, + * associated with the specific object ID and the specific MC + * portal; this token must be used in all subsequent calls to + * this specific object. For objects that are created using the + * DPL file, call dpdmai_open() function to get an authentication + * token first. + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_create(struct fsl_mc_io *mc_io, u32 cmd_flags, + const struct dpdmai_cfg *cfg, u16 *token) +{ + struct fsl_mc_command cmd = { 0 }; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CREATE, + cmd_flags, 0); + DPDMAI_CMD_CREATE(cmd, cfg); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + *token = mc_cmd_hdr_read_token(&cmd); + + return 0; +} + +/** + * dpdmai_destroy() - Destroy the DPDMAI object and release all its resources. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * + * Return: '0' on Success; error code otherwise. + */ +int dpdmai_destroy(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_DESTROY, + cmd_flags, token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} +EXPORT_SYMBOL_GPL(dpdmai_destroy); + +/** + * dpdmai_enable() - Enable the DPDMAI, allow sending and receiving frames. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_ENABLE, + cmd_flags, token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} +EXPORT_SYMBOL_GPL(dpdmai_enable); + +/** + * dpdmai_disable() - Disable the DPDMAI, stop sending and receiving frames. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_DISABLE, + cmd_flags, token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} +EXPORT_SYMBOL_GPL(dpdmai_disable); + +/** + * dpdmai_reset() - Reset the DPDMAI, returns the object to initial state. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_RESET, + cmd_flags, token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} +EXPORT_SYMBOL_GPL(dpdmai_reset); + +/** + * dpdmai_get_attributes() - Retrieve DPDMAI attributes. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * @attr: Returned object's attributes + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 token, struct dpdmai_attr *attr) +{ + struct dpdmai_rsp_get_attributes *rsp_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_ATTR, + cmd_flags, token); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + rsp_params = (struct dpdmai_rsp_get_attributes *)cmd.params; + attr->id = le32_to_cpu(rsp_params->id); + attr->version.major = le16_to_cpu(rsp_params->major); + attr->version.minor = le16_to_cpu(rsp_params->minor); + attr->num_of_priorities = rsp_params->num_of_priorities; + + return 0; +} +EXPORT_SYMBOL_GPL(dpdmai_get_attributes); + +/** + * dpdmai_set_rx_queue() - Set Rx queue configuration + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * @priority: Select the queue relative to number of + * priorities configured at DPDMAI creation + * @cfg: Rx queue configuration + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 priority, const struct dpdmai_rx_queue_cfg *cfg) +{ + struct dpdmai_cmd_queue *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_SET_RX_QUEUE, + cmd_flags, token); + + cmd_params = (struct dpdmai_cmd_queue *)cmd.params; + cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id); + cmd_params->priority = cfg->dest_cfg.priority; + cmd_params->queue = priority; + cmd_params->dest_type = cfg->dest_cfg.dest_type; + cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx); + cmd_params->options = cpu_to_le32(cfg->options); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} +EXPORT_SYMBOL_GPL(dpdmai_set_rx_queue); + +/** + * dpdmai_get_rx_queue() - Retrieve Rx queue attributes. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * @priority: Select the queue relative to number of + * priorities configured at DPDMAI creation + * @attr: Returned Rx queue attributes + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 priority, struct dpdmai_rx_queue_attr *attr) +{ + struct dpdmai_cmd_queue *cmd_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_RX_QUEUE, + cmd_flags, token); + + cmd_params = (struct dpdmai_cmd_queue *)cmd.params; + cmd_params->queue = priority; + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + attr->dest_cfg.dest_id = le32_to_cpu(cmd_params->dest_id); + attr->dest_cfg.priority = cmd_params->priority; + attr->dest_cfg.dest_type = cmd_params->dest_type; + attr->user_ctx = le64_to_cpu(cmd_params->user_ctx); + attr->fqid = le32_to_cpu(cmd_params->fqid); + + return 0; +} +EXPORT_SYMBOL_GPL(dpdmai_get_rx_queue); + +/** + * dpdmai_get_tx_queue() - Retrieve Tx queue attributes. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPDMAI object + * @priority: Select the queue relative to number of + * priorities configured at DPDMAI creation + * @fqid: Returned Tx queue + * + * Return: '0' on Success; Error code otherwise. + */ +int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 token, u8 priority, u32 *fqid) +{ + struct dpdmai_rsp_get_tx_queue *rsp_params; + struct dpdmai_cmd_queue *cmd_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_TX_QUEUE, + cmd_flags, token); + + cmd_params = (struct dpdmai_cmd_queue *)cmd.params; + cmd_params->queue = priority; + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + + rsp_params = (struct dpdmai_rsp_get_tx_queue *)cmd.params; + *fqid = le32_to_cpu(rsp_params->fqid); + + return 0; +} +EXPORT_SYMBOL_GPL(dpdmai_get_tx_queue); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/fsl-dpaa2-qdma/dpdmai.h b/drivers/dma/fsl-dpaa2-qdma/dpdmai.h new file mode 100644 index 0000000000..b13b9bf0c0 --- /dev/null +++ b/drivers/dma/fsl-dpaa2-qdma/dpdmai.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 NXP */ + +#ifndef __FSL_DPDMAI_H +#define __FSL_DPDMAI_H + +/* DPDMAI Version */ +#define DPDMAI_VER_MAJOR 2 +#define DPDMAI_VER_MINOR 2 + +#define DPDMAI_CMD_BASE_VERSION 0 +#define DPDMAI_CMD_ID_OFFSET 4 + +#define DPDMAI_CMDID_FORMAT(x) (((x) << DPDMAI_CMD_ID_OFFSET) | \ + DPDMAI_CMD_BASE_VERSION) + +/* Command IDs */ +#define DPDMAI_CMDID_CLOSE DPDMAI_CMDID_FORMAT(0x800) +#define DPDMAI_CMDID_OPEN DPDMAI_CMDID_FORMAT(0x80E) +#define DPDMAI_CMDID_CREATE DPDMAI_CMDID_FORMAT(0x90E) +#define DPDMAI_CMDID_DESTROY DPDMAI_CMDID_FORMAT(0x900) + +#define DPDMAI_CMDID_ENABLE DPDMAI_CMDID_FORMAT(0x002) +#define DPDMAI_CMDID_DISABLE DPDMAI_CMDID_FORMAT(0x003) +#define DPDMAI_CMDID_GET_ATTR DPDMAI_CMDID_FORMAT(0x004) +#define DPDMAI_CMDID_RESET DPDMAI_CMDID_FORMAT(0x005) +#define DPDMAI_CMDID_IS_ENABLED DPDMAI_CMDID_FORMAT(0x006) + +#define DPDMAI_CMDID_SET_IRQ DPDMAI_CMDID_FORMAT(0x010) +#define DPDMAI_CMDID_GET_IRQ DPDMAI_CMDID_FORMAT(0x011) +#define DPDMAI_CMDID_SET_IRQ_ENABLE DPDMAI_CMDID_FORMAT(0x012) +#define DPDMAI_CMDID_GET_IRQ_ENABLE DPDMAI_CMDID_FORMAT(0x013) +#define DPDMAI_CMDID_SET_IRQ_MASK DPDMAI_CMDID_FORMAT(0x014) +#define DPDMAI_CMDID_GET_IRQ_MASK DPDMAI_CMDID_FORMAT(0x015) +#define DPDMAI_CMDID_GET_IRQ_STATUS DPDMAI_CMDID_FORMAT(0x016) +#define DPDMAI_CMDID_CLEAR_IRQ_STATUS DPDMAI_CMDID_FORMAT(0x017) + +#define DPDMAI_CMDID_SET_RX_QUEUE DPDMAI_CMDID_FORMAT(0x1A0) +#define DPDMAI_CMDID_GET_RX_QUEUE DPDMAI_CMDID_FORMAT(0x1A1) +#define DPDMAI_CMDID_GET_TX_QUEUE DPDMAI_CMDID_FORMAT(0x1A2) + +#define MC_CMD_HDR_TOKEN_O 32 /* Token field offset */ +#define MC_CMD_HDR_TOKEN_S 16 /* Token field size */ + +#define MAKE_UMASK64(_width) \ + ((u64)((_width) < 64 ? ((u64)1 << (_width)) - 1 : (u64)-1)) + +/* Data Path DMA Interface API + * Contains initialization APIs and runtime control APIs for DPDMAI + */ + +/** + * Maximum number of Tx/Rx priorities per DPDMAI object + */ +#define DPDMAI_PRIO_NUM 2 + +/* DPDMAI queue modification options */ + +/** + * Select to modify the user's context associated with the queue + */ +#define DPDMAI_QUEUE_OPT_USER_CTX 0x1 + +/** + * Select to modify the queue's destination + */ +#define DPDMAI_QUEUE_OPT_DEST 0x2 + +/** + * struct dpdmai_cfg - Structure representing DPDMAI configuration + * @priorities: Priorities for the DMA hardware processing; valid priorities are + * configured with values 1-8; the entry following last valid entry + * should be configured with 0 + */ +struct dpdmai_cfg { + u8 priorities[DPDMAI_PRIO_NUM]; +}; + +/** + * struct dpdmai_attr - Structure representing DPDMAI attributes + * @id: DPDMAI object ID + * @version: DPDMAI version + * @num_of_priorities: number of priorities + */ +struct dpdmai_attr { + int id; + /** + * struct version - DPDMAI version + * @major: DPDMAI major version + * @minor: DPDMAI minor version + */ + struct { + u16 major; + u16 minor; + } version; + u8 num_of_priorities; +}; + +/** + * enum dpdmai_dest - DPDMAI destination types + * @DPDMAI_DEST_NONE: Unassigned destination; The queue is set in parked mode + * and does not generate FQDAN notifications; user is expected to dequeue + * from the queue based on polling or other user-defined method + * @DPDMAI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN + * notifications to the specified DPIO; user is expected to dequeue + * from the queue only after notification is received + * @DPDMAI_DEST_DPCON: The queue is set in schedule mode and does not generate + * FQDAN notifications, but is connected to the specified DPCON object; + * user is expected to dequeue from the DPCON channel + */ +enum dpdmai_dest { + DPDMAI_DEST_NONE = 0, + DPDMAI_DEST_DPIO = 1, + DPDMAI_DEST_DPCON = 2 +}; + +/** + * struct dpdmai_dest_cfg - Structure representing DPDMAI destination parameters + * @dest_type: Destination type + * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type + * @priority: Priority selection within the DPIO or DPCON channel; valid values + * are 0-1 or 0-7, depending on the number of priorities in that + * channel; not relevant for 'DPDMAI_DEST_NONE' option + */ +struct dpdmai_dest_cfg { + enum dpdmai_dest dest_type; + int dest_id; + u8 priority; +}; + +/** + * struct dpdmai_rx_queue_cfg - DPDMAI RX queue configuration + * @options: Flags representing the suggested modifications to the queue; + * Use any combination of 'DPDMAI_QUEUE_OPT_' flags + * @user_ctx: User context value provided in the frame descriptor of each + * dequeued frame; + * valid only if 'DPDMAI_QUEUE_OPT_USER_CTX' is contained in 'options' + * @dest_cfg: Queue destination parameters; + * valid only if 'DPDMAI_QUEUE_OPT_DEST' is contained in 'options' + */ +struct dpdmai_rx_queue_cfg { + struct dpdmai_dest_cfg dest_cfg; + u32 options; + u64 user_ctx; + +}; + +/** + * struct dpdmai_rx_queue_attr - Structure representing attributes of Rx queues + * @user_ctx: User context value provided in the frame descriptor of each + * dequeued frame + * @dest_cfg: Queue destination configuration + * @fqid: Virtual FQID value to be used for dequeue operations + */ +struct dpdmai_rx_queue_attr { + struct dpdmai_dest_cfg dest_cfg; + u64 user_ctx; + u32 fqid; +}; + +int dpdmai_open(struct fsl_mc_io *mc_io, u32 cmd_flags, + int dpdmai_id, u16 *token); +int dpdmai_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); +int dpdmai_destroy(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); +int dpdmai_create(struct fsl_mc_io *mc_io, u32 cmd_flags, + const struct dpdmai_cfg *cfg, u16 *token); +int dpdmai_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); +int dpdmai_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); +int dpdmai_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); +int dpdmai_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 token, struct dpdmai_attr *attr); +int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 priority, const struct dpdmai_rx_queue_cfg *cfg); +int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 priority, struct dpdmai_rx_queue_attr *attr); +int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 token, u8 priority, u32 *fqid); + +#endif /* __FSL_DPDMAI_H */ diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c new file mode 100644 index 0000000000..b53f46245c --- /dev/null +++ b/drivers/dma/fsl-edma-common.c @@ -0,0 +1,881 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright (c) 2013-2014 Freescale Semiconductor, Inc +// Copyright (c) 2017 Sysam, Angelo Dureghello + +#include +#include +#include +#include +#include +#include + +#include "fsl-edma-common.h" + +#define EDMA_CR 0x00 +#define EDMA_ES 0x04 +#define EDMA_ERQ 0x0C +#define EDMA_EEI 0x14 +#define EDMA_SERQ 0x1B +#define EDMA_CERQ 0x1A +#define EDMA_SEEI 0x19 +#define EDMA_CEEI 0x18 +#define EDMA_CINT 0x1F +#define EDMA_CERR 0x1E +#define EDMA_SSRT 0x1D +#define EDMA_CDNE 0x1C +#define EDMA_INTR 0x24 +#define EDMA_ERR 0x2C + +#define EDMA64_ERQH 0x08 +#define EDMA64_EEIH 0x10 +#define EDMA64_SERQ 0x18 +#define EDMA64_CERQ 0x19 +#define EDMA64_SEEI 0x1a +#define EDMA64_CEEI 0x1b +#define EDMA64_CINT 0x1c +#define EDMA64_CERR 0x1d +#define EDMA64_SSRT 0x1e +#define EDMA64_CDNE 0x1f +#define EDMA64_INTH 0x20 +#define EDMA64_INTL 0x24 +#define EDMA64_ERRH 0x28 +#define EDMA64_ERRL 0x2c + +void fsl_edma_tx_chan_handler(struct fsl_edma_chan *fsl_chan) +{ + spin_lock(&fsl_chan->vchan.lock); + + if (!fsl_chan->edesc) { + /* terminate_all called before */ + spin_unlock(&fsl_chan->vchan.lock); + return; + } + + if (!fsl_chan->edesc->iscyclic) { + list_del(&fsl_chan->edesc->vdesc.node); + vchan_cookie_complete(&fsl_chan->edesc->vdesc); + fsl_chan->edesc = NULL; + fsl_chan->status = DMA_COMPLETE; + fsl_chan->idle = true; + } else { + vchan_cyclic_callback(&fsl_chan->edesc->vdesc); + } + + if (!fsl_chan->edesc) + fsl_edma_xfer_desc(fsl_chan); + + spin_unlock(&fsl_chan->vchan.lock); +} + +static void fsl_edma3_enable_request(struct fsl_edma_chan *fsl_chan) +{ + u32 val, flags; + + flags = fsl_edma_drvflags(fsl_chan); + val = edma_readl_chreg(fsl_chan, ch_sbr); + /* Remote/local swapped wrongly on iMX8 QM Audio edma */ + if (flags & FSL_EDMA_DRV_QUIRK_SWAPPED) { + if (!fsl_chan->is_rxchan) + val |= EDMA_V3_CH_SBR_RD; + else + val |= EDMA_V3_CH_SBR_WR; + } else { + if (fsl_chan->is_rxchan) + val |= EDMA_V3_CH_SBR_RD; + else + val |= EDMA_V3_CH_SBR_WR; + } + + if (fsl_chan->is_remote) + val &= ~(EDMA_V3_CH_SBR_RD | EDMA_V3_CH_SBR_WR); + + edma_writel_chreg(fsl_chan, val, ch_sbr); + + if (flags & FSL_EDMA_DRV_HAS_CHMUX) { + /* + * ch_mux: With the exception of 0, attempts to write a value + * already in use will be forced to 0. + */ + if (!edma_readl_chreg(fsl_chan, ch_mux)) + edma_writel_chreg(fsl_chan, fsl_chan->srcid, ch_mux); + } + + val = edma_readl_chreg(fsl_chan, ch_csr); + val |= EDMA_V3_CH_CSR_ERQ; + edma_writel_chreg(fsl_chan, val, ch_csr); +} + +static void fsl_edma_enable_request(struct fsl_edma_chan *fsl_chan) +{ + struct edma_regs *regs = &fsl_chan->edma->regs; + u32 ch = fsl_chan->vchan.chan.chan_id; + + if (fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_SPLIT_REG) + return fsl_edma3_enable_request(fsl_chan); + + if (fsl_chan->edma->drvdata->flags & FSL_EDMA_DRV_WRAP_IO) { + edma_writeb(fsl_chan->edma, EDMA_SEEI_SEEI(ch), regs->seei); + edma_writeb(fsl_chan->edma, ch, regs->serq); + } else { + /* ColdFire is big endian, and accesses natively + * big endian I/O peripherals + */ + iowrite8(EDMA_SEEI_SEEI(ch), regs->seei); + iowrite8(ch, regs->serq); + } +} + +static void fsl_edma3_disable_request(struct fsl_edma_chan *fsl_chan) +{ + u32 val = edma_readl_chreg(fsl_chan, ch_csr); + u32 flags; + + flags = fsl_edma_drvflags(fsl_chan); + + if (flags & FSL_EDMA_DRV_HAS_CHMUX) + edma_writel_chreg(fsl_chan, 0, ch_mux); + + val &= ~EDMA_V3_CH_CSR_ERQ; + edma_writel_chreg(fsl_chan, val, ch_csr); +} + +void fsl_edma_disable_request(struct fsl_edma_chan *fsl_chan) +{ + struct edma_regs *regs = &fsl_chan->edma->regs; + u32 ch = fsl_chan->vchan.chan.chan_id; + + if (fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_SPLIT_REG) + return fsl_edma3_disable_request(fsl_chan); + + if (fsl_chan->edma->drvdata->flags & FSL_EDMA_DRV_WRAP_IO) { + edma_writeb(fsl_chan->edma, ch, regs->cerq); + edma_writeb(fsl_chan->edma, EDMA_CEEI_CEEI(ch), regs->ceei); + } else { + /* ColdFire is big endian, and accesses natively + * big endian I/O peripherals + */ + iowrite8(ch, regs->cerq); + iowrite8(EDMA_CEEI_CEEI(ch), regs->ceei); + } +} + +static void mux_configure8(struct fsl_edma_chan *fsl_chan, void __iomem *addr, + u32 off, u32 slot, bool enable) +{ + u8 val8; + + if (enable) + val8 = EDMAMUX_CHCFG_ENBL | slot; + else + val8 = EDMAMUX_CHCFG_DIS; + + iowrite8(val8, addr + off); +} + +static void mux_configure32(struct fsl_edma_chan *fsl_chan, void __iomem *addr, + u32 off, u32 slot, bool enable) +{ + u32 val; + + if (enable) + val = EDMAMUX_CHCFG_ENBL << 24 | slot; + else + val = EDMAMUX_CHCFG_DIS; + + iowrite32(val, addr + off * 4); +} + +void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan, + unsigned int slot, bool enable) +{ + u32 ch = fsl_chan->vchan.chan.chan_id; + void __iomem *muxaddr; + unsigned int chans_per_mux, ch_off; + int endian_diff[4] = {3, 1, -1, -3}; + u32 dmamux_nr = fsl_chan->edma->drvdata->dmamuxs; + + if (!dmamux_nr) + return; + + chans_per_mux = fsl_chan->edma->n_chans / dmamux_nr; + ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux; + + if (fsl_chan->edma->drvdata->flags & FSL_EDMA_DRV_MUX_SWAP) + ch_off += endian_diff[ch_off % 4]; + + muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux]; + slot = EDMAMUX_CHCFG_SOURCE(slot); + + if (fsl_chan->edma->drvdata->flags & FSL_EDMA_DRV_CONFIG32) + mux_configure32(fsl_chan, muxaddr, ch_off, slot, enable); + else + mux_configure8(fsl_chan, muxaddr, ch_off, slot, enable); +} + +static unsigned int fsl_edma_get_tcd_attr(enum dma_slave_buswidth addr_width) +{ + u32 val; + + if (addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + + val = ffs(addr_width) - 1; + return val | (val << 8); +} + +void fsl_edma_free_desc(struct virt_dma_desc *vdesc) +{ + struct fsl_edma_desc *fsl_desc; + int i; + + fsl_desc = to_fsl_edma_desc(vdesc); + for (i = 0; i < fsl_desc->n_tcds; i++) + dma_pool_free(fsl_desc->echan->tcd_pool, fsl_desc->tcd[i].vtcd, + fsl_desc->tcd[i].ptcd); + kfree(fsl_desc); +} + +int fsl_edma_terminate_all(struct dma_chan *chan) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + fsl_edma_disable_request(fsl_chan); + fsl_chan->edesc = NULL; + fsl_chan->idle = true; + vchan_get_all_descriptors(&fsl_chan->vchan, &head); + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + vchan_dma_desc_free_list(&fsl_chan->vchan, &head); + + if (fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_HAS_PD) + pm_runtime_allow(fsl_chan->pd_dev); + + return 0; +} + +int fsl_edma_pause(struct dma_chan *chan) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + if (fsl_chan->edesc) { + fsl_edma_disable_request(fsl_chan); + fsl_chan->status = DMA_PAUSED; + fsl_chan->idle = true; + } + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + return 0; +} + +int fsl_edma_resume(struct dma_chan *chan) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + if (fsl_chan->edesc) { + fsl_edma_enable_request(fsl_chan); + fsl_chan->status = DMA_IN_PROGRESS; + fsl_chan->idle = false; + } + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + return 0; +} + +static void fsl_edma_unprep_slave_dma(struct fsl_edma_chan *fsl_chan) +{ + if (fsl_chan->dma_dir != DMA_NONE) + dma_unmap_resource(fsl_chan->vchan.chan.device->dev, + fsl_chan->dma_dev_addr, + fsl_chan->dma_dev_size, + fsl_chan->dma_dir, 0); + fsl_chan->dma_dir = DMA_NONE; +} + +static bool fsl_edma_prep_slave_dma(struct fsl_edma_chan *fsl_chan, + enum dma_transfer_direction dir) +{ + struct device *dev = fsl_chan->vchan.chan.device->dev; + enum dma_data_direction dma_dir; + phys_addr_t addr = 0; + u32 size = 0; + + switch (dir) { + case DMA_MEM_TO_DEV: + dma_dir = DMA_FROM_DEVICE; + addr = fsl_chan->cfg.dst_addr; + size = fsl_chan->cfg.dst_maxburst; + break; + case DMA_DEV_TO_MEM: + dma_dir = DMA_TO_DEVICE; + addr = fsl_chan->cfg.src_addr; + size = fsl_chan->cfg.src_maxburst; + break; + default: + dma_dir = DMA_NONE; + break; + } + + /* Already mapped for this config? */ + if (fsl_chan->dma_dir == dma_dir) + return true; + + fsl_edma_unprep_slave_dma(fsl_chan); + + fsl_chan->dma_dev_addr = dma_map_resource(dev, addr, size, dma_dir, 0); + if (dma_mapping_error(dev, fsl_chan->dma_dev_addr)) + return false; + fsl_chan->dma_dev_size = size; + fsl_chan->dma_dir = dma_dir; + + return true; +} + +int fsl_edma_slave_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + + memcpy(&fsl_chan->cfg, cfg, sizeof(*cfg)); + fsl_edma_unprep_slave_dma(fsl_chan); + + return 0; +} + +static size_t fsl_edma_desc_residue(struct fsl_edma_chan *fsl_chan, + struct virt_dma_desc *vdesc, bool in_progress) +{ + struct fsl_edma_desc *edesc = fsl_chan->edesc; + enum dma_transfer_direction dir = edesc->dirn; + dma_addr_t cur_addr, dma_addr; + size_t len, size; + u32 nbytes = 0; + int i; + + /* calculate the total size in this desc */ + for (len = i = 0; i < fsl_chan->edesc->n_tcds; i++) { + nbytes = le32_to_cpu(edesc->tcd[i].vtcd->nbytes); + if (nbytes & (EDMA_V3_TCD_NBYTES_DMLOE | EDMA_V3_TCD_NBYTES_SMLOE)) + nbytes = EDMA_V3_TCD_NBYTES_MLOFF_NBYTES(nbytes); + len += nbytes * le16_to_cpu(edesc->tcd[i].vtcd->biter); + } + + if (!in_progress) + return len; + + if (dir == DMA_MEM_TO_DEV) + cur_addr = edma_read_tcdreg(fsl_chan, saddr); + else + cur_addr = edma_read_tcdreg(fsl_chan, daddr); + + /* figure out the finished and calculate the residue */ + for (i = 0; i < fsl_chan->edesc->n_tcds; i++) { + nbytes = le32_to_cpu(edesc->tcd[i].vtcd->nbytes); + if (nbytes & (EDMA_V3_TCD_NBYTES_DMLOE | EDMA_V3_TCD_NBYTES_SMLOE)) + nbytes = EDMA_V3_TCD_NBYTES_MLOFF_NBYTES(nbytes); + + size = nbytes * le16_to_cpu(edesc->tcd[i].vtcd->biter); + + if (dir == DMA_MEM_TO_DEV) + dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->saddr); + else + dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->daddr); + + len -= size; + if (cur_addr >= dma_addr && cur_addr < dma_addr + size) { + len += dma_addr + size - cur_addr; + break; + } + } + + return len; +} + +enum dma_status fsl_edma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + struct virt_dma_desc *vdesc; + enum dma_status status; + unsigned long flags; + + status = dma_cookie_status(chan, cookie, txstate); + if (status == DMA_COMPLETE) + return status; + + if (!txstate) + return fsl_chan->status; + + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + vdesc = vchan_find_desc(&fsl_chan->vchan, cookie); + if (fsl_chan->edesc && cookie == fsl_chan->edesc->vdesc.tx.cookie) + txstate->residue = + fsl_edma_desc_residue(fsl_chan, vdesc, true); + else if (vdesc) + txstate->residue = + fsl_edma_desc_residue(fsl_chan, vdesc, false); + else + txstate->residue = 0; + + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + + return fsl_chan->status; +} + +static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan, + struct fsl_edma_hw_tcd *tcd) +{ + u16 csr = 0; + + /* + * TCD parameters are stored in struct fsl_edma_hw_tcd in little + * endian format. However, we need to load the TCD registers in + * big- or little-endian obeying the eDMA engine model endian, + * and this is performed from specific edma_write functions + */ + edma_write_tcdreg(fsl_chan, 0, csr); + + edma_write_tcdreg(fsl_chan, tcd->saddr, saddr); + edma_write_tcdreg(fsl_chan, tcd->daddr, daddr); + + edma_write_tcdreg(fsl_chan, tcd->attr, attr); + edma_write_tcdreg(fsl_chan, tcd->soff, soff); + + edma_write_tcdreg(fsl_chan, tcd->nbytes, nbytes); + edma_write_tcdreg(fsl_chan, tcd->slast, slast); + + edma_write_tcdreg(fsl_chan, tcd->citer, citer); + edma_write_tcdreg(fsl_chan, tcd->biter, biter); + edma_write_tcdreg(fsl_chan, tcd->doff, doff); + + edma_write_tcdreg(fsl_chan, tcd->dlast_sga, dlast_sga); + + csr = le16_to_cpu(tcd->csr); + + if (fsl_chan->is_sw) { + csr |= EDMA_TCD_CSR_START; + tcd->csr = cpu_to_le16(csr); + } + + /* + * Must clear CHn_CSR[DONE] bit before enable TCDn_CSR[ESG] at EDMAv3 + * eDMAv4 have not such requirement. + * Change MLINK need clear CHn_CSR[DONE] for both eDMAv3 and eDMAv4. + */ + if (((fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_CLEAR_DONE_E_SG) && + (csr & EDMA_TCD_CSR_E_SG)) || + ((fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_CLEAR_DONE_E_LINK) && + (csr & EDMA_TCD_CSR_E_LINK))) + edma_writel_chreg(fsl_chan, edma_readl_chreg(fsl_chan, ch_csr), ch_csr); + + + edma_write_tcdreg(fsl_chan, tcd->csr, csr); +} + +static inline +void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan, + struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst, + u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer, + u16 biter, u16 doff, u32 dlast_sga, bool major_int, + bool disable_req, bool enable_sg) +{ + struct dma_slave_config *cfg = &fsl_chan->cfg; + u16 csr = 0; + u32 burst; + + /* + * eDMA hardware SGs require the TCDs to be stored in little + * endian format irrespective of the register endian model. + * So we put the value in little endian in memory, waiting + * for fsl_edma_set_tcd_regs doing the swap. + */ + tcd->saddr = cpu_to_le32(src); + tcd->daddr = cpu_to_le32(dst); + + tcd->attr = cpu_to_le16(attr); + + tcd->soff = cpu_to_le16(soff); + + if (fsl_chan->is_multi_fifo) { + /* set mloff to support multiple fifo */ + burst = cfg->direction == DMA_DEV_TO_MEM ? + cfg->src_addr_width : cfg->dst_addr_width; + nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-(burst * 4)); + /* enable DMLOE/SMLOE */ + if (cfg->direction == DMA_MEM_TO_DEV) { + nbytes |= EDMA_V3_TCD_NBYTES_DMLOE; + nbytes &= ~EDMA_V3_TCD_NBYTES_SMLOE; + } else { + nbytes |= EDMA_V3_TCD_NBYTES_SMLOE; + nbytes &= ~EDMA_V3_TCD_NBYTES_DMLOE; + } + } + + tcd->nbytes = cpu_to_le32(nbytes); + tcd->slast = cpu_to_le32(slast); + + tcd->citer = cpu_to_le16(EDMA_TCD_CITER_CITER(citer)); + tcd->doff = cpu_to_le16(doff); + + tcd->dlast_sga = cpu_to_le32(dlast_sga); + + tcd->biter = cpu_to_le16(EDMA_TCD_BITER_BITER(biter)); + if (major_int) + csr |= EDMA_TCD_CSR_INT_MAJOR; + + if (disable_req) + csr |= EDMA_TCD_CSR_D_REQ; + + if (enable_sg) + csr |= EDMA_TCD_CSR_E_SG; + + if (fsl_chan->is_rxchan) + csr |= EDMA_TCD_CSR_ACTIVE; + + if (fsl_chan->is_sw) + csr |= EDMA_TCD_CSR_START; + + tcd->csr = cpu_to_le16(csr); +} + +static struct fsl_edma_desc *fsl_edma_alloc_desc(struct fsl_edma_chan *fsl_chan, + int sg_len) +{ + struct fsl_edma_desc *fsl_desc; + int i; + + fsl_desc = kzalloc(struct_size(fsl_desc, tcd, sg_len), GFP_NOWAIT); + if (!fsl_desc) + return NULL; + + fsl_desc->echan = fsl_chan; + fsl_desc->n_tcds = sg_len; + for (i = 0; i < sg_len; i++) { + fsl_desc->tcd[i].vtcd = dma_pool_alloc(fsl_chan->tcd_pool, + GFP_NOWAIT, &fsl_desc->tcd[i].ptcd); + if (!fsl_desc->tcd[i].vtcd) + goto err; + } + return fsl_desc; + +err: + while (--i >= 0) + dma_pool_free(fsl_chan->tcd_pool, fsl_desc->tcd[i].vtcd, + fsl_desc->tcd[i].ptcd); + kfree(fsl_desc); + return NULL; +} + +struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + struct fsl_edma_desc *fsl_desc; + dma_addr_t dma_buf_next; + bool major_int = true; + int sg_len, i; + u32 src_addr, dst_addr, last_sg, nbytes; + u16 soff, doff, iter; + + if (!is_slave_direction(direction)) + return NULL; + + if (!fsl_edma_prep_slave_dma(fsl_chan, direction)) + return NULL; + + sg_len = buf_len / period_len; + fsl_desc = fsl_edma_alloc_desc(fsl_chan, sg_len); + if (!fsl_desc) + return NULL; + fsl_desc->iscyclic = true; + fsl_desc->dirn = direction; + + dma_buf_next = dma_addr; + if (direction == DMA_MEM_TO_DEV) { + fsl_chan->attr = + fsl_edma_get_tcd_attr(fsl_chan->cfg.dst_addr_width); + nbytes = fsl_chan->cfg.dst_addr_width * + fsl_chan->cfg.dst_maxburst; + } else { + fsl_chan->attr = + fsl_edma_get_tcd_attr(fsl_chan->cfg.src_addr_width); + nbytes = fsl_chan->cfg.src_addr_width * + fsl_chan->cfg.src_maxburst; + } + + iter = period_len / nbytes; + + for (i = 0; i < sg_len; i++) { + if (dma_buf_next >= dma_addr + buf_len) + dma_buf_next = dma_addr; + + /* get next sg's physical address */ + last_sg = fsl_desc->tcd[(i + 1) % sg_len].ptcd; + + if (direction == DMA_MEM_TO_DEV) { + src_addr = dma_buf_next; + dst_addr = fsl_chan->dma_dev_addr; + soff = fsl_chan->cfg.dst_addr_width; + doff = fsl_chan->is_multi_fifo ? 4 : 0; + } else if (direction == DMA_DEV_TO_MEM) { + src_addr = fsl_chan->dma_dev_addr; + dst_addr = dma_buf_next; + soff = fsl_chan->is_multi_fifo ? 4 : 0; + doff = fsl_chan->cfg.src_addr_width; + } else { + /* DMA_DEV_TO_DEV */ + src_addr = fsl_chan->cfg.src_addr; + dst_addr = fsl_chan->cfg.dst_addr; + soff = doff = 0; + major_int = false; + } + + fsl_edma_fill_tcd(fsl_chan, fsl_desc->tcd[i].vtcd, src_addr, dst_addr, + fsl_chan->attr, soff, nbytes, 0, iter, + iter, doff, last_sg, major_int, false, true); + dma_buf_next += period_len; + } + + return vchan_tx_prep(&fsl_chan->vchan, &fsl_desc->vdesc, flags); +} + +struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + struct fsl_edma_desc *fsl_desc; + struct scatterlist *sg; + u32 src_addr, dst_addr, last_sg, nbytes; + u16 soff, doff, iter; + int i; + + if (!is_slave_direction(direction)) + return NULL; + + if (!fsl_edma_prep_slave_dma(fsl_chan, direction)) + return NULL; + + fsl_desc = fsl_edma_alloc_desc(fsl_chan, sg_len); + if (!fsl_desc) + return NULL; + fsl_desc->iscyclic = false; + fsl_desc->dirn = direction; + + if (direction == DMA_MEM_TO_DEV) { + fsl_chan->attr = + fsl_edma_get_tcd_attr(fsl_chan->cfg.dst_addr_width); + nbytes = fsl_chan->cfg.dst_addr_width * + fsl_chan->cfg.dst_maxburst; + } else { + fsl_chan->attr = + fsl_edma_get_tcd_attr(fsl_chan->cfg.src_addr_width); + nbytes = fsl_chan->cfg.src_addr_width * + fsl_chan->cfg.src_maxburst; + } + + for_each_sg(sgl, sg, sg_len, i) { + if (direction == DMA_MEM_TO_DEV) { + src_addr = sg_dma_address(sg); + dst_addr = fsl_chan->dma_dev_addr; + soff = fsl_chan->cfg.dst_addr_width; + doff = 0; + } else if (direction == DMA_DEV_TO_MEM) { + src_addr = fsl_chan->dma_dev_addr; + dst_addr = sg_dma_address(sg); + soff = 0; + doff = fsl_chan->cfg.src_addr_width; + } else { + /* DMA_DEV_TO_DEV */ + src_addr = fsl_chan->cfg.src_addr; + dst_addr = fsl_chan->cfg.dst_addr; + soff = 0; + doff = 0; + } + + /* + * Choose the suitable burst length if sg_dma_len is not + * multiple of burst length so that the whole transfer length is + * multiple of minor loop(burst length). + */ + if (sg_dma_len(sg) % nbytes) { + u32 width = (direction == DMA_DEV_TO_MEM) ? doff : soff; + u32 burst = (direction == DMA_DEV_TO_MEM) ? + fsl_chan->cfg.src_maxburst : + fsl_chan->cfg.dst_maxburst; + int j; + + for (j = burst; j > 1; j--) { + if (!(sg_dma_len(sg) % (j * width))) { + nbytes = j * width; + break; + } + } + /* Set burst size as 1 if there's no suitable one */ + if (j == 1) + nbytes = width; + } + iter = sg_dma_len(sg) / nbytes; + if (i < sg_len - 1) { + last_sg = fsl_desc->tcd[(i + 1)].ptcd; + fsl_edma_fill_tcd(fsl_chan, fsl_desc->tcd[i].vtcd, src_addr, + dst_addr, fsl_chan->attr, soff, + nbytes, 0, iter, iter, doff, last_sg, + false, false, true); + } else { + last_sg = 0; + fsl_edma_fill_tcd(fsl_chan, fsl_desc->tcd[i].vtcd, src_addr, + dst_addr, fsl_chan->attr, soff, + nbytes, 0, iter, iter, doff, last_sg, + true, true, false); + } + } + + return vchan_tx_prep(&fsl_chan->vchan, &fsl_desc->vdesc, flags); +} + +struct dma_async_tx_descriptor *fsl_edma_prep_memcpy(struct dma_chan *chan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + struct fsl_edma_desc *fsl_desc; + + fsl_desc = fsl_edma_alloc_desc(fsl_chan, 1); + if (!fsl_desc) + return NULL; + fsl_desc->iscyclic = false; + + fsl_chan->is_sw = true; + + /* To match with copy_align and max_seg_size so 1 tcd is enough */ + fsl_edma_fill_tcd(fsl_chan, fsl_desc->tcd[0].vtcd, dma_src, dma_dst, + fsl_edma_get_tcd_attr(DMA_SLAVE_BUSWIDTH_32_BYTES), + 32, len, 0, 1, 1, 32, 0, true, true, false); + + return vchan_tx_prep(&fsl_chan->vchan, &fsl_desc->vdesc, flags); +} + +void fsl_edma_xfer_desc(struct fsl_edma_chan *fsl_chan) +{ + struct virt_dma_desc *vdesc; + + lockdep_assert_held(&fsl_chan->vchan.lock); + + vdesc = vchan_next_desc(&fsl_chan->vchan); + if (!vdesc) + return; + fsl_chan->edesc = to_fsl_edma_desc(vdesc); + fsl_edma_set_tcd_regs(fsl_chan, fsl_chan->edesc->tcd[0].vtcd); + fsl_edma_enable_request(fsl_chan); + fsl_chan->status = DMA_IN_PROGRESS; + fsl_chan->idle = false; +} + +void fsl_edma_issue_pending(struct dma_chan *chan) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + + if (unlikely(fsl_chan->pm_state != RUNNING)) { + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + /* cannot submit due to suspend */ + return; + } + + if (vchan_issue_pending(&fsl_chan->vchan) && !fsl_chan->edesc) + fsl_edma_xfer_desc(fsl_chan); + + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); +} + +int fsl_edma_alloc_chan_resources(struct dma_chan *chan) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + + fsl_chan->tcd_pool = dma_pool_create("tcd_pool", chan->device->dev, + sizeof(struct fsl_edma_hw_tcd), + 32, 0); + return 0; +} + +void fsl_edma_free_chan_resources(struct dma_chan *chan) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + struct fsl_edma_engine *edma = fsl_chan->edma; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + fsl_edma_disable_request(fsl_chan); + if (edma->drvdata->dmamuxs) + fsl_edma_chan_mux(fsl_chan, 0, false); + fsl_chan->edesc = NULL; + vchan_get_all_descriptors(&fsl_chan->vchan, &head); + fsl_edma_unprep_slave_dma(fsl_chan); + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&fsl_chan->vchan, &head); + dma_pool_destroy(fsl_chan->tcd_pool); + fsl_chan->tcd_pool = NULL; + fsl_chan->is_sw = false; + fsl_chan->srcid = 0; +} + +void fsl_edma_cleanup_vchan(struct dma_device *dmadev) +{ + struct fsl_edma_chan *chan, *_chan; + + list_for_each_entry_safe(chan, _chan, + &dmadev->channels, vchan.chan.device_node) { + list_del(&chan->vchan.chan.device_node); + tasklet_kill(&chan->vchan.task); + } +} + +/* + * On the 32 channels Vybrid/mpc577x edma version, register offsets are + * different compared to ColdFire mcf5441x 64 channels edma. + * + * This function sets up register offsets as per proper declared version + * so must be called in xxx_edma_probe() just after setting the + * edma "version" and "membase" appropriately. + */ +void fsl_edma_setup_regs(struct fsl_edma_engine *edma) +{ + bool is64 = !!(edma->drvdata->flags & FSL_EDMA_DRV_EDMA64); + + edma->regs.cr = edma->membase + EDMA_CR; + edma->regs.es = edma->membase + EDMA_ES; + edma->regs.erql = edma->membase + EDMA_ERQ; + edma->regs.eeil = edma->membase + EDMA_EEI; + + edma->regs.serq = edma->membase + (is64 ? EDMA64_SERQ : EDMA_SERQ); + edma->regs.cerq = edma->membase + (is64 ? EDMA64_CERQ : EDMA_CERQ); + edma->regs.seei = edma->membase + (is64 ? EDMA64_SEEI : EDMA_SEEI); + edma->regs.ceei = edma->membase + (is64 ? EDMA64_CEEI : EDMA_CEEI); + edma->regs.cint = edma->membase + (is64 ? EDMA64_CINT : EDMA_CINT); + edma->regs.cerr = edma->membase + (is64 ? EDMA64_CERR : EDMA_CERR); + edma->regs.ssrt = edma->membase + (is64 ? EDMA64_SSRT : EDMA_SSRT); + edma->regs.cdne = edma->membase + (is64 ? EDMA64_CDNE : EDMA_CDNE); + edma->regs.intl = edma->membase + (is64 ? EDMA64_INTL : EDMA_INTR); + edma->regs.errl = edma->membase + (is64 ? EDMA64_ERRL : EDMA_ERR); + + if (is64) { + edma->regs.erqh = edma->membase + EDMA64_ERQH; + edma->regs.eeih = edma->membase + EDMA64_EEIH; + edma->regs.errh = edma->membase + EDMA64_ERRH; + edma->regs.inth = edma->membase + EDMA64_INTH; + } +} + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h new file mode 100644 index 0000000000..40d50cc3d7 --- /dev/null +++ b/drivers/dma/fsl-edma-common.h @@ -0,0 +1,351 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2013-2014 Freescale Semiconductor, Inc. + * Copyright 2018 Angelo Dureghello + */ +#ifndef _FSL_EDMA_COMMON_H_ +#define _FSL_EDMA_COMMON_H_ + +#include +#include +#include "virt-dma.h" + +#define EDMA_CR_EDBG BIT(1) +#define EDMA_CR_ERCA BIT(2) +#define EDMA_CR_ERGA BIT(3) +#define EDMA_CR_HOE BIT(4) +#define EDMA_CR_HALT BIT(5) +#define EDMA_CR_CLM BIT(6) +#define EDMA_CR_EMLM BIT(7) +#define EDMA_CR_ECX BIT(16) +#define EDMA_CR_CX BIT(17) + +#define EDMA_SEEI_SEEI(x) ((x) & GENMASK(4, 0)) +#define EDMA_CEEI_CEEI(x) ((x) & GENMASK(4, 0)) +#define EDMA_CINT_CINT(x) ((x) & GENMASK(4, 0)) +#define EDMA_CERR_CERR(x) ((x) & GENMASK(4, 0)) + +#define EDMA_TCD_ATTR_DSIZE(x) (((x) & GENMASK(2, 0))) +#define EDMA_TCD_ATTR_DMOD(x) (((x) & GENMASK(4, 0)) << 3) +#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8) +#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11) + +#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0)) +#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0)) + +#define EDMA_TCD_CSR_START BIT(0) +#define EDMA_TCD_CSR_INT_MAJOR BIT(1) +#define EDMA_TCD_CSR_INT_HALF BIT(2) +#define EDMA_TCD_CSR_D_REQ BIT(3) +#define EDMA_TCD_CSR_E_SG BIT(4) +#define EDMA_TCD_CSR_E_LINK BIT(5) +#define EDMA_TCD_CSR_ACTIVE BIT(6) +#define EDMA_TCD_CSR_DONE BIT(7) + +#define EDMA_V3_TCD_NBYTES_MLOFF_NBYTES(x) ((x) & GENMASK(9, 0)) +#define EDMA_V3_TCD_NBYTES_MLOFF(x) (x << 10) +#define EDMA_V3_TCD_NBYTES_DMLOE (1 << 30) +#define EDMA_V3_TCD_NBYTES_SMLOE (1 << 31) + +#define EDMAMUX_CHCFG_DIS 0x0 +#define EDMAMUX_CHCFG_ENBL 0x80 +#define EDMAMUX_CHCFG_SOURCE(n) ((n) & 0x3F) + +#define DMAMUX_NR 2 + +#define EDMA_TCD 0x1000 + +#define FSL_EDMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +#define EDMA_V3_CH_SBR_RD BIT(22) +#define EDMA_V3_CH_SBR_WR BIT(21) +#define EDMA_V3_CH_CSR_ERQ BIT(0) +#define EDMA_V3_CH_CSR_EARQ BIT(1) +#define EDMA_V3_CH_CSR_EEI BIT(2) +#define EDMA_V3_CH_CSR_DONE BIT(30) +#define EDMA_V3_CH_CSR_ACTIVE BIT(31) + +enum fsl_edma_pm_state { + RUNNING = 0, + SUSPENDED, +}; + +struct fsl_edma_hw_tcd { + __le32 saddr; + __le16 soff; + __le16 attr; + __le32 nbytes; + __le32 slast; + __le32 daddr; + __le16 doff; + __le16 citer; + __le32 dlast_sga; + __le16 csr; + __le16 biter; +}; + +struct fsl_edma3_ch_reg { + __le32 ch_csr; + __le32 ch_es; + __le32 ch_int; + __le32 ch_sbr; + __le32 ch_pri; + __le32 ch_mux; + __le32 ch_mattr; /* edma4, reserved for edma3 */ + __le32 ch_reserved; + struct fsl_edma_hw_tcd tcd; +} __packed; + +/* + * These are iomem pointers, for both v32 and v64. + */ +struct edma_regs { + void __iomem *cr; + void __iomem *es; + void __iomem *erqh; + void __iomem *erql; /* aka erq on v32 */ + void __iomem *eeih; + void __iomem *eeil; /* aka eei on v32 */ + void __iomem *seei; + void __iomem *ceei; + void __iomem *serq; + void __iomem *cerq; + void __iomem *cint; + void __iomem *cerr; + void __iomem *ssrt; + void __iomem *cdne; + void __iomem *inth; + void __iomem *intl; + void __iomem *errh; + void __iomem *errl; +}; + +struct fsl_edma_sw_tcd { + dma_addr_t ptcd; + struct fsl_edma_hw_tcd *vtcd; +}; + +struct fsl_edma_chan { + struct virt_dma_chan vchan; + enum dma_status status; + enum fsl_edma_pm_state pm_state; + bool idle; + u32 slave_id; + struct fsl_edma_engine *edma; + struct fsl_edma_desc *edesc; + struct dma_slave_config cfg; + u32 attr; + bool is_sw; + struct dma_pool *tcd_pool; + dma_addr_t dma_dev_addr; + u32 dma_dev_size; + enum dma_data_direction dma_dir; + char chan_name[32]; + struct fsl_edma_hw_tcd __iomem *tcd; + u32 real_count; + struct work_struct issue_worker; + struct platform_device *pdev; + struct device *pd_dev; + u32 srcid; + struct clk *clk; + int priority; + int hw_chanid; + int txirq; + bool is_rxchan; + bool is_remote; + bool is_multi_fifo; +}; + +struct fsl_edma_desc { + struct virt_dma_desc vdesc; + struct fsl_edma_chan *echan; + bool iscyclic; + enum dma_transfer_direction dirn; + unsigned int n_tcds; + struct fsl_edma_sw_tcd tcd[]; +}; + +#define FSL_EDMA_DRV_HAS_DMACLK BIT(0) +#define FSL_EDMA_DRV_MUX_SWAP BIT(1) +#define FSL_EDMA_DRV_CONFIG32 BIT(2) +#define FSL_EDMA_DRV_WRAP_IO BIT(3) +#define FSL_EDMA_DRV_EDMA64 BIT(4) +#define FSL_EDMA_DRV_HAS_PD BIT(5) +#define FSL_EDMA_DRV_HAS_CHCLK BIT(6) +#define FSL_EDMA_DRV_HAS_CHMUX BIT(7) +/* imx8 QM audio edma remote local swapped */ +#define FSL_EDMA_DRV_QUIRK_SWAPPED BIT(8) +/* control and status register is in tcd address space, edma3 reg layout */ +#define FSL_EDMA_DRV_SPLIT_REG BIT(9) +#define FSL_EDMA_DRV_BUS_8BYTE BIT(10) +#define FSL_EDMA_DRV_DEV_TO_DEV BIT(11) +#define FSL_EDMA_DRV_ALIGN_64BYTE BIT(12) +/* Need clean CHn_CSR DONE before enable TCD's ESG */ +#define FSL_EDMA_DRV_CLEAR_DONE_E_SG BIT(13) +/* Need clean CHn_CSR DONE before enable TCD's MAJORELINK */ +#define FSL_EDMA_DRV_CLEAR_DONE_E_LINK BIT(14) + +#define FSL_EDMA_DRV_EDMA3 (FSL_EDMA_DRV_SPLIT_REG | \ + FSL_EDMA_DRV_BUS_8BYTE | \ + FSL_EDMA_DRV_DEV_TO_DEV | \ + FSL_EDMA_DRV_ALIGN_64BYTE | \ + FSL_EDMA_DRV_CLEAR_DONE_E_SG | \ + FSL_EDMA_DRV_CLEAR_DONE_E_LINK) + +#define FSL_EDMA_DRV_EDMA4 (FSL_EDMA_DRV_SPLIT_REG | \ + FSL_EDMA_DRV_BUS_8BYTE | \ + FSL_EDMA_DRV_DEV_TO_DEV | \ + FSL_EDMA_DRV_ALIGN_64BYTE | \ + FSL_EDMA_DRV_CLEAR_DONE_E_LINK) + +struct fsl_edma_drvdata { + u32 dmamuxs; /* only used before v3 */ + u32 chreg_off; + u32 chreg_space_sz; + u32 flags; + int (*setup_irq)(struct platform_device *pdev, + struct fsl_edma_engine *fsl_edma); +}; + +struct fsl_edma_engine { + struct dma_device dma_dev; + void __iomem *membase; + void __iomem *muxbase[DMAMUX_NR]; + struct clk *muxclk[DMAMUX_NR]; + struct clk *dmaclk; + struct clk *chclk; + struct mutex fsl_edma_mutex; + const struct fsl_edma_drvdata *drvdata; + u32 n_chans; + int txirq; + int errirq; + bool big_endian; + struct edma_regs regs; + u64 chan_masked; + struct fsl_edma_chan chans[]; +}; + +#define edma_read_tcdreg(chan, __name) \ +(sizeof(chan->tcd->__name) == sizeof(u32) ? \ + edma_readl(chan->edma, &chan->tcd->__name) : \ + edma_readw(chan->edma, &chan->tcd->__name)) + +#define edma_write_tcdreg(chan, val, __name) \ +(sizeof(chan->tcd->__name) == sizeof(u32) ? \ + edma_writel(chan->edma, (u32 __force)val, &chan->tcd->__name) : \ + edma_writew(chan->edma, (u16 __force)val, &chan->tcd->__name)) + +#define edma_readl_chreg(chan, __name) \ + edma_readl(chan->edma, \ + (void __iomem *)&(container_of(chan->tcd, struct fsl_edma3_ch_reg, tcd)->__name)) + +#define edma_writel_chreg(chan, val, __name) \ + edma_writel(chan->edma, val, \ + (void __iomem *)&(container_of(chan->tcd, struct fsl_edma3_ch_reg, tcd)->__name)) + +/* + * R/W functions for big- or little-endian registers: + * The eDMA controller's endian is independent of the CPU core's endian. + * For the big-endian IP module, the offset for 8-bit or 16-bit registers + * should also be swapped opposite to that in little-endian IP. + */ +static inline u32 edma_readl(struct fsl_edma_engine *edma, void __iomem *addr) +{ + if (edma->big_endian) + return ioread32be(addr); + else + return ioread32(addr); +} + +static inline u16 edma_readw(struct fsl_edma_engine *edma, void __iomem *addr) +{ + if (edma->big_endian) + return ioread16be(addr); + else + return ioread16(addr); +} + +static inline void edma_writeb(struct fsl_edma_engine *edma, + u8 val, void __iomem *addr) +{ + /* swap the reg offset for these in big-endian mode */ + if (edma->big_endian) + iowrite8(val, (void __iomem *)((unsigned long)addr ^ 0x3)); + else + iowrite8(val, addr); +} + +static inline void edma_writew(struct fsl_edma_engine *edma, + u16 val, void __iomem *addr) +{ + /* swap the reg offset for these in big-endian mode */ + if (edma->big_endian) + iowrite16be(val, (void __iomem *)((unsigned long)addr ^ 0x2)); + else + iowrite16(val, addr); +} + +static inline void edma_writel(struct fsl_edma_engine *edma, + u32 val, void __iomem *addr) +{ + if (edma->big_endian) + iowrite32be(val, addr); + else + iowrite32(val, addr); +} + +static inline struct fsl_edma_chan *to_fsl_edma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct fsl_edma_chan, vchan.chan); +} + +static inline u32 fsl_edma_drvflags(struct fsl_edma_chan *fsl_chan) +{ + return fsl_chan->edma->drvdata->flags; +} + +static inline struct fsl_edma_desc *to_fsl_edma_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct fsl_edma_desc, vdesc); +} + +static inline void fsl_edma_err_chan_handler(struct fsl_edma_chan *fsl_chan) +{ + fsl_chan->status = DMA_ERROR; + fsl_chan->idle = true; +} + +void fsl_edma_tx_chan_handler(struct fsl_edma_chan *fsl_chan); +void fsl_edma_disable_request(struct fsl_edma_chan *fsl_chan); +void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan, + unsigned int slot, bool enable); +void fsl_edma_free_desc(struct virt_dma_desc *vdesc); +int fsl_edma_terminate_all(struct dma_chan *chan); +int fsl_edma_pause(struct dma_chan *chan); +int fsl_edma_resume(struct dma_chan *chan); +int fsl_edma_slave_config(struct dma_chan *chan, + struct dma_slave_config *cfg); +enum dma_status fsl_edma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate); +struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags); +struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context); +struct dma_async_tx_descriptor *fsl_edma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags); +void fsl_edma_xfer_desc(struct fsl_edma_chan *fsl_chan); +void fsl_edma_issue_pending(struct dma_chan *chan); +int fsl_edma_alloc_chan_resources(struct dma_chan *chan); +void fsl_edma_free_chan_resources(struct dma_chan *chan); +void fsl_edma_cleanup_vchan(struct dma_device *dmadev); +void fsl_edma_setup_regs(struct fsl_edma_engine *edma); + +#endif /* _FSL_EDMA_COMMON_H_ */ diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c new file mode 100644 index 0000000000..30df55da4d --- /dev/null +++ b/drivers/dma/fsl-edma-main.c @@ -0,0 +1,724 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * drivers/dma/fsl-edma.c + * + * Copyright 2013-2014 Freescale Semiconductor, Inc. + * + * Driver for the Freescale eDMA engine with flexible channel multiplexing + * capability for DMA request sources. The eDMA block can be found on some + * Vybrid and Layerscape SoCs. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fsl-edma-common.h" + +#define ARGS_RX BIT(0) +#define ARGS_REMOTE BIT(1) +#define ARGS_MULTI_FIFO BIT(2) +#define ARGS_EVEN_CH BIT(3) +#define ARGS_ODD_CH BIT(4) + +static void fsl_edma_synchronize(struct dma_chan *chan) +{ + struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); + + vchan_synchronize(&fsl_chan->vchan); +} + +static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id) +{ + struct fsl_edma_engine *fsl_edma = dev_id; + unsigned int intr, ch; + struct edma_regs *regs = &fsl_edma->regs; + + intr = edma_readl(fsl_edma, regs->intl); + if (!intr) + return IRQ_NONE; + + for (ch = 0; ch < fsl_edma->n_chans; ch++) { + if (intr & (0x1 << ch)) { + edma_writeb(fsl_edma, EDMA_CINT_CINT(ch), regs->cint); + fsl_edma_tx_chan_handler(&fsl_edma->chans[ch]); + } + } + return IRQ_HANDLED; +} + +static irqreturn_t fsl_edma3_tx_handler(int irq, void *dev_id) +{ + struct fsl_edma_chan *fsl_chan = dev_id; + unsigned int intr; + + intr = edma_readl_chreg(fsl_chan, ch_int); + if (!intr) + return IRQ_HANDLED; + + edma_writel_chreg(fsl_chan, 1, ch_int); + + fsl_edma_tx_chan_handler(fsl_chan); + + return IRQ_HANDLED; +} + +static irqreturn_t fsl_edma_err_handler(int irq, void *dev_id) +{ + struct fsl_edma_engine *fsl_edma = dev_id; + unsigned int err, ch; + struct edma_regs *regs = &fsl_edma->regs; + + err = edma_readl(fsl_edma, regs->errl); + if (!err) + return IRQ_NONE; + + for (ch = 0; ch < fsl_edma->n_chans; ch++) { + if (err & (0x1 << ch)) { + fsl_edma_disable_request(&fsl_edma->chans[ch]); + edma_writeb(fsl_edma, EDMA_CERR_CERR(ch), regs->cerr); + fsl_edma_err_chan_handler(&fsl_edma->chans[ch]); + } + } + return IRQ_HANDLED; +} + +static irqreturn_t fsl_edma_irq_handler(int irq, void *dev_id) +{ + if (fsl_edma_tx_handler(irq, dev_id) == IRQ_HANDLED) + return IRQ_HANDLED; + + return fsl_edma_err_handler(irq, dev_id); +} + +static struct dma_chan *fsl_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct fsl_edma_engine *fsl_edma = ofdma->of_dma_data; + struct dma_chan *chan, *_chan; + struct fsl_edma_chan *fsl_chan; + u32 dmamux_nr = fsl_edma->drvdata->dmamuxs; + unsigned long chans_per_mux = fsl_edma->n_chans / dmamux_nr; + + if (dma_spec->args_count != 2) + return NULL; + + mutex_lock(&fsl_edma->fsl_edma_mutex); + list_for_each_entry_safe(chan, _chan, &fsl_edma->dma_dev.channels, device_node) { + if (chan->client_count) + continue; + if ((chan->chan_id / chans_per_mux) == dma_spec->args[0]) { + chan = dma_get_slave_channel(chan); + if (chan) { + chan->device->privatecnt++; + fsl_chan = to_fsl_edma_chan(chan); + fsl_chan->slave_id = dma_spec->args[1]; + fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id, + true); + mutex_unlock(&fsl_edma->fsl_edma_mutex); + return chan; + } + } + } + mutex_unlock(&fsl_edma->fsl_edma_mutex); + return NULL; +} + +static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct fsl_edma_engine *fsl_edma = ofdma->of_dma_data; + struct dma_chan *chan, *_chan; + struct fsl_edma_chan *fsl_chan; + bool b_chmux; + int i; + + if (dma_spec->args_count != 3) + return NULL; + + b_chmux = !!(fsl_edma->drvdata->flags & FSL_EDMA_DRV_HAS_CHMUX); + + mutex_lock(&fsl_edma->fsl_edma_mutex); + list_for_each_entry_safe(chan, _chan, &fsl_edma->dma_dev.channels, + device_node) { + + if (chan->client_count) + continue; + + fsl_chan = to_fsl_edma_chan(chan); + i = fsl_chan - fsl_edma->chans; + + fsl_chan->priority = dma_spec->args[1]; + fsl_chan->is_rxchan = dma_spec->args[2] & ARGS_RX; + fsl_chan->is_remote = dma_spec->args[2] & ARGS_REMOTE; + fsl_chan->is_multi_fifo = dma_spec->args[2] & ARGS_MULTI_FIFO; + + if ((dma_spec->args[2] & ARGS_EVEN_CH) && (i & 0x1)) + continue; + + if ((dma_spec->args[2] & ARGS_ODD_CH) && !(i & 0x1)) + continue; + + if (!b_chmux && i == dma_spec->args[0]) { + chan = dma_get_slave_channel(chan); + chan->device->privatecnt++; + mutex_unlock(&fsl_edma->fsl_edma_mutex); + return chan; + } else if (b_chmux && !fsl_chan->srcid) { + /* if controller support channel mux, choose a free channel */ + chan = dma_get_slave_channel(chan); + chan->device->privatecnt++; + fsl_chan->srcid = dma_spec->args[0]; + mutex_unlock(&fsl_edma->fsl_edma_mutex); + return chan; + } + } + mutex_unlock(&fsl_edma->fsl_edma_mutex); + return NULL; +} + +static int +fsl_edma_irq_init(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma) +{ + int ret; + + edma_writel(fsl_edma, ~0, fsl_edma->regs.intl); + + fsl_edma->txirq = platform_get_irq_byname(pdev, "edma-tx"); + if (fsl_edma->txirq < 0) + return fsl_edma->txirq; + + fsl_edma->errirq = platform_get_irq_byname(pdev, "edma-err"); + if (fsl_edma->errirq < 0) + return fsl_edma->errirq; + + if (fsl_edma->txirq == fsl_edma->errirq) { + ret = devm_request_irq(&pdev->dev, fsl_edma->txirq, + fsl_edma_irq_handler, 0, "eDMA", fsl_edma); + if (ret) { + dev_err(&pdev->dev, "Can't register eDMA IRQ.\n"); + return ret; + } + } else { + ret = devm_request_irq(&pdev->dev, fsl_edma->txirq, + fsl_edma_tx_handler, 0, "eDMA tx", fsl_edma); + if (ret) { + dev_err(&pdev->dev, "Can't register eDMA tx IRQ.\n"); + return ret; + } + + ret = devm_request_irq(&pdev->dev, fsl_edma->errirq, + fsl_edma_err_handler, 0, "eDMA err", fsl_edma); + if (ret) { + dev_err(&pdev->dev, "Can't register eDMA err IRQ.\n"); + return ret; + } + } + + return 0; +} + +static int fsl_edma3_irq_init(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma) +{ + int ret; + int i; + + for (i = 0; i < fsl_edma->n_chans; i++) { + + struct fsl_edma_chan *fsl_chan = &fsl_edma->chans[i]; + + if (fsl_edma->chan_masked & BIT(i)) + continue; + + /* request channel irq */ + fsl_chan->txirq = platform_get_irq(pdev, i); + if (fsl_chan->txirq < 0) { + dev_err(&pdev->dev, "Can't get chan %d's irq.\n", i); + return -EINVAL; + } + + ret = devm_request_irq(&pdev->dev, fsl_chan->txirq, + fsl_edma3_tx_handler, IRQF_SHARED, + fsl_chan->chan_name, fsl_chan); + if (ret) { + dev_err(&pdev->dev, "Can't register chan%d's IRQ.\n", i); + return -EINVAL; + } + } + + return 0; +} + +static int +fsl_edma2_irq_init(struct platform_device *pdev, + struct fsl_edma_engine *fsl_edma) +{ + int i, ret, irq; + int count; + + edma_writel(fsl_edma, ~0, fsl_edma->regs.intl); + + count = platform_irq_count(pdev); + dev_dbg(&pdev->dev, "%s Found %d interrupts\r\n", __func__, count); + if (count <= 2) { + dev_err(&pdev->dev, "Interrupts in DTS not correct.\n"); + return -EINVAL; + } + /* + * 16 channel independent interrupts + 1 error interrupt on i.mx7ulp. + * 2 channel share one interrupt, for example, ch0/ch16, ch1/ch17... + * For now, just simply request irq without IRQF_SHARED flag, since 16 + * channels are enough on i.mx7ulp whose M4 domain own some peripherals. + */ + for (i = 0; i < count; i++) { + irq = platform_get_irq(pdev, i); + if (irq < 0) + return -ENXIO; + + /* The last IRQ is for eDMA err */ + if (i == count - 1) + ret = devm_request_irq(&pdev->dev, irq, + fsl_edma_err_handler, + 0, "eDMA2-ERR", fsl_edma); + else + ret = devm_request_irq(&pdev->dev, irq, + fsl_edma_tx_handler, 0, + fsl_edma->chans[i].chan_name, + fsl_edma); + if (ret) + return ret; + } + + return 0; +} + +static void fsl_edma_irq_exit( + struct platform_device *pdev, struct fsl_edma_engine *fsl_edma) +{ + if (fsl_edma->txirq == fsl_edma->errirq) { + devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma); + } else { + devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma); + devm_free_irq(&pdev->dev, fsl_edma->errirq, fsl_edma); + } +} + +static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks) +{ + int i; + + for (i = 0; i < nr_clocks; i++) + clk_disable_unprepare(fsl_edma->muxclk[i]); +} + +static struct fsl_edma_drvdata vf610_data = { + .dmamuxs = DMAMUX_NR, + .flags = FSL_EDMA_DRV_WRAP_IO, + .chreg_off = EDMA_TCD, + .chreg_space_sz = sizeof(struct fsl_edma_hw_tcd), + .setup_irq = fsl_edma_irq_init, +}; + +static struct fsl_edma_drvdata ls1028a_data = { + .dmamuxs = DMAMUX_NR, + .flags = FSL_EDMA_DRV_MUX_SWAP | FSL_EDMA_DRV_WRAP_IO, + .chreg_off = EDMA_TCD, + .chreg_space_sz = sizeof(struct fsl_edma_hw_tcd), + .setup_irq = fsl_edma_irq_init, +}; + +static struct fsl_edma_drvdata imx7ulp_data = { + .dmamuxs = 1, + .chreg_off = EDMA_TCD, + .chreg_space_sz = sizeof(struct fsl_edma_hw_tcd), + .flags = FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_CONFIG32, + .setup_irq = fsl_edma2_irq_init, +}; + +static struct fsl_edma_drvdata imx8qm_data = { + .flags = FSL_EDMA_DRV_HAS_PD | FSL_EDMA_DRV_EDMA3, + .chreg_space_sz = 0x10000, + .chreg_off = 0x10000, + .setup_irq = fsl_edma3_irq_init, +}; + +static struct fsl_edma_drvdata imx8qm_audio_data = { + .flags = FSL_EDMA_DRV_QUIRK_SWAPPED | FSL_EDMA_DRV_HAS_PD | FSL_EDMA_DRV_EDMA3, + .chreg_space_sz = 0x10000, + .chreg_off = 0x10000, + .setup_irq = fsl_edma3_irq_init, +}; + +static struct fsl_edma_drvdata imx93_data3 = { + .flags = FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA3, + .chreg_space_sz = 0x10000, + .chreg_off = 0x10000, + .setup_irq = fsl_edma3_irq_init, +}; + +static struct fsl_edma_drvdata imx93_data4 = { + .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA4, + .chreg_space_sz = 0x8000, + .chreg_off = 0x10000, + .setup_irq = fsl_edma3_irq_init, +}; + +static const struct of_device_id fsl_edma_dt_ids[] = { + { .compatible = "fsl,vf610-edma", .data = &vf610_data}, + { .compatible = "fsl,ls1028a-edma", .data = &ls1028a_data}, + { .compatible = "fsl,imx7ulp-edma", .data = &imx7ulp_data}, + { .compatible = "fsl,imx8qm-edma", .data = &imx8qm_data}, + { .compatible = "fsl,imx8qm-adma", .data = &imx8qm_audio_data}, + { .compatible = "fsl,imx93-edma3", .data = &imx93_data3}, + { .compatible = "fsl,imx93-edma4", .data = &imx93_data4}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, fsl_edma_dt_ids); + +static int fsl_edma3_attach_pd(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma) +{ + struct fsl_edma_chan *fsl_chan; + struct device_link *link; + struct device *pd_chan; + struct device *dev; + int i; + + dev = &pdev->dev; + + for (i = 0; i < fsl_edma->n_chans; i++) { + if (fsl_edma->chan_masked & BIT(i)) + continue; + + fsl_chan = &fsl_edma->chans[i]; + + pd_chan = dev_pm_domain_attach_by_id(dev, i); + if (IS_ERR_OR_NULL(pd_chan)) { + dev_err(dev, "Failed attach pd %d\n", i); + return -EINVAL; + } + + link = device_link_add(dev, pd_chan, DL_FLAG_STATELESS | + DL_FLAG_PM_RUNTIME | + DL_FLAG_RPM_ACTIVE); + if (!link) { + dev_err(dev, "Failed to add device_link to %d\n", i); + return -EINVAL; + } + + fsl_chan->pd_dev = pd_chan; + + pm_runtime_use_autosuspend(fsl_chan->pd_dev); + pm_runtime_set_autosuspend_delay(fsl_chan->pd_dev, 200); + pm_runtime_set_active(fsl_chan->pd_dev); + } + + return 0; +} + +static int fsl_edma_probe(struct platform_device *pdev) +{ + const struct of_device_id *of_id = + of_match_device(fsl_edma_dt_ids, &pdev->dev); + struct device_node *np = pdev->dev.of_node; + struct fsl_edma_engine *fsl_edma; + const struct fsl_edma_drvdata *drvdata = NULL; + u32 chan_mask[2] = {0, 0}; + struct edma_regs *regs; + int chans; + int ret, i; + + if (of_id) + drvdata = of_id->data; + if (!drvdata) { + dev_err(&pdev->dev, "unable to find driver data\n"); + return -EINVAL; + } + + ret = of_property_read_u32(np, "dma-channels", &chans); + if (ret) { + dev_err(&pdev->dev, "Can't get dma-channels.\n"); + return ret; + } + + fsl_edma = devm_kzalloc(&pdev->dev, struct_size(fsl_edma, chans, chans), + GFP_KERNEL); + if (!fsl_edma) + return -ENOMEM; + + fsl_edma->drvdata = drvdata; + fsl_edma->n_chans = chans; + mutex_init(&fsl_edma->fsl_edma_mutex); + + fsl_edma->membase = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(fsl_edma->membase)) + return PTR_ERR(fsl_edma->membase); + + if (!(drvdata->flags & FSL_EDMA_DRV_SPLIT_REG)) { + fsl_edma_setup_regs(fsl_edma); + regs = &fsl_edma->regs; + } + + if (drvdata->flags & FSL_EDMA_DRV_HAS_DMACLK) { + fsl_edma->dmaclk = devm_clk_get_enabled(&pdev->dev, "dma"); + if (IS_ERR(fsl_edma->dmaclk)) { + dev_err(&pdev->dev, "Missing DMA block clock.\n"); + return PTR_ERR(fsl_edma->dmaclk); + } + } + + if (drvdata->flags & FSL_EDMA_DRV_HAS_CHCLK) { + fsl_edma->chclk = devm_clk_get_enabled(&pdev->dev, "mp"); + if (IS_ERR(fsl_edma->chclk)) { + dev_err(&pdev->dev, "Missing MP block clock.\n"); + return PTR_ERR(fsl_edma->chclk); + } + } + + ret = of_property_read_variable_u32_array(np, "dma-channel-mask", chan_mask, 1, 2); + + if (ret > 0) { + fsl_edma->chan_masked = chan_mask[1]; + fsl_edma->chan_masked <<= 32; + fsl_edma->chan_masked |= chan_mask[0]; + } + + for (i = 0; i < fsl_edma->drvdata->dmamuxs; i++) { + char clkname[32]; + + /* eDMAv3 mux register move to TCD area if ch_mux exist */ + if (drvdata->flags & FSL_EDMA_DRV_SPLIT_REG) + break; + + fsl_edma->muxbase[i] = devm_platform_ioremap_resource(pdev, + 1 + i); + if (IS_ERR(fsl_edma->muxbase[i])) { + /* on error: disable all previously enabled clks */ + fsl_disable_clocks(fsl_edma, i); + return PTR_ERR(fsl_edma->muxbase[i]); + } + + sprintf(clkname, "dmamux%d", i); + fsl_edma->muxclk[i] = devm_clk_get_enabled(&pdev->dev, clkname); + if (IS_ERR(fsl_edma->muxclk[i])) { + dev_err(&pdev->dev, "Missing DMAMUX block clock.\n"); + /* on error: disable all previously enabled clks */ + return PTR_ERR(fsl_edma->muxclk[i]); + } + } + + fsl_edma->big_endian = of_property_read_bool(np, "big-endian"); + + if (drvdata->flags & FSL_EDMA_DRV_HAS_PD) { + ret = fsl_edma3_attach_pd(pdev, fsl_edma); + if (ret) + return ret; + } + + INIT_LIST_HEAD(&fsl_edma->dma_dev.channels); + for (i = 0; i < fsl_edma->n_chans; i++) { + struct fsl_edma_chan *fsl_chan = &fsl_edma->chans[i]; + int len; + + if (fsl_edma->chan_masked & BIT(i)) + continue; + + snprintf(fsl_chan->chan_name, sizeof(fsl_chan->chan_name), "%s-CH%02d", + dev_name(&pdev->dev), i); + + fsl_chan->edma = fsl_edma; + fsl_chan->pm_state = RUNNING; + fsl_chan->slave_id = 0; + fsl_chan->idle = true; + fsl_chan->dma_dir = DMA_NONE; + fsl_chan->vchan.desc_free = fsl_edma_free_desc; + + len = (drvdata->flags & FSL_EDMA_DRV_SPLIT_REG) ? + offsetof(struct fsl_edma3_ch_reg, tcd) : 0; + fsl_chan->tcd = fsl_edma->membase + + i * drvdata->chreg_space_sz + drvdata->chreg_off + len; + + fsl_chan->pdev = pdev; + vchan_init(&fsl_chan->vchan, &fsl_edma->dma_dev); + + edma_write_tcdreg(fsl_chan, 0, csr); + fsl_edma_chan_mux(fsl_chan, 0, false); + } + + ret = fsl_edma->drvdata->setup_irq(pdev, fsl_edma); + if (ret) + return ret; + + dma_cap_set(DMA_PRIVATE, fsl_edma->dma_dev.cap_mask); + dma_cap_set(DMA_SLAVE, fsl_edma->dma_dev.cap_mask); + dma_cap_set(DMA_CYCLIC, fsl_edma->dma_dev.cap_mask); + dma_cap_set(DMA_MEMCPY, fsl_edma->dma_dev.cap_mask); + + fsl_edma->dma_dev.dev = &pdev->dev; + fsl_edma->dma_dev.device_alloc_chan_resources + = fsl_edma_alloc_chan_resources; + fsl_edma->dma_dev.device_free_chan_resources + = fsl_edma_free_chan_resources; + fsl_edma->dma_dev.device_tx_status = fsl_edma_tx_status; + fsl_edma->dma_dev.device_prep_slave_sg = fsl_edma_prep_slave_sg; + fsl_edma->dma_dev.device_prep_dma_cyclic = fsl_edma_prep_dma_cyclic; + fsl_edma->dma_dev.device_prep_dma_memcpy = fsl_edma_prep_memcpy; + fsl_edma->dma_dev.device_config = fsl_edma_slave_config; + fsl_edma->dma_dev.device_pause = fsl_edma_pause; + fsl_edma->dma_dev.device_resume = fsl_edma_resume; + fsl_edma->dma_dev.device_terminate_all = fsl_edma_terminate_all; + fsl_edma->dma_dev.device_synchronize = fsl_edma_synchronize; + fsl_edma->dma_dev.device_issue_pending = fsl_edma_issue_pending; + + fsl_edma->dma_dev.src_addr_widths = FSL_EDMA_BUSWIDTHS; + fsl_edma->dma_dev.dst_addr_widths = FSL_EDMA_BUSWIDTHS; + + if (drvdata->flags & FSL_EDMA_DRV_BUS_8BYTE) { + fsl_edma->dma_dev.src_addr_widths |= BIT(DMA_SLAVE_BUSWIDTH_8_BYTES); + fsl_edma->dma_dev.dst_addr_widths |= BIT(DMA_SLAVE_BUSWIDTH_8_BYTES); + } + + fsl_edma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + if (drvdata->flags & FSL_EDMA_DRV_DEV_TO_DEV) + fsl_edma->dma_dev.directions |= BIT(DMA_DEV_TO_DEV); + + fsl_edma->dma_dev.copy_align = drvdata->flags & FSL_EDMA_DRV_ALIGN_64BYTE ? + DMAENGINE_ALIGN_64_BYTES : + DMAENGINE_ALIGN_32_BYTES; + + /* Per worst case 'nbytes = 1' take CITER as the max_seg_size */ + dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff); + + fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + + platform_set_drvdata(pdev, fsl_edma); + + ret = dma_async_device_register(&fsl_edma->dma_dev); + if (ret) { + dev_err(&pdev->dev, + "Can't register Freescale eDMA engine. (%d)\n", ret); + return ret; + } + + ret = of_dma_controller_register(np, + drvdata->flags & FSL_EDMA_DRV_SPLIT_REG ? fsl_edma3_xlate : fsl_edma_xlate, + fsl_edma); + if (ret) { + dev_err(&pdev->dev, + "Can't register Freescale eDMA of_dma. (%d)\n", ret); + dma_async_device_unregister(&fsl_edma->dma_dev); + return ret; + } + + /* enable round robin arbitration */ + if (!(drvdata->flags & FSL_EDMA_DRV_SPLIT_REG)) + edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr); + + return 0; +} + +static int fsl_edma_remove(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct fsl_edma_engine *fsl_edma = platform_get_drvdata(pdev); + + fsl_edma_irq_exit(pdev, fsl_edma); + fsl_edma_cleanup_vchan(&fsl_edma->dma_dev); + of_dma_controller_free(np); + dma_async_device_unregister(&fsl_edma->dma_dev); + fsl_disable_clocks(fsl_edma, fsl_edma->drvdata->dmamuxs); + + return 0; +} + +static int fsl_edma_suspend_late(struct device *dev) +{ + struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev); + struct fsl_edma_chan *fsl_chan; + unsigned long flags; + int i; + + for (i = 0; i < fsl_edma->n_chans; i++) { + fsl_chan = &fsl_edma->chans[i]; + if (fsl_edma->chan_masked & BIT(i)) + continue; + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + /* Make sure chan is idle or will force disable. */ + if (unlikely(!fsl_chan->idle)) { + dev_warn(dev, "WARN: There is non-idle channel."); + fsl_edma_disable_request(fsl_chan); + fsl_edma_chan_mux(fsl_chan, 0, false); + } + + fsl_chan->pm_state = SUSPENDED; + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + } + + return 0; +} + +static int fsl_edma_resume_early(struct device *dev) +{ + struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev); + struct fsl_edma_chan *fsl_chan; + struct edma_regs *regs = &fsl_edma->regs; + int i; + + for (i = 0; i < fsl_edma->n_chans; i++) { + fsl_chan = &fsl_edma->chans[i]; + if (fsl_edma->chan_masked & BIT(i)) + continue; + fsl_chan->pm_state = RUNNING; + edma_write_tcdreg(fsl_chan, 0, csr); + if (fsl_chan->slave_id != 0) + fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id, true); + } + + if (!(fsl_edma->drvdata->flags & FSL_EDMA_DRV_SPLIT_REG)) + edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr); + + return 0; +} + +/* + * eDMA provides the service to others, so it should be suspend late + * and resume early. When eDMA suspend, all of the clients should stop + * the DMA data transmission and let the channel idle. + */ +static const struct dev_pm_ops fsl_edma_pm_ops = { + .suspend_late = fsl_edma_suspend_late, + .resume_early = fsl_edma_resume_early, +}; + +static struct platform_driver fsl_edma_driver = { + .driver = { + .name = "fsl-edma", + .of_match_table = fsl_edma_dt_ids, + .pm = &fsl_edma_pm_ops, + }, + .probe = fsl_edma_probe, + .remove = fsl_edma_remove, +}; + +static int __init fsl_edma_init(void) +{ + return platform_driver_register(&fsl_edma_driver); +} +subsys_initcall(fsl_edma_init); + +static void __exit fsl_edma_exit(void) +{ + platform_driver_unregister(&fsl_edma_driver); +} +module_exit(fsl_edma_exit); + +MODULE_ALIAS("platform:fsl-edma"); +MODULE_DESCRIPTION("Freescale eDMA engine driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c new file mode 100644 index 0000000000..a8cc8a4bc6 --- /dev/null +++ b/drivers/dma/fsl-qdma.c @@ -0,0 +1,1308 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright 2014-2015 Freescale +// Copyright 2018 NXP + +/* + * Driver for NXP Layerscape Queue Direct Memory Access Controller + * + * Author: + * Wen He + * Jiaheng Fan + * + */ + +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" +#include "fsldma.h" + +/* Register related definition */ +#define FSL_QDMA_DMR 0x0 +#define FSL_QDMA_DSR 0x4 +#define FSL_QDMA_DEIER 0xe00 +#define FSL_QDMA_DEDR 0xe04 +#define FSL_QDMA_DECFDW0R 0xe10 +#define FSL_QDMA_DECFDW1R 0xe14 +#define FSL_QDMA_DECFDW2R 0xe18 +#define FSL_QDMA_DECFDW3R 0xe1c +#define FSL_QDMA_DECFQIDR 0xe30 +#define FSL_QDMA_DECBR 0xe34 + +#define FSL_QDMA_BCQMR(x) (0xc0 + 0x100 * (x)) +#define FSL_QDMA_BCQSR(x) (0xc4 + 0x100 * (x)) +#define FSL_QDMA_BCQEDPA_SADDR(x) (0xc8 + 0x100 * (x)) +#define FSL_QDMA_BCQDPA_SADDR(x) (0xcc + 0x100 * (x)) +#define FSL_QDMA_BCQEEPA_SADDR(x) (0xd0 + 0x100 * (x)) +#define FSL_QDMA_BCQEPA_SADDR(x) (0xd4 + 0x100 * (x)) +#define FSL_QDMA_BCQIER(x) (0xe0 + 0x100 * (x)) +#define FSL_QDMA_BCQIDR(x) (0xe4 + 0x100 * (x)) + +#define FSL_QDMA_SQDPAR 0x80c +#define FSL_QDMA_SQEPAR 0x814 +#define FSL_QDMA_BSQMR 0x800 +#define FSL_QDMA_BSQSR 0x804 +#define FSL_QDMA_BSQICR 0x828 +#define FSL_QDMA_CQMR 0xa00 +#define FSL_QDMA_CQDSCR1 0xa08 +#define FSL_QDMA_CQDSCR2 0xa0c +#define FSL_QDMA_CQIER 0xa10 +#define FSL_QDMA_CQEDR 0xa14 +#define FSL_QDMA_SQCCMR 0xa20 + +/* Registers for bit and genmask */ +#define FSL_QDMA_CQIDR_SQT BIT(15) +#define QDMA_CCDF_FORMAT BIT(29) +#define QDMA_CCDF_SER BIT(30) +#define QDMA_SG_FIN BIT(30) +#define QDMA_SG_LEN_MASK GENMASK(29, 0) +#define QDMA_CCDF_MASK GENMASK(28, 20) + +#define FSL_QDMA_DEDR_CLEAR GENMASK(31, 0) +#define FSL_QDMA_BCQIDR_CLEAR GENMASK(31, 0) +#define FSL_QDMA_DEIER_CLEAR GENMASK(31, 0) + +#define FSL_QDMA_BCQIER_CQTIE BIT(15) +#define FSL_QDMA_BCQIER_CQPEIE BIT(23) +#define FSL_QDMA_BSQICR_ICEN BIT(31) + +#define FSL_QDMA_BSQICR_ICST(x) ((x) << 16) +#define FSL_QDMA_CQIER_MEIE BIT(31) +#define FSL_QDMA_CQIER_TEIE BIT(0) +#define FSL_QDMA_SQCCMR_ENTER_WM BIT(21) + +#define FSL_QDMA_BCQMR_EN BIT(31) +#define FSL_QDMA_BCQMR_EI BIT(30) +#define FSL_QDMA_BCQMR_CD_THLD(x) ((x) << 20) +#define FSL_QDMA_BCQMR_CQ_SIZE(x) ((x) << 16) + +#define FSL_QDMA_BCQSR_QF BIT(16) +#define FSL_QDMA_BCQSR_XOFF BIT(0) + +#define FSL_QDMA_BSQMR_EN BIT(31) +#define FSL_QDMA_BSQMR_DI BIT(30) +#define FSL_QDMA_BSQMR_CQ_SIZE(x) ((x) << 16) + +#define FSL_QDMA_BSQSR_QE BIT(17) + +#define FSL_QDMA_DMR_DQD BIT(30) +#define FSL_QDMA_DSR_DB BIT(31) + +/* Size related definition */ +#define FSL_QDMA_QUEUE_MAX 8 +#define FSL_QDMA_COMMAND_BUFFER_SIZE 64 +#define FSL_QDMA_DESCRIPTOR_BUFFER_SIZE 32 +#define FSL_QDMA_CIRCULAR_DESC_SIZE_MIN 64 +#define FSL_QDMA_CIRCULAR_DESC_SIZE_MAX 16384 +#define FSL_QDMA_QUEUE_NUM_MAX 8 + +/* Field definition for CMD */ +#define FSL_QDMA_CMD_RWTTYPE 0x4 +#define FSL_QDMA_CMD_LWC 0x2 +#define FSL_QDMA_CMD_RWTTYPE_OFFSET 28 +#define FSL_QDMA_CMD_NS_OFFSET 27 +#define FSL_QDMA_CMD_DQOS_OFFSET 24 +#define FSL_QDMA_CMD_WTHROTL_OFFSET 20 +#define FSL_QDMA_CMD_DSEN_OFFSET 19 +#define FSL_QDMA_CMD_LWC_OFFSET 16 + +/* Field definition for Descriptor status */ +#define QDMA_CCDF_STATUS_RTE BIT(5) +#define QDMA_CCDF_STATUS_WTE BIT(4) +#define QDMA_CCDF_STATUS_CDE BIT(2) +#define QDMA_CCDF_STATUS_SDE BIT(1) +#define QDMA_CCDF_STATUS_DDE BIT(0) +#define QDMA_CCDF_STATUS_MASK (QDMA_CCDF_STATUS_RTE | \ + QDMA_CCDF_STATUS_WTE | \ + QDMA_CCDF_STATUS_CDE | \ + QDMA_CCDF_STATUS_SDE | \ + QDMA_CCDF_STATUS_DDE) + +/* Field definition for Descriptor offset */ +#define QDMA_CCDF_OFFSET 20 +#define QDMA_SDDF_CMD(x) (((u64)(x)) << 32) + +/* Field definition for safe loop count*/ +#define FSL_QDMA_HALT_COUNT 1500 +#define FSL_QDMA_MAX_SIZE 16385 +#define FSL_QDMA_COMP_TIMEOUT 1000 +#define FSL_COMMAND_QUEUE_OVERFLLOW 10 + +#define FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma_engine, x) \ + (((fsl_qdma_engine)->block_offset) * (x)) + +/** + * struct fsl_qdma_format - This is the struct holding describing compound + * descriptor format with qDMA. + * @status: Command status and enqueue status notification. + * @cfg: Frame offset and frame format. + * @addr_lo: Holding the compound descriptor of the lower + * 32-bits address in memory 40-bit address. + * @addr_hi: Same as above member, but point high 8-bits in + * memory 40-bit address. + * @__reserved1: Reserved field. + * @cfg8b_w1: Compound descriptor command queue origin produced + * by qDMA and dynamic debug field. + * @data: Pointer to the memory 40-bit address, describes DMA + * source information and DMA destination information. + */ +struct fsl_qdma_format { + __le32 status; + __le32 cfg; + union { + struct { + __le32 addr_lo; + u8 addr_hi; + u8 __reserved1[2]; + u8 cfg8b_w1; + } __packed; + __le64 data; + }; +} __packed; + +/* qDMA status notification pre information */ +struct fsl_pre_status { + u64 addr; + u8 queue; +}; + +static DEFINE_PER_CPU(struct fsl_pre_status, pre); + +struct fsl_qdma_chan { + struct virt_dma_chan vchan; + struct virt_dma_desc vdesc; + enum dma_status status; + struct fsl_qdma_engine *qdma; + struct fsl_qdma_queue *queue; +}; + +struct fsl_qdma_queue { + struct fsl_qdma_format *virt_head; + struct fsl_qdma_format *virt_tail; + struct list_head comp_used; + struct list_head comp_free; + struct dma_pool *comp_pool; + struct dma_pool *desc_pool; + spinlock_t queue_lock; + dma_addr_t bus_addr; + u32 n_cq; + u32 id; + struct fsl_qdma_format *cq; + void __iomem *block_base; +}; + +struct fsl_qdma_comp { + dma_addr_t bus_addr; + dma_addr_t desc_bus_addr; + struct fsl_qdma_format *virt_addr; + struct fsl_qdma_format *desc_virt_addr; + struct fsl_qdma_chan *qchan; + struct virt_dma_desc vdesc; + struct list_head list; +}; + +struct fsl_qdma_engine { + struct dma_device dma_dev; + void __iomem *ctrl_base; + void __iomem *status_base; + void __iomem *block_base; + u32 n_chans; + u32 n_queues; + struct mutex fsl_qdma_mutex; + int error_irq; + int *queue_irq; + u32 feature; + struct fsl_qdma_queue *queue; + struct fsl_qdma_queue **status; + struct fsl_qdma_chan *chans; + int block_number; + int block_offset; + int irq_base; + int desc_allocated; + +}; + +static inline u64 +qdma_ccdf_addr_get64(const struct fsl_qdma_format *ccdf) +{ + return le64_to_cpu(ccdf->data) & (U64_MAX >> 24); +} + +static inline void +qdma_desc_addr_set64(struct fsl_qdma_format *ccdf, u64 addr) +{ + ccdf->addr_hi = upper_32_bits(addr); + ccdf->addr_lo = cpu_to_le32(lower_32_bits(addr)); +} + +static inline u8 +qdma_ccdf_get_queue(const struct fsl_qdma_format *ccdf) +{ + return ccdf->cfg8b_w1 & U8_MAX; +} + +static inline int +qdma_ccdf_get_offset(const struct fsl_qdma_format *ccdf) +{ + return (le32_to_cpu(ccdf->cfg) & QDMA_CCDF_MASK) >> QDMA_CCDF_OFFSET; +} + +static inline void +qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset) +{ + ccdf->cfg = cpu_to_le32(QDMA_CCDF_FORMAT | + (offset << QDMA_CCDF_OFFSET)); +} + +static inline int +qdma_ccdf_get_status(const struct fsl_qdma_format *ccdf) +{ + return (le32_to_cpu(ccdf->status) & QDMA_CCDF_STATUS_MASK); +} + +static inline void +qdma_ccdf_set_ser(struct fsl_qdma_format *ccdf, int status) +{ + ccdf->status = cpu_to_le32(QDMA_CCDF_SER | status); +} + +static inline void qdma_csgf_set_len(struct fsl_qdma_format *csgf, int len) +{ + csgf->cfg = cpu_to_le32(len & QDMA_SG_LEN_MASK); +} + +static inline void qdma_csgf_set_f(struct fsl_qdma_format *csgf, int len) +{ + csgf->cfg = cpu_to_le32(QDMA_SG_FIN | (len & QDMA_SG_LEN_MASK)); +} + +static u32 qdma_readl(struct fsl_qdma_engine *qdma, void __iomem *addr) +{ + return FSL_DMA_IN(qdma, addr, 32); +} + +static void qdma_writel(struct fsl_qdma_engine *qdma, u32 val, + void __iomem *addr) +{ + FSL_DMA_OUT(qdma, addr, val, 32); +} + +static struct fsl_qdma_chan *to_fsl_qdma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct fsl_qdma_chan, vchan.chan); +} + +static struct fsl_qdma_comp *to_fsl_qdma_comp(struct virt_dma_desc *vd) +{ + return container_of(vd, struct fsl_qdma_comp, vdesc); +} + +static void fsl_qdma_free_chan_resources(struct dma_chan *chan) +{ + struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan); + struct fsl_qdma_queue *fsl_queue = fsl_chan->queue; + struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma; + struct fsl_qdma_comp *comp_temp, *_comp_temp; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + vchan_get_all_descriptors(&fsl_chan->vchan, &head); + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&fsl_chan->vchan, &head); + + if (!fsl_queue->comp_pool && !fsl_queue->desc_pool) + return; + + list_for_each_entry_safe(comp_temp, _comp_temp, + &fsl_queue->comp_used, list) { + dma_pool_free(fsl_queue->comp_pool, + comp_temp->virt_addr, + comp_temp->bus_addr); + dma_pool_free(fsl_queue->desc_pool, + comp_temp->desc_virt_addr, + comp_temp->desc_bus_addr); + list_del(&comp_temp->list); + kfree(comp_temp); + } + + list_for_each_entry_safe(comp_temp, _comp_temp, + &fsl_queue->comp_free, list) { + dma_pool_free(fsl_queue->comp_pool, + comp_temp->virt_addr, + comp_temp->bus_addr); + dma_pool_free(fsl_queue->desc_pool, + comp_temp->desc_virt_addr, + comp_temp->desc_bus_addr); + list_del(&comp_temp->list); + kfree(comp_temp); + } + + dma_pool_destroy(fsl_queue->comp_pool); + dma_pool_destroy(fsl_queue->desc_pool); + + fsl_qdma->desc_allocated--; + fsl_queue->comp_pool = NULL; + fsl_queue->desc_pool = NULL; +} + +static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, + dma_addr_t dst, dma_addr_t src, u32 len) +{ + u32 cmd; + struct fsl_qdma_format *sdf, *ddf; + struct fsl_qdma_format *ccdf, *csgf_desc, *csgf_src, *csgf_dest; + + ccdf = fsl_comp->virt_addr; + csgf_desc = fsl_comp->virt_addr + 1; + csgf_src = fsl_comp->virt_addr + 2; + csgf_dest = fsl_comp->virt_addr + 3; + sdf = fsl_comp->desc_virt_addr; + ddf = fsl_comp->desc_virt_addr + 1; + + memset(fsl_comp->virt_addr, 0, FSL_QDMA_COMMAND_BUFFER_SIZE); + memset(fsl_comp->desc_virt_addr, 0, FSL_QDMA_DESCRIPTOR_BUFFER_SIZE); + /* Head Command Descriptor(Frame Descriptor) */ + qdma_desc_addr_set64(ccdf, fsl_comp->bus_addr + 16); + qdma_ccdf_set_format(ccdf, qdma_ccdf_get_offset(ccdf)); + qdma_ccdf_set_ser(ccdf, qdma_ccdf_get_status(ccdf)); + /* Status notification is enqueued to status queue. */ + /* Compound Command Descriptor(Frame List Table) */ + qdma_desc_addr_set64(csgf_desc, fsl_comp->desc_bus_addr); + /* It must be 32 as Compound S/G Descriptor */ + qdma_csgf_set_len(csgf_desc, 32); + qdma_desc_addr_set64(csgf_src, src); + qdma_csgf_set_len(csgf_src, len); + qdma_desc_addr_set64(csgf_dest, dst); + qdma_csgf_set_len(csgf_dest, len); + /* This entry is the last entry. */ + qdma_csgf_set_f(csgf_dest, len); + /* Descriptor Buffer */ + cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << + FSL_QDMA_CMD_RWTTYPE_OFFSET); + sdf->data = QDMA_SDDF_CMD(cmd); + + cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << + FSL_QDMA_CMD_RWTTYPE_OFFSET); + cmd |= cpu_to_le32(FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET); + ddf->data = QDMA_SDDF_CMD(cmd); +} + +/* + * Pre-request full command descriptor for enqueue. + */ +static int fsl_qdma_pre_request_enqueue_desc(struct fsl_qdma_queue *queue) +{ + int i; + struct fsl_qdma_comp *comp_temp, *_comp_temp; + + for (i = 0; i < queue->n_cq + FSL_COMMAND_QUEUE_OVERFLLOW; i++) { + comp_temp = kzalloc(sizeof(*comp_temp), GFP_KERNEL); + if (!comp_temp) + goto err_alloc; + comp_temp->virt_addr = + dma_pool_alloc(queue->comp_pool, GFP_KERNEL, + &comp_temp->bus_addr); + if (!comp_temp->virt_addr) + goto err_dma_alloc; + + comp_temp->desc_virt_addr = + dma_pool_alloc(queue->desc_pool, GFP_KERNEL, + &comp_temp->desc_bus_addr); + if (!comp_temp->desc_virt_addr) + goto err_desc_dma_alloc; + + list_add_tail(&comp_temp->list, &queue->comp_free); + } + + return 0; + +err_desc_dma_alloc: + dma_pool_free(queue->comp_pool, comp_temp->virt_addr, + comp_temp->bus_addr); + +err_dma_alloc: + kfree(comp_temp); + +err_alloc: + list_for_each_entry_safe(comp_temp, _comp_temp, + &queue->comp_free, list) { + if (comp_temp->virt_addr) + dma_pool_free(queue->comp_pool, + comp_temp->virt_addr, + comp_temp->bus_addr); + if (comp_temp->desc_virt_addr) + dma_pool_free(queue->desc_pool, + comp_temp->desc_virt_addr, + comp_temp->desc_bus_addr); + + list_del(&comp_temp->list); + kfree(comp_temp); + } + + return -ENOMEM; +} + +/* + * Request a command descriptor for enqueue. + */ +static struct fsl_qdma_comp +*fsl_qdma_request_enqueue_desc(struct fsl_qdma_chan *fsl_chan) +{ + unsigned long flags; + struct fsl_qdma_comp *comp_temp; + int timeout = FSL_QDMA_COMP_TIMEOUT; + struct fsl_qdma_queue *queue = fsl_chan->queue; + + while (timeout--) { + spin_lock_irqsave(&queue->queue_lock, flags); + if (!list_empty(&queue->comp_free)) { + comp_temp = list_first_entry(&queue->comp_free, + struct fsl_qdma_comp, + list); + list_del(&comp_temp->list); + + spin_unlock_irqrestore(&queue->queue_lock, flags); + comp_temp->qchan = fsl_chan; + return comp_temp; + } + spin_unlock_irqrestore(&queue->queue_lock, flags); + udelay(1); + } + + return NULL; +} + +static struct fsl_qdma_queue +*fsl_qdma_alloc_queue_resources(struct platform_device *pdev, + struct fsl_qdma_engine *fsl_qdma) +{ + int ret, len, i, j; + int queue_num, block_number; + unsigned int queue_size[FSL_QDMA_QUEUE_MAX]; + struct fsl_qdma_queue *queue_head, *queue_temp; + + queue_num = fsl_qdma->n_queues; + block_number = fsl_qdma->block_number; + + if (queue_num > FSL_QDMA_QUEUE_MAX) + queue_num = FSL_QDMA_QUEUE_MAX; + len = sizeof(*queue_head) * queue_num * block_number; + queue_head = devm_kzalloc(&pdev->dev, len, GFP_KERNEL); + if (!queue_head) + return NULL; + + ret = device_property_read_u32_array(&pdev->dev, "queue-sizes", + queue_size, queue_num); + if (ret) { + dev_err(&pdev->dev, "Can't get queue-sizes.\n"); + return NULL; + } + for (j = 0; j < block_number; j++) { + for (i = 0; i < queue_num; i++) { + if (queue_size[i] > FSL_QDMA_CIRCULAR_DESC_SIZE_MAX || + queue_size[i] < FSL_QDMA_CIRCULAR_DESC_SIZE_MIN) { + dev_err(&pdev->dev, + "Get wrong queue-sizes.\n"); + return NULL; + } + queue_temp = queue_head + i + (j * queue_num); + + queue_temp->cq = + dma_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + queue_size[i], + &queue_temp->bus_addr, + GFP_KERNEL); + if (!queue_temp->cq) + return NULL; + queue_temp->block_base = fsl_qdma->block_base + + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j); + queue_temp->n_cq = queue_size[i]; + queue_temp->id = i; + queue_temp->virt_head = queue_temp->cq; + queue_temp->virt_tail = queue_temp->cq; + /* + * List for queue command buffer + */ + INIT_LIST_HEAD(&queue_temp->comp_used); + spin_lock_init(&queue_temp->queue_lock); + } + } + return queue_head; +} + +static struct fsl_qdma_queue +*fsl_qdma_prep_status_queue(struct platform_device *pdev) +{ + int ret; + unsigned int status_size; + struct fsl_qdma_queue *status_head; + struct device_node *np = pdev->dev.of_node; + + ret = of_property_read_u32(np, "status-sizes", &status_size); + if (ret) { + dev_err(&pdev->dev, "Can't get status-sizes.\n"); + return NULL; + } + if (status_size > FSL_QDMA_CIRCULAR_DESC_SIZE_MAX || + status_size < FSL_QDMA_CIRCULAR_DESC_SIZE_MIN) { + dev_err(&pdev->dev, "Get wrong status_size.\n"); + return NULL; + } + status_head = devm_kzalloc(&pdev->dev, + sizeof(*status_head), GFP_KERNEL); + if (!status_head) + return NULL; + + /* + * Buffer for queue command + */ + status_head->cq = dma_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + status_size, + &status_head->bus_addr, + GFP_KERNEL); + if (!status_head->cq) { + devm_kfree(&pdev->dev, status_head); + return NULL; + } + status_head->n_cq = status_size; + status_head->virt_head = status_head->cq; + status_head->virt_tail = status_head->cq; + status_head->comp_pool = NULL; + + return status_head; +} + +static int fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma) +{ + u32 reg; + int i, j, count = FSL_QDMA_HALT_COUNT; + void __iomem *block, *ctrl = fsl_qdma->ctrl_base; + + /* Disable the command queue and wait for idle state. */ + reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR); + reg |= FSL_QDMA_DMR_DQD; + qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR); + for (j = 0; j < fsl_qdma->block_number; j++) { + block = fsl_qdma->block_base + + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j); + for (i = 0; i < FSL_QDMA_QUEUE_NUM_MAX; i++) + qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BCQMR(i)); + } + while (1) { + reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DSR); + if (!(reg & FSL_QDMA_DSR_DB)) + break; + if (count-- < 0) + return -EBUSY; + udelay(100); + } + + for (j = 0; j < fsl_qdma->block_number; j++) { + block = fsl_qdma->block_base + + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j); + + /* Disable status queue. */ + qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BSQMR); + + /* + * clear the command queue interrupt detect register for + * all queues. + */ + qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR, + block + FSL_QDMA_BCQIDR(0)); + } + + return 0; +} + +static int +fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, + void *block, + int id) +{ + bool duplicate; + u32 reg, i, count; + u8 completion_status; + struct fsl_qdma_queue *temp_queue; + struct fsl_qdma_format *status_addr; + struct fsl_qdma_comp *fsl_comp = NULL; + struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue; + struct fsl_qdma_queue *fsl_status = fsl_qdma->status[id]; + + count = FSL_QDMA_MAX_SIZE; + + while (count--) { + duplicate = 0; + reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQSR); + if (reg & FSL_QDMA_BSQSR_QE) + return 0; + + status_addr = fsl_status->virt_head; + + if (qdma_ccdf_get_queue(status_addr) == + __this_cpu_read(pre.queue) && + qdma_ccdf_addr_get64(status_addr) == + __this_cpu_read(pre.addr)) + duplicate = 1; + i = qdma_ccdf_get_queue(status_addr) + + id * fsl_qdma->n_queues; + __this_cpu_write(pre.addr, qdma_ccdf_addr_get64(status_addr)); + __this_cpu_write(pre.queue, qdma_ccdf_get_queue(status_addr)); + temp_queue = fsl_queue + i; + + spin_lock(&temp_queue->queue_lock); + if (list_empty(&temp_queue->comp_used)) { + if (!duplicate) { + spin_unlock(&temp_queue->queue_lock); + return -EAGAIN; + } + } else { + fsl_comp = list_first_entry(&temp_queue->comp_used, + struct fsl_qdma_comp, list); + if (fsl_comp->bus_addr + 16 != + __this_cpu_read(pre.addr)) { + if (!duplicate) { + spin_unlock(&temp_queue->queue_lock); + return -EAGAIN; + } + } + } + + if (duplicate) { + reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR); + reg |= FSL_QDMA_BSQMR_DI; + qdma_desc_addr_set64(status_addr, 0x0); + fsl_status->virt_head++; + if (fsl_status->virt_head == fsl_status->cq + + fsl_status->n_cq) + fsl_status->virt_head = fsl_status->cq; + qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR); + spin_unlock(&temp_queue->queue_lock); + continue; + } + list_del(&fsl_comp->list); + + completion_status = qdma_ccdf_get_status(status_addr); + + reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR); + reg |= FSL_QDMA_BSQMR_DI; + qdma_desc_addr_set64(status_addr, 0x0); + fsl_status->virt_head++; + if (fsl_status->virt_head == fsl_status->cq + fsl_status->n_cq) + fsl_status->virt_head = fsl_status->cq; + qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR); + spin_unlock(&temp_queue->queue_lock); + + /* The completion_status is evaluated here + * (outside of spin lock) + */ + if (completion_status) { + /* A completion error occurred! */ + if (completion_status & QDMA_CCDF_STATUS_WTE) { + /* Write transaction error */ + fsl_comp->vdesc.tx_result.result = + DMA_TRANS_WRITE_FAILED; + } else if (completion_status & QDMA_CCDF_STATUS_RTE) { + /* Read transaction error */ + fsl_comp->vdesc.tx_result.result = + DMA_TRANS_READ_FAILED; + } else { + /* Command/source/destination + * description error + */ + fsl_comp->vdesc.tx_result.result = + DMA_TRANS_ABORTED; + dev_err(fsl_qdma->dma_dev.dev, + "DMA status descriptor error %x\n", + completion_status); + } + } + + spin_lock(&fsl_comp->qchan->vchan.lock); + vchan_cookie_complete(&fsl_comp->vdesc); + fsl_comp->qchan->status = DMA_COMPLETE; + spin_unlock(&fsl_comp->qchan->vchan.lock); + } + + return 0; +} + +static irqreturn_t fsl_qdma_error_handler(int irq, void *dev_id) +{ + unsigned int intr; + struct fsl_qdma_engine *fsl_qdma = dev_id; + void __iomem *status = fsl_qdma->status_base; + unsigned int decfdw0r; + unsigned int decfdw1r; + unsigned int decfdw2r; + unsigned int decfdw3r; + + intr = qdma_readl(fsl_qdma, status + FSL_QDMA_DEDR); + + if (intr) { + decfdw0r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW0R); + decfdw1r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW1R); + decfdw2r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW2R); + decfdw3r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW3R); + dev_err(fsl_qdma->dma_dev.dev, + "DMA transaction error! (%x: %x-%x-%x-%x)\n", + intr, decfdw0r, decfdw1r, decfdw2r, decfdw3r); + } + + qdma_writel(fsl_qdma, FSL_QDMA_DEDR_CLEAR, status + FSL_QDMA_DEDR); + return IRQ_HANDLED; +} + +static irqreturn_t fsl_qdma_queue_handler(int irq, void *dev_id) +{ + int id; + unsigned int intr, reg; + struct fsl_qdma_engine *fsl_qdma = dev_id; + void __iomem *block, *ctrl = fsl_qdma->ctrl_base; + + id = irq - fsl_qdma->irq_base; + if (id < 0 && id > fsl_qdma->block_number) { + dev_err(fsl_qdma->dma_dev.dev, + "irq %d is wrong irq_base is %d\n", + irq, fsl_qdma->irq_base); + } + + block = fsl_qdma->block_base + + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, id); + + intr = qdma_readl(fsl_qdma, block + FSL_QDMA_BCQIDR(0)); + + if ((intr & FSL_QDMA_CQIDR_SQT) != 0) + intr = fsl_qdma_queue_transfer_complete(fsl_qdma, block, id); + + if (intr != 0) { + reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR); + reg |= FSL_QDMA_DMR_DQD; + qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR); + qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BCQIER(0)); + dev_err(fsl_qdma->dma_dev.dev, "QDMA: status err!\n"); + } + + /* Clear all detected events and interrupts. */ + qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR, + block + FSL_QDMA_BCQIDR(0)); + + return IRQ_HANDLED; +} + +static int +fsl_qdma_irq_init(struct platform_device *pdev, + struct fsl_qdma_engine *fsl_qdma) +{ + int i; + int cpu; + int ret; + char irq_name[20]; + + fsl_qdma->error_irq = + platform_get_irq_byname(pdev, "qdma-error"); + if (fsl_qdma->error_irq < 0) + return fsl_qdma->error_irq; + + ret = devm_request_irq(&pdev->dev, fsl_qdma->error_irq, + fsl_qdma_error_handler, 0, + "qDMA error", fsl_qdma); + if (ret) { + dev_err(&pdev->dev, "Can't register qDMA controller IRQ.\n"); + return ret; + } + + for (i = 0; i < fsl_qdma->block_number; i++) { + sprintf(irq_name, "qdma-queue%d", i); + fsl_qdma->queue_irq[i] = + platform_get_irq_byname(pdev, irq_name); + + if (fsl_qdma->queue_irq[i] < 0) + return fsl_qdma->queue_irq[i]; + + ret = devm_request_irq(&pdev->dev, + fsl_qdma->queue_irq[i], + fsl_qdma_queue_handler, + 0, + "qDMA queue", + fsl_qdma); + if (ret) { + dev_err(&pdev->dev, + "Can't register qDMA queue IRQ.\n"); + return ret; + } + + cpu = i % num_online_cpus(); + ret = irq_set_affinity_hint(fsl_qdma->queue_irq[i], + get_cpu_mask(cpu)); + if (ret) { + dev_err(&pdev->dev, + "Can't set cpu %d affinity to IRQ %d.\n", + cpu, + fsl_qdma->queue_irq[i]); + return ret; + } + } + + return 0; +} + +static void fsl_qdma_irq_exit(struct platform_device *pdev, + struct fsl_qdma_engine *fsl_qdma) +{ + int i; + + devm_free_irq(&pdev->dev, fsl_qdma->error_irq, fsl_qdma); + for (i = 0; i < fsl_qdma->block_number; i++) + devm_free_irq(&pdev->dev, fsl_qdma->queue_irq[i], fsl_qdma); +} + +static int fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma) +{ + u32 reg; + int i, j, ret; + struct fsl_qdma_queue *temp; + void __iomem *status = fsl_qdma->status_base; + void __iomem *block, *ctrl = fsl_qdma->ctrl_base; + struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue; + + /* Try to halt the qDMA engine first. */ + ret = fsl_qdma_halt(fsl_qdma); + if (ret) { + dev_err(fsl_qdma->dma_dev.dev, "DMA halt failed!"); + return ret; + } + + for (i = 0; i < fsl_qdma->block_number; i++) { + /* + * Clear the command queue interrupt detect register for + * all queues. + */ + + block = fsl_qdma->block_base + + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, i); + qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR, + block + FSL_QDMA_BCQIDR(0)); + } + + for (j = 0; j < fsl_qdma->block_number; j++) { + block = fsl_qdma->block_base + + FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j); + for (i = 0; i < fsl_qdma->n_queues; i++) { + temp = fsl_queue + i + (j * fsl_qdma->n_queues); + /* + * Initialize Command Queue registers to + * point to the first + * command descriptor in memory. + * Dequeue Pointer Address Registers + * Enqueue Pointer Address Registers + */ + + qdma_writel(fsl_qdma, temp->bus_addr, + block + FSL_QDMA_BCQDPA_SADDR(i)); + qdma_writel(fsl_qdma, temp->bus_addr, + block + FSL_QDMA_BCQEPA_SADDR(i)); + + /* Initialize the queue mode. */ + reg = FSL_QDMA_BCQMR_EN; + reg |= FSL_QDMA_BCQMR_CD_THLD(ilog2(temp->n_cq) - 4); + reg |= FSL_QDMA_BCQMR_CQ_SIZE(ilog2(temp->n_cq) - 6); + qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BCQMR(i)); + } + + /* + * Workaround for erratum: ERR010812. + * We must enable XOFF to avoid the enqueue rejection occurs. + * Setting SQCCMR ENTER_WM to 0x20. + */ + + qdma_writel(fsl_qdma, FSL_QDMA_SQCCMR_ENTER_WM, + block + FSL_QDMA_SQCCMR); + + /* + * Initialize status queue registers to point to the first + * command descriptor in memory. + * Dequeue Pointer Address Registers + * Enqueue Pointer Address Registers + */ + + qdma_writel(fsl_qdma, fsl_qdma->status[j]->bus_addr, + block + FSL_QDMA_SQEPAR); + qdma_writel(fsl_qdma, fsl_qdma->status[j]->bus_addr, + block + FSL_QDMA_SQDPAR); + /* Initialize status queue interrupt. */ + qdma_writel(fsl_qdma, FSL_QDMA_BCQIER_CQTIE, + block + FSL_QDMA_BCQIER(0)); + qdma_writel(fsl_qdma, FSL_QDMA_BSQICR_ICEN | + FSL_QDMA_BSQICR_ICST(5) | 0x8000, + block + FSL_QDMA_BSQICR); + qdma_writel(fsl_qdma, FSL_QDMA_CQIER_MEIE | + FSL_QDMA_CQIER_TEIE, + block + FSL_QDMA_CQIER); + + /* Initialize the status queue mode. */ + reg = FSL_QDMA_BSQMR_EN; + reg |= FSL_QDMA_BSQMR_CQ_SIZE(ilog2 + (fsl_qdma->status[j]->n_cq) - 6); + + qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR); + reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR); + } + + /* Initialize controller interrupt register. */ + qdma_writel(fsl_qdma, FSL_QDMA_DEDR_CLEAR, status + FSL_QDMA_DEDR); + qdma_writel(fsl_qdma, FSL_QDMA_DEIER_CLEAR, status + FSL_QDMA_DEIER); + + reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR); + reg &= ~FSL_QDMA_DMR_DQD; + qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR); + + return 0; +} + +static struct dma_async_tx_descriptor * +fsl_qdma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct fsl_qdma_comp *fsl_comp; + struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan); + + fsl_comp = fsl_qdma_request_enqueue_desc(fsl_chan); + + if (!fsl_comp) + return NULL; + + fsl_qdma_comp_fill_memcpy(fsl_comp, dst, src, len); + + return vchan_tx_prep(&fsl_chan->vchan, &fsl_comp->vdesc, flags); +} + +static void fsl_qdma_enqueue_desc(struct fsl_qdma_chan *fsl_chan) +{ + u32 reg; + struct virt_dma_desc *vdesc; + struct fsl_qdma_comp *fsl_comp; + struct fsl_qdma_queue *fsl_queue = fsl_chan->queue; + void __iomem *block = fsl_queue->block_base; + + reg = qdma_readl(fsl_chan->qdma, block + FSL_QDMA_BCQSR(fsl_queue->id)); + if (reg & (FSL_QDMA_BCQSR_QF | FSL_QDMA_BCQSR_XOFF)) + return; + vdesc = vchan_next_desc(&fsl_chan->vchan); + if (!vdesc) + return; + list_del(&vdesc->node); + fsl_comp = to_fsl_qdma_comp(vdesc); + + memcpy(fsl_queue->virt_head++, + fsl_comp->virt_addr, sizeof(struct fsl_qdma_format)); + if (fsl_queue->virt_head == fsl_queue->cq + fsl_queue->n_cq) + fsl_queue->virt_head = fsl_queue->cq; + + list_add_tail(&fsl_comp->list, &fsl_queue->comp_used); + barrier(); + reg = qdma_readl(fsl_chan->qdma, block + FSL_QDMA_BCQMR(fsl_queue->id)); + reg |= FSL_QDMA_BCQMR_EI; + qdma_writel(fsl_chan->qdma, reg, block + FSL_QDMA_BCQMR(fsl_queue->id)); + fsl_chan->status = DMA_IN_PROGRESS; +} + +static void fsl_qdma_free_desc(struct virt_dma_desc *vdesc) +{ + unsigned long flags; + struct fsl_qdma_comp *fsl_comp; + struct fsl_qdma_queue *fsl_queue; + + fsl_comp = to_fsl_qdma_comp(vdesc); + fsl_queue = fsl_comp->qchan->queue; + + spin_lock_irqsave(&fsl_queue->queue_lock, flags); + list_add_tail(&fsl_comp->list, &fsl_queue->comp_free); + spin_unlock_irqrestore(&fsl_queue->queue_lock, flags); +} + +static void fsl_qdma_issue_pending(struct dma_chan *chan) +{ + unsigned long flags; + struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan); + struct fsl_qdma_queue *fsl_queue = fsl_chan->queue; + + spin_lock_irqsave(&fsl_queue->queue_lock, flags); + spin_lock(&fsl_chan->vchan.lock); + if (vchan_issue_pending(&fsl_chan->vchan)) + fsl_qdma_enqueue_desc(fsl_chan); + spin_unlock(&fsl_chan->vchan.lock); + spin_unlock_irqrestore(&fsl_queue->queue_lock, flags); +} + +static void fsl_qdma_synchronize(struct dma_chan *chan) +{ + struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan); + + vchan_synchronize(&fsl_chan->vchan); +} + +static int fsl_qdma_terminate_all(struct dma_chan *chan) +{ + LIST_HEAD(head); + unsigned long flags; + struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan); + + spin_lock_irqsave(&fsl_chan->vchan.lock, flags); + vchan_get_all_descriptors(&fsl_chan->vchan, &head); + spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags); + vchan_dma_desc_free_list(&fsl_chan->vchan, &head); + return 0; +} + +static int fsl_qdma_alloc_chan_resources(struct dma_chan *chan) +{ + int ret; + struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan); + struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma; + struct fsl_qdma_queue *fsl_queue = fsl_chan->queue; + + if (fsl_queue->comp_pool && fsl_queue->desc_pool) + return fsl_qdma->desc_allocated; + + INIT_LIST_HEAD(&fsl_queue->comp_free); + + /* + * The dma pool for queue command buffer + */ + fsl_queue->comp_pool = + dma_pool_create("comp_pool", + chan->device->dev, + FSL_QDMA_COMMAND_BUFFER_SIZE, + 64, 0); + if (!fsl_queue->comp_pool) + return -ENOMEM; + + /* + * The dma pool for Descriptor(SD/DD) buffer + */ + fsl_queue->desc_pool = + dma_pool_create("desc_pool", + chan->device->dev, + FSL_QDMA_DESCRIPTOR_BUFFER_SIZE, + 32, 0); + if (!fsl_queue->desc_pool) + goto err_desc_pool; + + ret = fsl_qdma_pre_request_enqueue_desc(fsl_queue); + if (ret) { + dev_err(chan->device->dev, + "failed to alloc dma buffer for S/G descriptor\n"); + goto err_mem; + } + + fsl_qdma->desc_allocated++; + return fsl_qdma->desc_allocated; + +err_mem: + dma_pool_destroy(fsl_queue->desc_pool); +err_desc_pool: + dma_pool_destroy(fsl_queue->comp_pool); + return -ENOMEM; +} + +static int fsl_qdma_probe(struct platform_device *pdev) +{ + int ret, i; + int blk_num, blk_off; + u32 len, chans, queues; + struct fsl_qdma_chan *fsl_chan; + struct fsl_qdma_engine *fsl_qdma; + struct device_node *np = pdev->dev.of_node; + + ret = of_property_read_u32(np, "dma-channels", &chans); + if (ret) { + dev_err(&pdev->dev, "Can't get dma-channels.\n"); + return ret; + } + + ret = of_property_read_u32(np, "block-offset", &blk_off); + if (ret) { + dev_err(&pdev->dev, "Can't get block-offset.\n"); + return ret; + } + + ret = of_property_read_u32(np, "block-number", &blk_num); + if (ret) { + dev_err(&pdev->dev, "Can't get block-number.\n"); + return ret; + } + + blk_num = min_t(int, blk_num, num_online_cpus()); + + len = sizeof(*fsl_qdma); + fsl_qdma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL); + if (!fsl_qdma) + return -ENOMEM; + + len = sizeof(*fsl_chan) * chans; + fsl_qdma->chans = devm_kzalloc(&pdev->dev, len, GFP_KERNEL); + if (!fsl_qdma->chans) + return -ENOMEM; + + len = sizeof(struct fsl_qdma_queue *) * blk_num; + fsl_qdma->status = devm_kzalloc(&pdev->dev, len, GFP_KERNEL); + if (!fsl_qdma->status) + return -ENOMEM; + + len = sizeof(int) * blk_num; + fsl_qdma->queue_irq = devm_kzalloc(&pdev->dev, len, GFP_KERNEL); + if (!fsl_qdma->queue_irq) + return -ENOMEM; + + ret = of_property_read_u32(np, "fsl,dma-queues", &queues); + if (ret) { + dev_err(&pdev->dev, "Can't get queues.\n"); + return ret; + } + + fsl_qdma->desc_allocated = 0; + fsl_qdma->n_chans = chans; + fsl_qdma->n_queues = queues; + fsl_qdma->block_number = blk_num; + fsl_qdma->block_offset = blk_off; + + mutex_init(&fsl_qdma->fsl_qdma_mutex); + + for (i = 0; i < fsl_qdma->block_number; i++) { + fsl_qdma->status[i] = fsl_qdma_prep_status_queue(pdev); + if (!fsl_qdma->status[i]) + return -ENOMEM; + } + fsl_qdma->ctrl_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(fsl_qdma->ctrl_base)) + return PTR_ERR(fsl_qdma->ctrl_base); + + fsl_qdma->status_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(fsl_qdma->status_base)) + return PTR_ERR(fsl_qdma->status_base); + + fsl_qdma->block_base = devm_platform_ioremap_resource(pdev, 2); + if (IS_ERR(fsl_qdma->block_base)) + return PTR_ERR(fsl_qdma->block_base); + fsl_qdma->queue = fsl_qdma_alloc_queue_resources(pdev, fsl_qdma); + if (!fsl_qdma->queue) + return -ENOMEM; + + ret = fsl_qdma_irq_init(pdev, fsl_qdma); + if (ret) + return ret; + + fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); + if (fsl_qdma->irq_base < 0) + return fsl_qdma->irq_base; + + fsl_qdma->feature = of_property_read_bool(np, "big-endian"); + INIT_LIST_HEAD(&fsl_qdma->dma_dev.channels); + + for (i = 0; i < fsl_qdma->n_chans; i++) { + struct fsl_qdma_chan *fsl_chan = &fsl_qdma->chans[i]; + + fsl_chan->qdma = fsl_qdma; + fsl_chan->queue = fsl_qdma->queue + i % (fsl_qdma->n_queues * + fsl_qdma->block_number); + fsl_chan->vchan.desc_free = fsl_qdma_free_desc; + vchan_init(&fsl_chan->vchan, &fsl_qdma->dma_dev); + } + + dma_cap_set(DMA_MEMCPY, fsl_qdma->dma_dev.cap_mask); + + fsl_qdma->dma_dev.dev = &pdev->dev; + fsl_qdma->dma_dev.device_free_chan_resources = + fsl_qdma_free_chan_resources; + fsl_qdma->dma_dev.device_alloc_chan_resources = + fsl_qdma_alloc_chan_resources; + fsl_qdma->dma_dev.device_tx_status = dma_cookie_status; + fsl_qdma->dma_dev.device_prep_dma_memcpy = fsl_qdma_prep_memcpy; + fsl_qdma->dma_dev.device_issue_pending = fsl_qdma_issue_pending; + fsl_qdma->dma_dev.device_synchronize = fsl_qdma_synchronize; + fsl_qdma->dma_dev.device_terminate_all = fsl_qdma_terminate_all; + + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) { + dev_err(&pdev->dev, "dma_set_mask failure.\n"); + return ret; + } + + platform_set_drvdata(pdev, fsl_qdma); + + ret = dma_async_device_register(&fsl_qdma->dma_dev); + if (ret) { + dev_err(&pdev->dev, + "Can't register NXP Layerscape qDMA engine.\n"); + return ret; + } + + ret = fsl_qdma_reg_init(fsl_qdma); + if (ret) { + dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); + return ret; + } + + return 0; +} + +static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev) +{ + struct fsl_qdma_chan *chan, *_chan; + + list_for_each_entry_safe(chan, _chan, + &dmadev->channels, vchan.chan.device_node) { + list_del(&chan->vchan.chan.device_node); + tasklet_kill(&chan->vchan.task); + } +} + +static int fsl_qdma_remove(struct platform_device *pdev) +{ + int i; + struct fsl_qdma_queue *status; + struct device_node *np = pdev->dev.of_node; + struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev); + + fsl_qdma_irq_exit(pdev, fsl_qdma); + fsl_qdma_cleanup_vchan(&fsl_qdma->dma_dev); + of_dma_controller_free(np); + dma_async_device_unregister(&fsl_qdma->dma_dev); + + for (i = 0; i < fsl_qdma->block_number; i++) { + status = fsl_qdma->status[i]; + dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) * + status->n_cq, status->cq, status->bus_addr); + } + return 0; +} + +static const struct of_device_id fsl_qdma_dt_ids[] = { + { .compatible = "fsl,ls1021a-qdma", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, fsl_qdma_dt_ids); + +static struct platform_driver fsl_qdma_driver = { + .driver = { + .name = "fsl-qdma", + .of_match_table = fsl_qdma_dt_ids, + }, + .probe = fsl_qdma_probe, + .remove = fsl_qdma_remove, +}; + +module_platform_driver(fsl_qdma_driver); + +MODULE_ALIAS("platform:fsl-qdma"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("NXP Layerscape qDMA engine driver"); diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c new file mode 100644 index 0000000000..0b9ca93ce3 --- /dev/null +++ b/drivers/dma/fsl_raid.c @@ -0,0 +1,898 @@ +/* + * drivers/dma/fsl_raid.c + * + * Freescale RAID Engine device driver + * + * Author: + * Harninder Rai + * Naveen Burmi + * + * Rewrite: + * Xuelin Shi + * + * Copyright (c) 2010-2014 Freescale Semiconductor, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Theory of operation: + * + * General capabilities: + * RAID Engine (RE) block is capable of offloading XOR, memcpy and P/Q + * calculations required in RAID5 and RAID6 operations. RE driver + * registers with Linux's ASYNC layer as dma driver. RE hardware + * maintains strict ordering of the requests through chained + * command queueing. + * + * Data flow: + * Software RAID layer of Linux (MD layer) maintains RAID partitions, + * strips, stripes etc. It sends requests to the underlying ASYNC layer + * which further passes it to RE driver. ASYNC layer decides which request + * goes to which job ring of RE hardware. For every request processed by + * RAID Engine, driver gets an interrupt unless coalescing is set. The + * per job ring interrupt handler checks the status register for errors, + * clears the interrupt and leave the post interrupt processing to the irq + * thread. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "fsl_raid.h" + +#define FSL_RE_MAX_XOR_SRCS 16 +#define FSL_RE_MAX_PQ_SRCS 16 +#define FSL_RE_MIN_DESCS 256 +#define FSL_RE_MAX_DESCS (4 * FSL_RE_MIN_DESCS) +#define FSL_RE_FRAME_FORMAT 0x1 +#define FSL_RE_MAX_DATA_LEN (1024*1024) + +#define to_fsl_re_dma_desc(tx) container_of(tx, struct fsl_re_desc, async_tx) + +/* Add descriptors into per chan software queue - submit_q */ +static dma_cookie_t fsl_re_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct fsl_re_desc *desc; + struct fsl_re_chan *re_chan; + dma_cookie_t cookie; + unsigned long flags; + + desc = to_fsl_re_dma_desc(tx); + re_chan = container_of(tx->chan, struct fsl_re_chan, chan); + + spin_lock_irqsave(&re_chan->desc_lock, flags); + cookie = dma_cookie_assign(tx); + list_add_tail(&desc->node, &re_chan->submit_q); + spin_unlock_irqrestore(&re_chan->desc_lock, flags); + + return cookie; +} + +/* Copy descriptor from per chan software queue into hardware job ring */ +static void fsl_re_issue_pending(struct dma_chan *chan) +{ + struct fsl_re_chan *re_chan; + int avail; + struct fsl_re_desc *desc, *_desc; + unsigned long flags; + + re_chan = container_of(chan, struct fsl_re_chan, chan); + + spin_lock_irqsave(&re_chan->desc_lock, flags); + avail = FSL_RE_SLOT_AVAIL( + in_be32(&re_chan->jrregs->inbring_slot_avail)); + + list_for_each_entry_safe(desc, _desc, &re_chan->submit_q, node) { + if (!avail) + break; + + list_move_tail(&desc->node, &re_chan->active_q); + + memcpy(&re_chan->inb_ring_virt_addr[re_chan->inb_count], + &desc->hwdesc, sizeof(struct fsl_re_hw_desc)); + + re_chan->inb_count = (re_chan->inb_count + 1) & + FSL_RE_RING_SIZE_MASK; + out_be32(&re_chan->jrregs->inbring_add_job, FSL_RE_ADD_JOB(1)); + avail--; + } + spin_unlock_irqrestore(&re_chan->desc_lock, flags); +} + +static void fsl_re_desc_done(struct fsl_re_desc *desc) +{ + dma_cookie_complete(&desc->async_tx); + dma_descriptor_unmap(&desc->async_tx); + dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL); +} + +static void fsl_re_cleanup_descs(struct fsl_re_chan *re_chan) +{ + struct fsl_re_desc *desc, *_desc; + unsigned long flags; + + spin_lock_irqsave(&re_chan->desc_lock, flags); + list_for_each_entry_safe(desc, _desc, &re_chan->ack_q, node) { + if (async_tx_test_ack(&desc->async_tx)) + list_move_tail(&desc->node, &re_chan->free_q); + } + spin_unlock_irqrestore(&re_chan->desc_lock, flags); + + fsl_re_issue_pending(&re_chan->chan); +} + +static void fsl_re_dequeue(struct tasklet_struct *t) +{ + struct fsl_re_chan *re_chan = from_tasklet(re_chan, t, irqtask); + struct fsl_re_desc *desc, *_desc; + struct fsl_re_hw_desc *hwdesc; + unsigned long flags; + unsigned int count, oub_count; + int found; + + fsl_re_cleanup_descs(re_chan); + + spin_lock_irqsave(&re_chan->desc_lock, flags); + count = FSL_RE_SLOT_FULL(in_be32(&re_chan->jrregs->oubring_slot_full)); + while (count--) { + found = 0; + hwdesc = &re_chan->oub_ring_virt_addr[re_chan->oub_count]; + list_for_each_entry_safe(desc, _desc, &re_chan->active_q, + node) { + /* compare the hw dma addr to find the completed */ + if (desc->hwdesc.lbea32 == hwdesc->lbea32 && + desc->hwdesc.addr_low == hwdesc->addr_low) { + found = 1; + break; + } + } + + if (found) { + fsl_re_desc_done(desc); + list_move_tail(&desc->node, &re_chan->ack_q); + } else { + dev_err(re_chan->dev, + "found hwdesc not in sw queue, discard it\n"); + } + + oub_count = (re_chan->oub_count + 1) & FSL_RE_RING_SIZE_MASK; + re_chan->oub_count = oub_count; + + out_be32(&re_chan->jrregs->oubring_job_rmvd, + FSL_RE_RMVD_JOB(1)); + } + spin_unlock_irqrestore(&re_chan->desc_lock, flags); +} + +/* Per Job Ring interrupt handler */ +static irqreturn_t fsl_re_isr(int irq, void *data) +{ + struct fsl_re_chan *re_chan; + u32 irqstate, status; + + re_chan = dev_get_drvdata((struct device *)data); + + irqstate = in_be32(&re_chan->jrregs->jr_interrupt_status); + if (!irqstate) + return IRQ_NONE; + + /* + * There's no way in upper layer (read MD layer) to recover from + * error conditions except restart everything. In long term we + * need to do something more than just crashing + */ + if (irqstate & FSL_RE_ERROR) { + status = in_be32(&re_chan->jrregs->jr_status); + dev_err(re_chan->dev, "chan error irqstate: %x, status: %x\n", + irqstate, status); + } + + /* Clear interrupt */ + out_be32(&re_chan->jrregs->jr_interrupt_status, FSL_RE_CLR_INTR); + + tasklet_schedule(&re_chan->irqtask); + + return IRQ_HANDLED; +} + +static enum dma_status fsl_re_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(chan, cookie, txstate); +} + +static void fill_cfd_frame(struct fsl_re_cmpnd_frame *cf, u8 index, + size_t length, dma_addr_t addr, bool final) +{ + u32 efrl = length & FSL_RE_CF_LENGTH_MASK; + + efrl |= final << FSL_RE_CF_FINAL_SHIFT; + cf[index].efrl32 = efrl; + cf[index].addr_high = upper_32_bits(addr); + cf[index].addr_low = lower_32_bits(addr); +} + +static struct fsl_re_desc *fsl_re_init_desc(struct fsl_re_chan *re_chan, + struct fsl_re_desc *desc, + void *cf, dma_addr_t paddr) +{ + desc->re_chan = re_chan; + desc->async_tx.tx_submit = fsl_re_tx_submit; + dma_async_tx_descriptor_init(&desc->async_tx, &re_chan->chan); + INIT_LIST_HEAD(&desc->node); + + desc->hwdesc.fmt32 = FSL_RE_FRAME_FORMAT << FSL_RE_HWDESC_FMT_SHIFT; + desc->hwdesc.lbea32 = upper_32_bits(paddr); + desc->hwdesc.addr_low = lower_32_bits(paddr); + desc->cf_addr = cf; + desc->cf_paddr = paddr; + + desc->cdb_addr = (void *)(cf + FSL_RE_CF_DESC_SIZE); + desc->cdb_paddr = paddr + FSL_RE_CF_DESC_SIZE; + + return desc; +} + +static struct fsl_re_desc *fsl_re_chan_alloc_desc(struct fsl_re_chan *re_chan, + unsigned long flags) +{ + struct fsl_re_desc *desc = NULL; + void *cf; + dma_addr_t paddr; + unsigned long lock_flag; + + fsl_re_cleanup_descs(re_chan); + + spin_lock_irqsave(&re_chan->desc_lock, lock_flag); + if (!list_empty(&re_chan->free_q)) { + /* take one desc from free_q */ + desc = list_first_entry(&re_chan->free_q, + struct fsl_re_desc, node); + list_del(&desc->node); + + desc->async_tx.flags = flags; + } + spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag); + + if (!desc) { + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_NOWAIT, + &paddr); + if (!cf) { + kfree(desc); + return NULL; + } + + desc = fsl_re_init_desc(re_chan, desc, cf, paddr); + desc->async_tx.flags = flags; + + spin_lock_irqsave(&re_chan->desc_lock, lock_flag); + re_chan->alloc_count++; + spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag); + } + + return desc; +} + +static struct dma_async_tx_descriptor *fsl_re_prep_dma_genq( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct fsl_re_chan *re_chan; + struct fsl_re_desc *desc; + struct fsl_re_xor_cdb *xor; + struct fsl_re_cmpnd_frame *cf; + u32 cdb; + unsigned int i, j; + unsigned int save_src_cnt = src_cnt; + int cont_q = 0; + + re_chan = container_of(chan, struct fsl_re_chan, chan); + if (len > FSL_RE_MAX_DATA_LEN) { + dev_err(re_chan->dev, "genq tx length %zu, max length %d\n", + len, FSL_RE_MAX_DATA_LEN); + return NULL; + } + + desc = fsl_re_chan_alloc_desc(re_chan, flags); + if (desc <= 0) + return NULL; + + if (scf && (flags & DMA_PREP_CONTINUE)) { + cont_q = 1; + src_cnt += 1; + } + + /* Filling xor CDB */ + cdb = FSL_RE_XOR_OPCODE << FSL_RE_CDB_OPCODE_SHIFT; + cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT; + cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT; + cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT; + cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT; + xor = desc->cdb_addr; + xor->cdb32 = cdb; + + if (scf) { + /* compute q = src0*coef0^src1*coef1^..., * is GF(8) mult */ + for (i = 0; i < save_src_cnt; i++) + xor->gfm[i] = scf[i]; + if (cont_q) + xor->gfm[i++] = 1; + } else { + /* compute P, that is XOR all srcs */ + for (i = 0; i < src_cnt; i++) + xor->gfm[i] = 1; + } + + /* Filling frame 0 of compound frame descriptor with CDB */ + cf = desc->cf_addr; + fill_cfd_frame(cf, 0, sizeof(*xor), desc->cdb_paddr, 0); + + /* Fill CFD's 1st frame with dest buffer */ + fill_cfd_frame(cf, 1, len, dest, 0); + + /* Fill CFD's rest of the frames with source buffers */ + for (i = 2, j = 0; j < save_src_cnt; i++, j++) + fill_cfd_frame(cf, i, len, src[j], 0); + + if (cont_q) + fill_cfd_frame(cf, i++, len, dest, 0); + + /* Setting the final bit in the last source buffer frame in CFD */ + cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT; + + return &desc->async_tx; +} + +/* + * Prep function for P parity calculation.In RAID Engine terminology, + * XOR calculation is called GenQ calculation done through GenQ command + */ +static struct dma_async_tx_descriptor *fsl_re_prep_dma_xor( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + /* NULL let genq take all coef as 1 */ + return fsl_re_prep_dma_genq(chan, dest, src, src_cnt, NULL, len, flags); +} + +/* + * Prep function for P/Q parity calculation.In RAID Engine terminology, + * P/Q calculation is called GenQQ done through GenQQ command + */ +static struct dma_async_tx_descriptor *fsl_re_prep_dma_pq( + struct dma_chan *chan, dma_addr_t *dest, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct fsl_re_chan *re_chan; + struct fsl_re_desc *desc; + struct fsl_re_pq_cdb *pq; + struct fsl_re_cmpnd_frame *cf; + u32 cdb; + u8 *p; + int gfmq_len, i, j; + unsigned int save_src_cnt = src_cnt; + + re_chan = container_of(chan, struct fsl_re_chan, chan); + if (len > FSL_RE_MAX_DATA_LEN) { + dev_err(re_chan->dev, "pq tx length is %zu, max length is %d\n", + len, FSL_RE_MAX_DATA_LEN); + return NULL; + } + + /* + * RE requires at least 2 sources, if given only one source, we pass the + * second source same as the first one. + * With only one source, generating P is meaningless, only generate Q. + */ + if (src_cnt == 1) { + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_src[2]; + unsigned char coef[2]; + + dma_src[0] = *src; + coef[0] = *scf; + dma_src[1] = *src; + coef[1] = 0; + tx = fsl_re_prep_dma_genq(chan, dest[1], dma_src, 2, coef, len, + flags); + if (tx) + desc = to_fsl_re_dma_desc(tx); + + return tx; + } + + /* + * During RAID6 array creation, Linux's MD layer gets P and Q + * calculated separately in two steps. But our RAID Engine has + * the capability to calculate both P and Q with a single command + * Hence to merge well with MD layer, we need to provide a hook + * here and call re_jq_prep_dma_genq() function + */ + + if (flags & DMA_PREP_PQ_DISABLE_P) + return fsl_re_prep_dma_genq(chan, dest[1], src, src_cnt, + scf, len, flags); + + if (flags & DMA_PREP_CONTINUE) + src_cnt += 3; + + desc = fsl_re_chan_alloc_desc(re_chan, flags); + if (desc <= 0) + return NULL; + + /* Filling GenQQ CDB */ + cdb = FSL_RE_PQ_OPCODE << FSL_RE_CDB_OPCODE_SHIFT; + cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT; + cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT; + cdb |= FSL_RE_BUFFER_OUTPUT << FSL_RE_CDB_BUFFER_SHIFT; + cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT; + + pq = desc->cdb_addr; + pq->cdb32 = cdb; + + p = pq->gfm_q1; + /* Init gfm_q1[] */ + for (i = 0; i < src_cnt; i++) + p[i] = 1; + + /* Align gfm[] to 32bit */ + gfmq_len = ALIGN(src_cnt, 4); + + /* Init gfm_q2[] */ + p += gfmq_len; + for (i = 0; i < src_cnt; i++) + p[i] = scf[i]; + + /* Filling frame 0 of compound frame descriptor with CDB */ + cf = desc->cf_addr; + fill_cfd_frame(cf, 0, sizeof(struct fsl_re_pq_cdb), desc->cdb_paddr, 0); + + /* Fill CFD's 1st & 2nd frame with dest buffers */ + for (i = 1, j = 0; i < 3; i++, j++) + fill_cfd_frame(cf, i, len, dest[j], 0); + + /* Fill CFD's rest of the frames with source buffers */ + for (i = 3, j = 0; j < save_src_cnt; i++, j++) + fill_cfd_frame(cf, i, len, src[j], 0); + + /* PQ computation continuation */ + if (flags & DMA_PREP_CONTINUE) { + if (src_cnt - save_src_cnt == 3) { + p[save_src_cnt] = 0; + p[save_src_cnt + 1] = 0; + p[save_src_cnt + 2] = 1; + fill_cfd_frame(cf, i++, len, dest[0], 0); + fill_cfd_frame(cf, i++, len, dest[1], 0); + fill_cfd_frame(cf, i++, len, dest[1], 0); + } else { + dev_err(re_chan->dev, "PQ tx continuation error!\n"); + return NULL; + } + } + + /* Setting the final bit in the last source buffer frame in CFD */ + cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT; + + return &desc->async_tx; +} + +/* + * Prep function for memcpy. In RAID Engine, memcpy is done through MOVE + * command. Logic of this function will need to be modified once multipage + * support is added in Linux's MD/ASYNC Layer + */ +static struct dma_async_tx_descriptor *fsl_re_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct fsl_re_chan *re_chan; + struct fsl_re_desc *desc; + size_t length; + struct fsl_re_cmpnd_frame *cf; + struct fsl_re_move_cdb *move; + u32 cdb; + + re_chan = container_of(chan, struct fsl_re_chan, chan); + + if (len > FSL_RE_MAX_DATA_LEN) { + dev_err(re_chan->dev, "cp tx length is %zu, max length is %d\n", + len, FSL_RE_MAX_DATA_LEN); + return NULL; + } + + desc = fsl_re_chan_alloc_desc(re_chan, flags); + if (desc <= 0) + return NULL; + + /* Filling move CDB */ + cdb = FSL_RE_MOVE_OPCODE << FSL_RE_CDB_OPCODE_SHIFT; + cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT; + cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT; + cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT; + + move = desc->cdb_addr; + move->cdb32 = cdb; + + /* Filling frame 0 of CFD with move CDB */ + cf = desc->cf_addr; + fill_cfd_frame(cf, 0, sizeof(*move), desc->cdb_paddr, 0); + + length = min_t(size_t, len, FSL_RE_MAX_DATA_LEN); + + /* Fill CFD's 1st frame with dest buffer */ + fill_cfd_frame(cf, 1, length, dest, 0); + + /* Fill CFD's 2nd frame with src buffer */ + fill_cfd_frame(cf, 2, length, src, 1); + + return &desc->async_tx; +} + +static int fsl_re_alloc_chan_resources(struct dma_chan *chan) +{ + struct fsl_re_chan *re_chan; + struct fsl_re_desc *desc; + void *cf; + dma_addr_t paddr; + int i; + + re_chan = container_of(chan, struct fsl_re_chan, chan); + for (i = 0; i < FSL_RE_MIN_DESCS; i++) { + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + break; + + cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_KERNEL, + &paddr); + if (!cf) { + kfree(desc); + break; + } + + INIT_LIST_HEAD(&desc->node); + fsl_re_init_desc(re_chan, desc, cf, paddr); + + list_add_tail(&desc->node, &re_chan->free_q); + re_chan->alloc_count++; + } + return re_chan->alloc_count; +} + +static void fsl_re_free_chan_resources(struct dma_chan *chan) +{ + struct fsl_re_chan *re_chan; + struct fsl_re_desc *desc; + + re_chan = container_of(chan, struct fsl_re_chan, chan); + while (re_chan->alloc_count--) { + desc = list_first_entry(&re_chan->free_q, + struct fsl_re_desc, + node); + + list_del(&desc->node); + dma_pool_free(re_chan->re_dev->cf_desc_pool, desc->cf_addr, + desc->cf_paddr); + kfree(desc); + } + + if (!list_empty(&re_chan->free_q)) + dev_err(re_chan->dev, "chan resource cannot be cleaned!\n"); +} + +static int fsl_re_chan_probe(struct platform_device *ofdev, + struct device_node *np, u8 q, u32 off) +{ + struct device *dev, *chandev; + struct fsl_re_drv_private *re_priv; + struct fsl_re_chan *chan; + struct dma_device *dma_dev; + u32 ptr; + u32 status; + int ret = 0, rc; + struct platform_device *chan_ofdev; + + dev = &ofdev->dev; + re_priv = dev_get_drvdata(dev); + dma_dev = &re_priv->dma_dev; + + chan = devm_kzalloc(dev, sizeof(*chan), GFP_KERNEL); + if (!chan) + return -ENOMEM; + + /* create platform device for chan node */ + chan_ofdev = of_platform_device_create(np, NULL, dev); + if (!chan_ofdev) { + dev_err(dev, "Not able to create ofdev for jr %d\n", q); + ret = -EINVAL; + goto err_free; + } + + /* read reg property from dts */ + rc = of_property_read_u32(np, "reg", &ptr); + if (rc) { + dev_err(dev, "Reg property not found in jr %d\n", q); + ret = -ENODEV; + goto err_free; + } + + chan->jrregs = (struct fsl_re_chan_cfg *)((u8 *)re_priv->re_regs + + off + ptr); + + /* read irq property from dts */ + chan->irq = irq_of_parse_and_map(np, 0); + if (!chan->irq) { + dev_err(dev, "No IRQ defined for JR %d\n", q); + ret = -ENODEV; + goto err_free; + } + + snprintf(chan->name, sizeof(chan->name), "re_jr%02d", q); + + chandev = &chan_ofdev->dev; + tasklet_setup(&chan->irqtask, fsl_re_dequeue); + + ret = request_irq(chan->irq, fsl_re_isr, 0, chan->name, chandev); + if (ret) { + dev_err(dev, "Unable to register interrupt for JR %d\n", q); + ret = -EINVAL; + goto err_free; + } + + re_priv->re_jrs[q] = chan; + chan->chan.device = dma_dev; + chan->chan.private = chan; + chan->dev = chandev; + chan->re_dev = re_priv; + + spin_lock_init(&chan->desc_lock); + INIT_LIST_HEAD(&chan->ack_q); + INIT_LIST_HEAD(&chan->active_q); + INIT_LIST_HEAD(&chan->submit_q); + INIT_LIST_HEAD(&chan->free_q); + + chan->inb_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool, + GFP_KERNEL, &chan->inb_phys_addr); + if (!chan->inb_ring_virt_addr) { + dev_err(dev, "No dma memory for inb_ring_virt_addr\n"); + ret = -ENOMEM; + goto err_free; + } + + chan->oub_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool, + GFP_KERNEL, &chan->oub_phys_addr); + if (!chan->oub_ring_virt_addr) { + dev_err(dev, "No dma memory for oub_ring_virt_addr\n"); + ret = -ENOMEM; + goto err_free_1; + } + + /* Program the Inbound/Outbound ring base addresses and size */ + out_be32(&chan->jrregs->inbring_base_h, + chan->inb_phys_addr & FSL_RE_ADDR_BIT_MASK); + out_be32(&chan->jrregs->oubring_base_h, + chan->oub_phys_addr & FSL_RE_ADDR_BIT_MASK); + out_be32(&chan->jrregs->inbring_base_l, + chan->inb_phys_addr >> FSL_RE_ADDR_BIT_SHIFT); + out_be32(&chan->jrregs->oubring_base_l, + chan->oub_phys_addr >> FSL_RE_ADDR_BIT_SHIFT); + out_be32(&chan->jrregs->inbring_size, + FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT); + out_be32(&chan->jrregs->oubring_size, + FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT); + + /* Read LIODN value from u-boot */ + status = in_be32(&chan->jrregs->jr_config_1) & FSL_RE_REG_LIODN_MASK; + + /* Program the CFG reg */ + out_be32(&chan->jrregs->jr_config_1, + FSL_RE_CFG1_CBSI | FSL_RE_CFG1_CBS0 | status); + + dev_set_drvdata(chandev, chan); + + /* Enable RE/CHAN */ + out_be32(&chan->jrregs->jr_command, FSL_RE_ENABLE); + + return 0; + +err_free_1: + dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr, + chan->inb_phys_addr); +err_free: + return ret; +} + +/* Probe function for RAID Engine */ +static int fsl_re_probe(struct platform_device *ofdev) +{ + struct fsl_re_drv_private *re_priv; + struct device_node *np; + struct device_node *child; + u32 off; + u8 ridx = 0; + struct dma_device *dma_dev; + struct resource *res; + int rc; + struct device *dev = &ofdev->dev; + + re_priv = devm_kzalloc(dev, sizeof(*re_priv), GFP_KERNEL); + if (!re_priv) + return -ENOMEM; + + res = platform_get_resource(ofdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + /* IOMAP the entire RAID Engine region */ + re_priv->re_regs = devm_ioremap(dev, res->start, resource_size(res)); + if (!re_priv->re_regs) + return -EBUSY; + + /* Program the RE mode */ + out_be32(&re_priv->re_regs->global_config, FSL_RE_NON_DPAA_MODE); + + /* Program Galois Field polynomial */ + out_be32(&re_priv->re_regs->galois_field_config, FSL_RE_GFM_POLY); + + dev_info(dev, "version %x, mode %x, gfp %x\n", + in_be32(&re_priv->re_regs->re_version_id), + in_be32(&re_priv->re_regs->global_config), + in_be32(&re_priv->re_regs->galois_field_config)); + + dma_dev = &re_priv->dma_dev; + dma_dev->dev = dev; + INIT_LIST_HEAD(&dma_dev->channels); + dma_set_mask(dev, DMA_BIT_MASK(40)); + + dma_dev->device_alloc_chan_resources = fsl_re_alloc_chan_resources; + dma_dev->device_tx_status = fsl_re_tx_status; + dma_dev->device_issue_pending = fsl_re_issue_pending; + + dma_dev->max_xor = FSL_RE_MAX_XOR_SRCS; + dma_dev->device_prep_dma_xor = fsl_re_prep_dma_xor; + dma_cap_set(DMA_XOR, dma_dev->cap_mask); + + dma_dev->max_pq = FSL_RE_MAX_PQ_SRCS; + dma_dev->device_prep_dma_pq = fsl_re_prep_dma_pq; + dma_cap_set(DMA_PQ, dma_dev->cap_mask); + + dma_dev->device_prep_dma_memcpy = fsl_re_prep_dma_memcpy; + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + + dma_dev->device_free_chan_resources = fsl_re_free_chan_resources; + + re_priv->total_chans = 0; + + re_priv->cf_desc_pool = dmam_pool_create("fsl_re_cf_desc_pool", dev, + FSL_RE_CF_CDB_SIZE, + FSL_RE_CF_CDB_ALIGN, 0); + + if (!re_priv->cf_desc_pool) { + dev_err(dev, "No memory for fsl re_cf desc pool\n"); + return -ENOMEM; + } + + re_priv->hw_desc_pool = dmam_pool_create("fsl_re_hw_desc_pool", dev, + sizeof(struct fsl_re_hw_desc) * FSL_RE_RING_SIZE, + FSL_RE_FRAME_ALIGN, 0); + if (!re_priv->hw_desc_pool) { + dev_err(dev, "No memory for fsl re_hw desc pool\n"); + return -ENOMEM; + } + + dev_set_drvdata(dev, re_priv); + + /* Parse Device tree to find out the total number of JQs present */ + for_each_compatible_node(np, NULL, "fsl,raideng-v1.0-job-queue") { + rc = of_property_read_u32(np, "reg", &off); + if (rc) { + dev_err(dev, "Reg property not found in JQ node\n"); + of_node_put(np); + return -ENODEV; + } + /* Find out the Job Rings present under each JQ */ + for_each_child_of_node(np, child) { + rc = of_device_is_compatible(child, + "fsl,raideng-v1.0-job-ring"); + if (rc) { + fsl_re_chan_probe(ofdev, child, ridx++, off); + re_priv->total_chans++; + } + } + } + + dma_async_device_register(dma_dev); + + return 0; +} + +static void fsl_re_remove_chan(struct fsl_re_chan *chan) +{ + tasklet_kill(&chan->irqtask); + + dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr, + chan->inb_phys_addr); + + dma_pool_free(chan->re_dev->hw_desc_pool, chan->oub_ring_virt_addr, + chan->oub_phys_addr); +} + +static int fsl_re_remove(struct platform_device *ofdev) +{ + struct fsl_re_drv_private *re_priv; + struct device *dev; + int i; + + dev = &ofdev->dev; + re_priv = dev_get_drvdata(dev); + + /* Cleanup chan related memory areas */ + for (i = 0; i < re_priv->total_chans; i++) + fsl_re_remove_chan(re_priv->re_jrs[i]); + + /* Unregister the driver */ + dma_async_device_unregister(&re_priv->dma_dev); + + return 0; +} + +static const struct of_device_id fsl_re_ids[] = { + { .compatible = "fsl,raideng-v1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, fsl_re_ids); + +static struct platform_driver fsl_re_driver = { + .driver = { + .name = "fsl-raideng", + .of_match_table = fsl_re_ids, + }, + .probe = fsl_re_probe, + .remove = fsl_re_remove, +}; + +module_platform_driver(fsl_re_driver); + +MODULE_AUTHOR("Harninder Rai "); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Freescale RAID Engine Device Driver"); diff --git a/drivers/dma/fsl_raid.h b/drivers/dma/fsl_raid.h new file mode 100644 index 0000000000..69d743c049 --- /dev/null +++ b/drivers/dma/fsl_raid.h @@ -0,0 +1,306 @@ +/* + * drivers/dma/fsl_raid.h + * + * Freescale RAID Engine device driver + * + * Author: + * Harninder Rai + * Naveen Burmi + * + * Rewrite: + * Xuelin Shi + + * Copyright (c) 2010-2012 Freescale Semiconductor, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#define FSL_RE_MAX_CHANS 4 +#define FSL_RE_DPAA_MODE BIT(30) +#define FSL_RE_NON_DPAA_MODE BIT(31) +#define FSL_RE_GFM_POLY 0x1d000000 +#define FSL_RE_ADD_JOB(x) ((x) << 16) +#define FSL_RE_RMVD_JOB(x) ((x) << 16) +#define FSL_RE_CFG1_CBSI 0x08000000 +#define FSL_RE_CFG1_CBS0 0x00080000 +#define FSL_RE_SLOT_FULL_SHIFT 8 +#define FSL_RE_SLOT_FULL(x) ((x) >> FSL_RE_SLOT_FULL_SHIFT) +#define FSL_RE_SLOT_AVAIL_SHIFT 8 +#define FSL_RE_SLOT_AVAIL(x) ((x) >> FSL_RE_SLOT_AVAIL_SHIFT) +#define FSL_RE_PQ_OPCODE 0x1B +#define FSL_RE_XOR_OPCODE 0x1A +#define FSL_RE_MOVE_OPCODE 0x8 +#define FSL_RE_FRAME_ALIGN 16 +#define FSL_RE_BLOCK_SIZE 0x3 /* 4096 bytes */ +#define FSL_RE_CACHEABLE_IO 0x0 +#define FSL_RE_BUFFER_OUTPUT 0x0 +#define FSL_RE_INTR_ON_ERROR 0x1 +#define FSL_RE_DATA_DEP 0x1 +#define FSL_RE_ENABLE_DPI 0x0 +#define FSL_RE_RING_SIZE 0x400 +#define FSL_RE_RING_SIZE_MASK (FSL_RE_RING_SIZE - 1) +#define FSL_RE_RING_SIZE_SHIFT 8 +#define FSL_RE_ADDR_BIT_SHIFT 4 +#define FSL_RE_ADDR_BIT_MASK (BIT(FSL_RE_ADDR_BIT_SHIFT) - 1) +#define FSL_RE_ERROR 0x40000000 +#define FSL_RE_INTR 0x80000000 +#define FSL_RE_CLR_INTR 0x80000000 +#define FSL_RE_PAUSE 0x80000000 +#define FSL_RE_ENABLE 0x80000000 +#define FSL_RE_REG_LIODN_MASK 0x00000FFF + +#define FSL_RE_CDB_OPCODE_MASK 0xF8000000 +#define FSL_RE_CDB_OPCODE_SHIFT 27 +#define FSL_RE_CDB_EXCLEN_MASK 0x03000000 +#define FSL_RE_CDB_EXCLEN_SHIFT 24 +#define FSL_RE_CDB_EXCLQ1_MASK 0x00F00000 +#define FSL_RE_CDB_EXCLQ1_SHIFT 20 +#define FSL_RE_CDB_EXCLQ2_MASK 0x000F0000 +#define FSL_RE_CDB_EXCLQ2_SHIFT 16 +#define FSL_RE_CDB_BLKSIZE_MASK 0x0000C000 +#define FSL_RE_CDB_BLKSIZE_SHIFT 14 +#define FSL_RE_CDB_CACHE_MASK 0x00003000 +#define FSL_RE_CDB_CACHE_SHIFT 12 +#define FSL_RE_CDB_BUFFER_MASK 0x00000800 +#define FSL_RE_CDB_BUFFER_SHIFT 11 +#define FSL_RE_CDB_ERROR_MASK 0x00000400 +#define FSL_RE_CDB_ERROR_SHIFT 10 +#define FSL_RE_CDB_NRCS_MASK 0x0000003C +#define FSL_RE_CDB_NRCS_SHIFT 6 +#define FSL_RE_CDB_DEPEND_MASK 0x00000008 +#define FSL_RE_CDB_DEPEND_SHIFT 3 +#define FSL_RE_CDB_DPI_MASK 0x00000004 +#define FSL_RE_CDB_DPI_SHIFT 2 + +/* + * the largest cf block is 19*sizeof(struct cmpnd_frame), which is 304 bytes. + * here 19 = 1(cdb)+2(dest)+16(src), align to 64bytes, that is 320 bytes. + * the largest cdb block: struct pq_cdb which is 180 bytes, adding to cf block + * 320+180=500, align to 64bytes, that is 512 bytes. + */ +#define FSL_RE_CF_DESC_SIZE 320 +#define FSL_RE_CF_CDB_SIZE 512 +#define FSL_RE_CF_CDB_ALIGN 64 + +struct fsl_re_ctrl { + /* General Configuration Registers */ + __be32 global_config; /* Global Configuration Register */ + u8 rsvd1[4]; + __be32 galois_field_config; /* Galois Field Configuration Register */ + u8 rsvd2[4]; + __be32 jq_wrr_config; /* WRR Configuration register */ + u8 rsvd3[4]; + __be32 crc_config; /* CRC Configuration register */ + u8 rsvd4[228]; + __be32 system_reset; /* System Reset Register */ + u8 rsvd5[252]; + __be32 global_status; /* Global Status Register */ + u8 rsvd6[832]; + __be32 re_liodn_base; /* LIODN Base Register */ + u8 rsvd7[1712]; + __be32 re_version_id; /* Version ID register of RE */ + __be32 re_version_id_2; /* Version ID 2 register of RE */ + u8 rsvd8[512]; + __be32 host_config; /* Host I/F Configuration Register */ +}; + +struct fsl_re_chan_cfg { + /* Registers for JR interface */ + __be32 jr_config_0; /* Job Queue Configuration 0 Register */ + __be32 jr_config_1; /* Job Queue Configuration 1 Register */ + __be32 jr_interrupt_status; /* Job Queue Interrupt Status Register */ + u8 rsvd1[4]; + __be32 jr_command; /* Job Queue Command Register */ + u8 rsvd2[4]; + __be32 jr_status; /* Job Queue Status Register */ + u8 rsvd3[228]; + + /* Input Ring */ + __be32 inbring_base_h; /* Inbound Ring Base Address Register - High */ + __be32 inbring_base_l; /* Inbound Ring Base Address Register - Low */ + __be32 inbring_size; /* Inbound Ring Size Register */ + u8 rsvd4[4]; + __be32 inbring_slot_avail; /* Inbound Ring Slot Available Register */ + u8 rsvd5[4]; + __be32 inbring_add_job; /* Inbound Ring Add Job Register */ + u8 rsvd6[4]; + __be32 inbring_cnsmr_indx; /* Inbound Ring Consumer Index Register */ + u8 rsvd7[220]; + + /* Output Ring */ + __be32 oubring_base_h; /* Outbound Ring Base Address Register - High */ + __be32 oubring_base_l; /* Outbound Ring Base Address Register - Low */ + __be32 oubring_size; /* Outbound Ring Size Register */ + u8 rsvd8[4]; + __be32 oubring_job_rmvd; /* Outbound Ring Job Removed Register */ + u8 rsvd9[4]; + __be32 oubring_slot_full; /* Outbound Ring Slot Full Register */ + u8 rsvd10[4]; + __be32 oubring_prdcr_indx; /* Outbound Ring Producer Index */ +}; + +/* + * Command Descriptor Block (CDB) for unicast move command. + * In RAID Engine terms, memcpy is done through move command + */ +struct fsl_re_move_cdb { + __be32 cdb32; +}; + +/* Data protection/integrity related fields */ +#define FSL_RE_DPI_APPS_MASK 0xC0000000 +#define FSL_RE_DPI_APPS_SHIFT 30 +#define FSL_RE_DPI_REF_MASK 0x30000000 +#define FSL_RE_DPI_REF_SHIFT 28 +#define FSL_RE_DPI_GUARD_MASK 0x0C000000 +#define FSL_RE_DPI_GUARD_SHIFT 26 +#define FSL_RE_DPI_ATTR_MASK 0x03000000 +#define FSL_RE_DPI_ATTR_SHIFT 24 +#define FSL_RE_DPI_META_MASK 0x0000FFFF + +struct fsl_re_dpi { + __be32 dpi32; + __be32 ref; +}; + +/* + * CDB for GenQ command. In RAID Engine terminology, XOR is + * done through this command + */ +struct fsl_re_xor_cdb { + __be32 cdb32; + u8 gfm[16]; + struct fsl_re_dpi dpi_dest_spec; + struct fsl_re_dpi dpi_src_spec[16]; +}; + +/* CDB for no-op command */ +struct fsl_re_noop_cdb { + __be32 cdb32; +}; + +/* + * CDB for GenQQ command. In RAID Engine terminology, P/Q is + * done through this command + */ +struct fsl_re_pq_cdb { + __be32 cdb32; + u8 gfm_q1[16]; + u8 gfm_q2[16]; + struct fsl_re_dpi dpi_dest_spec[2]; + struct fsl_re_dpi dpi_src_spec[16]; +}; + +/* Compound frame */ +#define FSL_RE_CF_ADDR_HIGH_MASK 0x000000FF +#define FSL_RE_CF_EXT_MASK 0x80000000 +#define FSL_RE_CF_EXT_SHIFT 31 +#define FSL_RE_CF_FINAL_MASK 0x40000000 +#define FSL_RE_CF_FINAL_SHIFT 30 +#define FSL_RE_CF_LENGTH_MASK 0x000FFFFF +#define FSL_RE_CF_BPID_MASK 0x00FF0000 +#define FSL_RE_CF_BPID_SHIFT 16 +#define FSL_RE_CF_OFFSET_MASK 0x00001FFF + +struct fsl_re_cmpnd_frame { + __be32 addr_high; + __be32 addr_low; + __be32 efrl32; + __be32 rbro32; +}; + +/* Frame descriptor */ +#define FSL_RE_HWDESC_LIODN_MASK 0x3F000000 +#define FSL_RE_HWDESC_LIODN_SHIFT 24 +#define FSL_RE_HWDESC_BPID_MASK 0x00FF0000 +#define FSL_RE_HWDESC_BPID_SHIFT 16 +#define FSL_RE_HWDESC_ELIODN_MASK 0x0000F000 +#define FSL_RE_HWDESC_ELIODN_SHIFT 12 +#define FSL_RE_HWDESC_FMT_SHIFT 29 +#define FSL_RE_HWDESC_FMT_MASK (0x3 << FSL_RE_HWDESC_FMT_SHIFT) + +struct fsl_re_hw_desc { + __be32 lbea32; + __be32 addr_low; + __be32 fmt32; + __be32 status; +}; + +/* Raid Engine device private data */ +struct fsl_re_drv_private { + u8 total_chans; + struct dma_device dma_dev; + struct fsl_re_ctrl *re_regs; + struct fsl_re_chan *re_jrs[FSL_RE_MAX_CHANS]; + struct dma_pool *cf_desc_pool; + struct dma_pool *hw_desc_pool; +}; + +/* Per job ring data structure */ +struct fsl_re_chan { + char name[16]; + spinlock_t desc_lock; /* queue lock */ + struct list_head ack_q; /* wait to acked queue */ + struct list_head active_q; /* already issued on hw, not completed */ + struct list_head submit_q; + struct list_head free_q; /* alloc available queue */ + struct device *dev; + struct fsl_re_drv_private *re_dev; + struct dma_chan chan; + struct fsl_re_chan_cfg *jrregs; + int irq; + struct tasklet_struct irqtask; + u32 alloc_count; + + /* hw descriptor ring for inbound queue*/ + dma_addr_t inb_phys_addr; + struct fsl_re_hw_desc *inb_ring_virt_addr; + u32 inb_count; + + /* hw descriptor ring for outbound queue */ + dma_addr_t oub_phys_addr; + struct fsl_re_hw_desc *oub_ring_virt_addr; + u32 oub_count; +}; + +/* Async transaction descriptor */ +struct fsl_re_desc { + struct dma_async_tx_descriptor async_tx; + struct list_head node; + struct fsl_re_hw_desc hwdesc; + struct fsl_re_chan *re_chan; + + /* hwdesc will point to cf_addr */ + void *cf_addr; + dma_addr_t cf_paddr; + + void *cdb_addr; + dma_addr_t cdb_paddr; + int status; +}; diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c new file mode 100644 index 0000000000..ddcf736d28 --- /dev/null +++ b/drivers/dma/fsldma.c @@ -0,0 +1,1431 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Freescale MPC85xx, MPC83xx DMA Engine support + * + * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: + * Zhang Wei , Jul 2007 + * Ebony Zhu , May 2007 + * + * Description: + * DMA engine driver for Freescale MPC8540 DMA controller, which is + * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc. + * The support for MPC8349 DMA controller is also added. + * + * This driver instructs the DMA controller to issue the PCI Read Multiple + * command for PCI read operations, instead of using the default PCI Read Line + * command. Please be aware that this setting may result in read pre-fetching + * on some platforms. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dmaengine.h" +#include "fsldma.h" + +#define chan_dbg(chan, fmt, arg...) \ + dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg) +#define chan_err(chan, fmt, arg...) \ + dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) + +static const char msg_ld_oom[] = "No free memory for link descriptor"; + +/* + * Register Helpers + */ + +static void set_sr(struct fsldma_chan *chan, u32 val) +{ + FSL_DMA_OUT(chan, &chan->regs->sr, val, 32); +} + +static u32 get_sr(struct fsldma_chan *chan) +{ + return FSL_DMA_IN(chan, &chan->regs->sr, 32); +} + +static void set_mr(struct fsldma_chan *chan, u32 val) +{ + FSL_DMA_OUT(chan, &chan->regs->mr, val, 32); +} + +static u32 get_mr(struct fsldma_chan *chan) +{ + return FSL_DMA_IN(chan, &chan->regs->mr, 32); +} + +static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr) +{ + FSL_DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64); +} + +static dma_addr_t get_cdar(struct fsldma_chan *chan) +{ + return FSL_DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN; +} + +static void set_bcr(struct fsldma_chan *chan, u32 val) +{ + FSL_DMA_OUT(chan, &chan->regs->bcr, val, 32); +} + +static u32 get_bcr(struct fsldma_chan *chan) +{ + return FSL_DMA_IN(chan, &chan->regs->bcr, 32); +} + +/* + * Descriptor Helpers + */ + +static void set_desc_cnt(struct fsldma_chan *chan, + struct fsl_dma_ld_hw *hw, u32 count) +{ + hw->count = CPU_TO_DMA(chan, count, 32); +} + +static void set_desc_src(struct fsldma_chan *chan, + struct fsl_dma_ld_hw *hw, dma_addr_t src) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0; + hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64); +} + +static void set_desc_dst(struct fsldma_chan *chan, + struct fsl_dma_ld_hw *hw, dma_addr_t dst) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0; + hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64); +} + +static void set_desc_next(struct fsldma_chan *chan, + struct fsl_dma_ld_hw *hw, dma_addr_t next) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX) + ? FSL_DMA_SNEN : 0; + hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64); +} + +static void set_ld_eol(struct fsldma_chan *chan, struct fsl_desc_sw *desc) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX) + ? FSL_DMA_SNEN : 0; + + desc->hw.next_ln_addr = CPU_TO_DMA(chan, + DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL + | snoop_bits, 64); +} + +/* + * DMA Engine Hardware Control Helpers + */ + +static void dma_init(struct fsldma_chan *chan) +{ + /* Reset the channel */ + set_mr(chan, 0); + + switch (chan->feature & FSL_DMA_IP_MASK) { + case FSL_DMA_IP_85XX: + /* Set the channel to below modes: + * EIE - Error interrupt enable + * EOLNIE - End of links interrupt enable + * BWC - Bandwidth sharing among channels + */ + set_mr(chan, FSL_DMA_MR_BWC | FSL_DMA_MR_EIE + | FSL_DMA_MR_EOLNIE); + break; + case FSL_DMA_IP_83XX: + /* Set the channel to below modes: + * EOTIE - End-of-transfer interrupt enable + * PRC_RM - PCI read multiple + */ + set_mr(chan, FSL_DMA_MR_EOTIE | FSL_DMA_MR_PRC_RM); + break; + } +} + +static int dma_is_idle(struct fsldma_chan *chan) +{ + u32 sr = get_sr(chan); + return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH); +} + +/* + * Start the DMA controller + * + * Preconditions: + * - the CDAR register must point to the start descriptor + * - the MRn[CS] bit must be cleared + */ +static void dma_start(struct fsldma_chan *chan) +{ + u32 mode; + + mode = get_mr(chan); + + if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { + set_bcr(chan, 0); + mode |= FSL_DMA_MR_EMP_EN; + } else { + mode &= ~FSL_DMA_MR_EMP_EN; + } + + if (chan->feature & FSL_DMA_CHAN_START_EXT) { + mode |= FSL_DMA_MR_EMS_EN; + } else { + mode &= ~FSL_DMA_MR_EMS_EN; + mode |= FSL_DMA_MR_CS; + } + + set_mr(chan, mode); +} + +static void dma_halt(struct fsldma_chan *chan) +{ + u32 mode; + int i; + + /* read the mode register */ + mode = get_mr(chan); + + /* + * The 85xx controller supports channel abort, which will stop + * the current transfer. On 83xx, this bit is the transfer error + * mask bit, which should not be changed. + */ + if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { + mode |= FSL_DMA_MR_CA; + set_mr(chan, mode); + + mode &= ~FSL_DMA_MR_CA; + } + + /* stop the DMA controller */ + mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN); + set_mr(chan, mode); + + /* wait for the DMA controller to become idle */ + for (i = 0; i < 100; i++) { + if (dma_is_idle(chan)) + return; + + udelay(10); + } + + if (!dma_is_idle(chan)) + chan_err(chan, "DMA halt timeout!\n"); +} + +/** + * fsl_chan_set_src_loop_size - Set source address hold transfer size + * @chan : Freescale DMA channel + * @size : Address loop size, 0 for disable loop + * + * The set source address hold transfer size. The source + * address hold or loop transfer size is when the DMA transfer + * data from source address (SA), if the loop size is 4, the DMA will + * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA, + * SA + 1 ... and so on. + */ +static void fsl_chan_set_src_loop_size(struct fsldma_chan *chan, int size) +{ + u32 mode; + + mode = get_mr(chan); + + switch (size) { + case 0: + mode &= ~FSL_DMA_MR_SAHE; + break; + case 1: + case 2: + case 4: + case 8: + mode &= ~FSL_DMA_MR_SAHTS_MASK; + mode |= FSL_DMA_MR_SAHE | (__ilog2(size) << 14); + break; + } + + set_mr(chan, mode); +} + +/** + * fsl_chan_set_dst_loop_size - Set destination address hold transfer size + * @chan : Freescale DMA channel + * @size : Address loop size, 0 for disable loop + * + * The set destination address hold transfer size. The destination + * address hold or loop transfer size is when the DMA transfer + * data to destination address (TA), if the loop size is 4, the DMA will + * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA, + * TA + 1 ... and so on. + */ +static void fsl_chan_set_dst_loop_size(struct fsldma_chan *chan, int size) +{ + u32 mode; + + mode = get_mr(chan); + + switch (size) { + case 0: + mode &= ~FSL_DMA_MR_DAHE; + break; + case 1: + case 2: + case 4: + case 8: + mode &= ~FSL_DMA_MR_DAHTS_MASK; + mode |= FSL_DMA_MR_DAHE | (__ilog2(size) << 16); + break; + } + + set_mr(chan, mode); +} + +/** + * fsl_chan_set_request_count - Set DMA Request Count for external control + * @chan : Freescale DMA channel + * @size : Number of bytes to transfer in a single request + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA request count is how many bytes are allowed to transfer before + * pausing the channel, after which a new assertion of DREQ# resumes channel + * operation. + * + * A size of 0 disables external pause control. The maximum size is 1024. + */ +static void fsl_chan_set_request_count(struct fsldma_chan *chan, int size) +{ + u32 mode; + + BUG_ON(size > 1024); + + mode = get_mr(chan); + mode &= ~FSL_DMA_MR_BWC_MASK; + mode |= (__ilog2(size) << 24) & FSL_DMA_MR_BWC_MASK; + + set_mr(chan, mode); +} + +/** + * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * @chan : Freescale DMA channel + * @enable : 0 is disabled, 1 is enabled. + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA Request Count feature should be used in addition to this feature + * to set the number of bytes to transfer before pausing the channel. + */ +static void fsl_chan_toggle_ext_pause(struct fsldma_chan *chan, int enable) +{ + if (enable) + chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; + else + chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; +} + +/** + * fsl_chan_toggle_ext_start - Toggle channel external start status + * @chan : Freescale DMA channel + * @enable : 0 is disabled, 1 is enabled. + * + * If enable the external start, the channel can be started by an + * external DMA start pin. So the dma_start() does not start the + * transfer immediately. The DMA channel will wait for the + * control pin asserted. + */ +static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable) +{ + if (enable) + chan->feature |= FSL_DMA_CHAN_START_EXT; + else + chan->feature &= ~FSL_DMA_CHAN_START_EXT; +} + +int fsl_dma_external_start(struct dma_chan *dchan, int enable) +{ + struct fsldma_chan *chan; + + if (!dchan) + return -EINVAL; + + chan = to_fsl_chan(dchan); + + fsl_chan_toggle_ext_start(chan, enable); + return 0; +} +EXPORT_SYMBOL_GPL(fsl_dma_external_start); + +static void append_ld_queue(struct fsldma_chan *chan, struct fsl_desc_sw *desc) +{ + struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev); + + if (list_empty(&chan->ld_pending)) + goto out_splice; + + /* + * Add the hardware descriptor to the chain of hardware descriptors + * that already exists in memory. + * + * This will un-set the EOL bit of the existing transaction, and the + * last link in this transaction will become the EOL descriptor. + */ + set_desc_next(chan, &tail->hw, desc->async_tx.phys); + + /* + * Add the software descriptor and all children to the list + * of pending transactions + */ +out_splice: + list_splice_tail_init(&desc->tx_list, &chan->ld_pending); +} + +static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct fsldma_chan *chan = to_fsl_chan(tx->chan); + struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); + struct fsl_desc_sw *child; + dma_cookie_t cookie = -EINVAL; + + spin_lock_bh(&chan->desc_lock); + +#ifdef CONFIG_PM + if (unlikely(chan->pm_state != RUNNING)) { + chan_dbg(chan, "cannot submit due to suspend\n"); + spin_unlock_bh(&chan->desc_lock); + return -1; + } +#endif + + /* + * assign cookies to all of the software descriptors + * that make up this transaction + */ + list_for_each_entry(child, &desc->tx_list, node) { + cookie = dma_cookie_assign(&child->async_tx); + } + + /* put this transaction onto the tail of the pending queue */ + append_ld_queue(chan, desc); + + spin_unlock_bh(&chan->desc_lock); + + return cookie; +} + +/** + * fsl_dma_free_descriptor - Free descriptor from channel's DMA pool. + * @chan : Freescale DMA channel + * @desc: descriptor to be freed + */ +static void fsl_dma_free_descriptor(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + list_del(&desc->node); + chan_dbg(chan, "LD %p free\n", desc); + dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); +} + +/** + * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool. + * @chan : Freescale DMA channel + * + * Return - The descriptor allocated. NULL for failed. + */ +static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan) +{ + struct fsl_desc_sw *desc; + dma_addr_t pdesc; + + desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + if (!desc) { + chan_dbg(chan, "out of memory for link descriptor\n"); + return NULL; + } + + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = fsl_dma_tx_submit; + desc->async_tx.phys = pdesc; + + chan_dbg(chan, "LD %p allocated\n", desc); + + return desc; +} + +/** + * fsldma_clean_completed_descriptor - free all descriptors which + * has been completed and acked + * @chan: Freescale DMA channel + * + * This function is used on all completed and acked descriptors. + * All descriptors should only be freed in this function. + */ +static void fsldma_clean_completed_descriptor(struct fsldma_chan *chan) +{ + struct fsl_desc_sw *desc, *_desc; + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) + if (async_tx_test_ack(&desc->async_tx)) + fsl_dma_free_descriptor(chan, desc); +} + +/** + * fsldma_run_tx_complete_actions - cleanup a single link descriptor + * @chan: Freescale DMA channel + * @desc: descriptor to cleanup and free + * @cookie: Freescale DMA transaction identifier + * + * This function is used on a descriptor which has been executed by the DMA + * controller. It will run any callbacks, submit any dependencies. + */ +static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan, + struct fsl_desc_sw *desc, dma_cookie_t cookie) +{ + struct dma_async_tx_descriptor *txd = &desc->async_tx; + dma_cookie_t ret = cookie; + + BUG_ON(txd->cookie < 0); + + if (txd->cookie > 0) { + ret = txd->cookie; + + dma_descriptor_unmap(txd); + /* Run the link descriptor callback function */ + dmaengine_desc_get_callback_invoke(txd, NULL); + } + + /* Run any dependencies */ + dma_run_dependencies(txd); + + return ret; +} + +/** + * fsldma_clean_running_descriptor - move the completed descriptor from + * ld_running to ld_completed + * @chan: Freescale DMA channel + * @desc: the descriptor which is completed + * + * Free the descriptor directly if acked by async_tx api, or move it to + * queue ld_completed. + */ +static void fsldma_clean_running_descriptor(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + /* Remove from the list of transactions */ + list_del(&desc->node); + + /* + * the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) { + /* + * Move this descriptor to the list of descriptors which is + * completed, but still awaiting the 'ack' bit to be set. + */ + list_add_tail(&desc->node, &chan->ld_completed); + return; + } + + dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); +} + +/** + * fsl_chan_xfer_ld_queue - transfer any pending transactions + * @chan : Freescale DMA channel + * + * HARDWARE STATE: idle + * LOCKING: must hold chan->desc_lock + */ +static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) +{ + struct fsl_desc_sw *desc; + + /* + * If the list of pending descriptors is empty, then we + * don't need to do any work at all + */ + if (list_empty(&chan->ld_pending)) { + chan_dbg(chan, "no pending LDs\n"); + return; + } + + /* + * The DMA controller is not idle, which means that the interrupt + * handler will start any queued transactions when it runs after + * this transaction finishes + */ + if (!chan->idle) { + chan_dbg(chan, "DMA controller still busy\n"); + return; + } + + /* + * If there are some link descriptors which have not been + * transferred, we need to start the controller + */ + + /* + * Move all elements from the queue of pending transactions + * onto the list of running transactions + */ + chan_dbg(chan, "idle, starting controller\n"); + desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node); + list_splice_tail_init(&chan->ld_pending, &chan->ld_running); + + /* + * The 85xx DMA controller doesn't clear the channel start bit + * automatically at the end of a transfer. Therefore we must clear + * it in software before starting the transfer. + */ + if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { + u32 mode; + + mode = get_mr(chan); + mode &= ~FSL_DMA_MR_CS; + set_mr(chan, mode); + } + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + set_cdar(chan, desc->async_tx.phys); + get_cdar(chan); + + dma_start(chan); + chan->idle = false; +} + +/** + * fsldma_cleanup_descriptors - cleanup link descriptors which are completed + * and move them to ld_completed to free until flag 'ack' is set + * @chan: Freescale DMA channel + * + * This function is used on descriptors which have been executed by the DMA + * controller. It will run any callbacks, submit any dependencies, then + * free these descriptors if flag 'ack' is set. + */ +static void fsldma_cleanup_descriptors(struct fsldma_chan *chan) +{ + struct fsl_desc_sw *desc, *_desc; + dma_cookie_t cookie = 0; + dma_addr_t curr_phys = get_cdar(chan); + int seen_current = 0; + + fsldma_clean_completed_descriptor(chan); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) { + /* + * do not advance past the current descriptor loaded into the + * hardware channel, subsequent descriptors are either in + * process or have not been submitted + */ + if (seen_current) + break; + + /* + * stop the search if we reach the current descriptor and the + * channel is busy + */ + if (desc->async_tx.phys == curr_phys) { + seen_current = 1; + if (!dma_is_idle(chan)) + break; + } + + cookie = fsldma_run_tx_complete_actions(chan, desc, cookie); + + fsldma_clean_running_descriptor(chan, desc); + } + + /* + * Start any pending transactions automatically + * + * In the ideal case, we keep the DMA controller busy while we go + * ahead and free the descriptors below. + */ + fsl_chan_xfer_ld_queue(chan); + + if (cookie > 0) + chan->common.completed_cookie = cookie; +} + +/** + * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel. + * @chan : Freescale DMA channel + * + * This function will create a dma pool for descriptor allocation. + * + * Return - The number of descriptors allocated. + */ +static int fsl_dma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct fsldma_chan *chan = to_fsl_chan(dchan); + + /* Has this channel already been allocated? */ + if (chan->desc_pool) + return 1; + + /* + * We need the descriptor to be aligned to 32bytes + * for meeting FSL DMA specification requirement. + */ + chan->desc_pool = dma_pool_create(chan->name, chan->dev, + sizeof(struct fsl_desc_sw), + __alignof__(struct fsl_desc_sw), 0); + if (!chan->desc_pool) { + chan_err(chan, "unable to allocate descriptor pool\n"); + return -ENOMEM; + } + + /* there is at least one descriptor free to be allocated */ + return 1; +} + +/** + * fsldma_free_desc_list - Free all descriptors in a queue + * @chan: Freescae DMA channel + * @list: the list to free + * + * LOCKING: must hold chan->desc_lock + */ +static void fsldma_free_desc_list(struct fsldma_chan *chan, + struct list_head *list) +{ + struct fsl_desc_sw *desc, *_desc; + + list_for_each_entry_safe(desc, _desc, list, node) + fsl_dma_free_descriptor(chan, desc); +} + +static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan, + struct list_head *list) +{ + struct fsl_desc_sw *desc, *_desc; + + list_for_each_entry_safe_reverse(desc, _desc, list, node) + fsl_dma_free_descriptor(chan, desc); +} + +/** + * fsl_dma_free_chan_resources - Free all resources of the channel. + * @chan : Freescale DMA channel + */ +static void fsl_dma_free_chan_resources(struct dma_chan *dchan) +{ + struct fsldma_chan *chan = to_fsl_chan(dchan); + + chan_dbg(chan, "free all channel resources\n"); + spin_lock_bh(&chan->desc_lock); + fsldma_cleanup_descriptors(chan); + fsldma_free_desc_list(chan, &chan->ld_pending); + fsldma_free_desc_list(chan, &chan->ld_running); + fsldma_free_desc_list(chan, &chan->ld_completed); + spin_unlock_bh(&chan->desc_lock); + + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; +} + +static struct dma_async_tx_descriptor * +fsl_dma_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct fsldma_chan *chan; + struct fsl_desc_sw *first = NULL, *prev = NULL, *new; + size_t copy; + + if (!dchan) + return NULL; + + if (!len) + return NULL; + + chan = to_fsl_chan(dchan); + + do { + + /* Allocate the link descriptor from DMA pool */ + new = fsl_dma_alloc_descriptor(chan); + if (!new) { + chan_err(chan, "%s\n", msg_ld_oom); + goto fail; + } + + copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT); + + set_desc_cnt(chan, &new->hw, copy); + set_desc_src(chan, &new->hw, dma_src); + set_desc_dst(chan, &new->hw, dma_dst); + + if (!first) + first = new; + else + set_desc_next(chan, &prev->hw, new->async_tx.phys); + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + prev = new; + len -= copy; + dma_src += copy; + dma_dst += copy; + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } while (len); + + new->async_tx.flags = flags; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; + + /* Set End-of-link to the last link descriptor of new list */ + set_ld_eol(chan, new); + + return &first->async_tx; + +fail: + if (!first) + return NULL; + + fsldma_free_desc_list_reverse(chan, &first->tx_list); + return NULL; +} + +static int fsl_dma_device_terminate_all(struct dma_chan *dchan) +{ + struct fsldma_chan *chan; + + if (!dchan) + return -EINVAL; + + chan = to_fsl_chan(dchan); + + spin_lock_bh(&chan->desc_lock); + + /* Halt the DMA engine */ + dma_halt(chan); + + /* Remove and free all of the descriptors in the LD queue */ + fsldma_free_desc_list(chan, &chan->ld_pending); + fsldma_free_desc_list(chan, &chan->ld_running); + fsldma_free_desc_list(chan, &chan->ld_completed); + chan->idle = true; + + spin_unlock_bh(&chan->desc_lock); + return 0; +} + +static int fsl_dma_device_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct fsldma_chan *chan; + int size; + + if (!dchan) + return -EINVAL; + + chan = to_fsl_chan(dchan); + + /* make sure the channel supports setting burst size */ + if (!chan->set_request_count) + return -ENXIO; + + /* we set the controller burst size depending on direction */ + if (config->direction == DMA_MEM_TO_DEV) + size = config->dst_addr_width * config->dst_maxburst; + else + size = config->src_addr_width * config->src_maxburst; + + chan->set_request_count(chan, size); + return 0; +} + + +/** + * fsl_dma_memcpy_issue_pending - Issue the DMA start command + * @chan : Freescale DMA channel + */ +static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan) +{ + struct fsldma_chan *chan = to_fsl_chan(dchan); + + spin_lock_bh(&chan->desc_lock); + fsl_chan_xfer_ld_queue(chan); + spin_unlock_bh(&chan->desc_lock); +} + +/** + * fsl_tx_status - Determine the DMA status + * @chan : Freescale DMA channel + */ +static enum dma_status fsl_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct fsldma_chan *chan = to_fsl_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_bh(&chan->desc_lock); + fsldma_cleanup_descriptors(chan); + spin_unlock_bh(&chan->desc_lock); + + return dma_cookie_status(dchan, cookie, txstate); +} + +/*----------------------------------------------------------------------------*/ +/* Interrupt Handling */ +/*----------------------------------------------------------------------------*/ + +static irqreturn_t fsldma_chan_irq(int irq, void *data) +{ + struct fsldma_chan *chan = data; + u32 stat; + + /* save and clear the status register */ + stat = get_sr(chan); + set_sr(chan, stat); + chan_dbg(chan, "irq: stat = 0x%x\n", stat); + + /* check that this was really our device */ + stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH); + if (!stat) + return IRQ_NONE; + + if (stat & FSL_DMA_SR_TE) + chan_err(chan, "Transfer Error!\n"); + + /* + * Programming Error + * The DMA_INTERRUPT async_tx is a NULL transfer, which will + * trigger a PE interrupt. + */ + if (stat & FSL_DMA_SR_PE) { + chan_dbg(chan, "irq: Programming Error INT\n"); + stat &= ~FSL_DMA_SR_PE; + if (get_bcr(chan) != 0) + chan_err(chan, "Programming Error!\n"); + } + + /* + * For MPC8349, EOCDI event need to update cookie + * and start the next transfer if it exist. + */ + if (stat & FSL_DMA_SR_EOCDI) { + chan_dbg(chan, "irq: End-of-Chain link INT\n"); + stat &= ~FSL_DMA_SR_EOCDI; + } + + /* + * If it current transfer is the end-of-transfer, + * we should clear the Channel Start bit for + * prepare next transfer. + */ + if (stat & FSL_DMA_SR_EOLNI) { + chan_dbg(chan, "irq: End-of-link INT\n"); + stat &= ~FSL_DMA_SR_EOLNI; + } + + /* check that the DMA controller is really idle */ + if (!dma_is_idle(chan)) + chan_err(chan, "irq: controller not idle!\n"); + + /* check that we handled all of the bits */ + if (stat) + chan_err(chan, "irq: unhandled sr 0x%08x\n", stat); + + /* + * Schedule the tasklet to handle all cleanup of the current + * transaction. It will start a new transaction if there is + * one pending. + */ + tasklet_schedule(&chan->tasklet); + chan_dbg(chan, "irq: Exit\n"); + return IRQ_HANDLED; +} + +static void dma_do_tasklet(struct tasklet_struct *t) +{ + struct fsldma_chan *chan = from_tasklet(chan, t, tasklet); + + chan_dbg(chan, "tasklet entry\n"); + + spin_lock(&chan->desc_lock); + + /* the hardware is now idle and ready for more */ + chan->idle = true; + + /* Run all cleanup for descriptors which have been completed */ + fsldma_cleanup_descriptors(chan); + + spin_unlock(&chan->desc_lock); + + chan_dbg(chan, "tasklet exit\n"); +} + +static irqreturn_t fsldma_ctrl_irq(int irq, void *data) +{ + struct fsldma_device *fdev = data; + struct fsldma_chan *chan; + unsigned int handled = 0; + u32 gsr, mask; + int i; + + gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->regs) + : in_le32(fdev->regs); + mask = 0xff000000; + dev_dbg(fdev->dev, "IRQ: gsr 0x%.8x\n", gsr); + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (!chan) + continue; + + if (gsr & mask) { + dev_dbg(fdev->dev, "IRQ: chan %d\n", chan->id); + fsldma_chan_irq(irq, chan); + handled++; + } + + gsr &= ~mask; + mask >>= 8; + } + + return IRQ_RETVAL(handled); +} + +static void fsldma_free_irqs(struct fsldma_device *fdev) +{ + struct fsldma_chan *chan; + int i; + + if (fdev->irq) { + dev_dbg(fdev->dev, "free per-controller IRQ\n"); + free_irq(fdev->irq, fdev); + return; + } + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (chan && chan->irq) { + chan_dbg(chan, "free per-channel IRQ\n"); + free_irq(chan->irq, chan); + } + } +} + +static int fsldma_request_irqs(struct fsldma_device *fdev) +{ + struct fsldma_chan *chan; + int ret; + int i; + + /* if we have a per-controller IRQ, use that */ + if (fdev->irq) { + dev_dbg(fdev->dev, "request per-controller IRQ\n"); + ret = request_irq(fdev->irq, fsldma_ctrl_irq, IRQF_SHARED, + "fsldma-controller", fdev); + return ret; + } + + /* no per-controller IRQ, use the per-channel IRQs */ + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (!chan) + continue; + + if (!chan->irq) { + chan_err(chan, "interrupts property missing in device tree\n"); + ret = -ENODEV; + goto out_unwind; + } + + chan_dbg(chan, "request per-channel IRQ\n"); + ret = request_irq(chan->irq, fsldma_chan_irq, IRQF_SHARED, + "fsldma-chan", chan); + if (ret) { + chan_err(chan, "unable to request per-channel IRQ\n"); + goto out_unwind; + } + } + + return 0; + +out_unwind: + for (/* none */; i >= 0; i--) { + chan = fdev->chan[i]; + if (!chan) + continue; + + if (!chan->irq) + continue; + + free_irq(chan->irq, chan); + } + + return ret; +} + +/*----------------------------------------------------------------------------*/ +/* OpenFirmware Subsystem */ +/*----------------------------------------------------------------------------*/ + +static int fsl_dma_chan_probe(struct fsldma_device *fdev, + struct device_node *node, u32 feature, const char *compatible) +{ + struct fsldma_chan *chan; + struct resource res; + int err; + + /* alloc channel */ + chan = kzalloc(sizeof(*chan), GFP_KERNEL); + if (!chan) { + err = -ENOMEM; + goto out_return; + } + + /* ioremap registers for use */ + chan->regs = of_iomap(node, 0); + if (!chan->regs) { + dev_err(fdev->dev, "unable to ioremap registers\n"); + err = -ENOMEM; + goto out_free_chan; + } + + err = of_address_to_resource(node, 0, &res); + if (err) { + dev_err(fdev->dev, "unable to find 'reg' property\n"); + goto out_iounmap_regs; + } + + chan->feature = feature; + if (!fdev->feature) + fdev->feature = chan->feature; + + /* + * If the DMA device's feature is different than the feature + * of its channels, report the bug + */ + WARN_ON(fdev->feature != chan->feature); + + chan->dev = fdev->dev; + chan->id = (res.start & 0xfff) < 0x300 ? + ((res.start - 0x100) & 0xfff) >> 7 : + ((res.start - 0x200) & 0xfff) >> 7; + if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) { + dev_err(fdev->dev, "too many channels for device\n"); + err = -EINVAL; + goto out_iounmap_regs; + } + + fdev->chan[chan->id] = chan; + tasklet_setup(&chan->tasklet, dma_do_tasklet); + snprintf(chan->name, sizeof(chan->name), "chan%d", chan->id); + + /* Initialize the channel */ + dma_init(chan); + + /* Clear cdar registers */ + set_cdar(chan, 0); + + switch (chan->feature & FSL_DMA_IP_MASK) { + case FSL_DMA_IP_85XX: + chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; + fallthrough; + case FSL_DMA_IP_83XX: + chan->toggle_ext_start = fsl_chan_toggle_ext_start; + chan->set_src_loop_size = fsl_chan_set_src_loop_size; + chan->set_dst_loop_size = fsl_chan_set_dst_loop_size; + chan->set_request_count = fsl_chan_set_request_count; + } + + spin_lock_init(&chan->desc_lock); + INIT_LIST_HEAD(&chan->ld_pending); + INIT_LIST_HEAD(&chan->ld_running); + INIT_LIST_HEAD(&chan->ld_completed); + chan->idle = true; +#ifdef CONFIG_PM + chan->pm_state = RUNNING; +#endif + + chan->common.device = &fdev->common; + dma_cookie_init(&chan->common); + + /* find the IRQ line, if it exists in the device tree */ + chan->irq = irq_of_parse_and_map(node, 0); + + /* Add the channel to DMA device channel list */ + list_add_tail(&chan->common.device_node, &fdev->common.channels); + + dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible, + chan->irq ? chan->irq : fdev->irq); + + return 0; + +out_iounmap_regs: + iounmap(chan->regs); +out_free_chan: + kfree(chan); +out_return: + return err; +} + +static void fsl_dma_chan_remove(struct fsldma_chan *chan) +{ + irq_dispose_mapping(chan->irq); + list_del(&chan->common.device_node); + iounmap(chan->regs); + kfree(chan); +} + +static int fsldma_of_probe(struct platform_device *op) +{ + struct fsldma_device *fdev; + struct device_node *child; + unsigned int i; + int err; + + fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); + if (!fdev) { + err = -ENOMEM; + goto out_return; + } + + fdev->dev = &op->dev; + INIT_LIST_HEAD(&fdev->common.channels); + + /* ioremap the registers for use */ + fdev->regs = of_iomap(op->dev.of_node, 0); + if (!fdev->regs) { + dev_err(&op->dev, "unable to ioremap registers\n"); + err = -ENOMEM; + goto out_free; + } + + /* map the channel IRQ if it exists, but don't hookup the handler yet */ + fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0); + + dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); + dma_cap_set(DMA_SLAVE, fdev->common.cap_mask); + fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; + fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; + fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; + fdev->common.device_tx_status = fsl_tx_status; + fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; + fdev->common.device_config = fsl_dma_device_config; + fdev->common.device_terminate_all = fsl_dma_device_terminate_all; + fdev->common.dev = &op->dev; + + fdev->common.src_addr_widths = FSL_DMA_BUSWIDTHS; + fdev->common.dst_addr_widths = FSL_DMA_BUSWIDTHS; + fdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + fdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + dma_set_mask(&(op->dev), DMA_BIT_MASK(36)); + + platform_set_drvdata(op, fdev); + + /* + * We cannot use of_platform_bus_probe() because there is no + * of_platform_bus_remove(). Instead, we manually instantiate every DMA + * channel object. + */ + for_each_child_of_node(op->dev.of_node, child) { + if (of_device_is_compatible(child, "fsl,eloplus-dma-channel")) { + fsl_dma_chan_probe(fdev, child, + FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN, + "fsl,eloplus-dma-channel"); + } + + if (of_device_is_compatible(child, "fsl,elo-dma-channel")) { + fsl_dma_chan_probe(fdev, child, + FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN, + "fsl,elo-dma-channel"); + } + } + + /* + * Hookup the IRQ handler(s) + * + * If we have a per-controller interrupt, we prefer that to the + * per-channel interrupts to reduce the number of shared interrupt + * handlers on the same IRQ line + */ + err = fsldma_request_irqs(fdev); + if (err) { + dev_err(fdev->dev, "unable to request IRQs\n"); + goto out_free_fdev; + } + + dma_async_device_register(&fdev->common); + return 0; + +out_free_fdev: + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + if (fdev->chan[i]) + fsl_dma_chan_remove(fdev->chan[i]); + } + irq_dispose_mapping(fdev->irq); + iounmap(fdev->regs); +out_free: + kfree(fdev); +out_return: + return err; +} + +static int fsldma_of_remove(struct platform_device *op) +{ + struct fsldma_device *fdev; + unsigned int i; + + fdev = platform_get_drvdata(op); + dma_async_device_unregister(&fdev->common); + + fsldma_free_irqs(fdev); + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + if (fdev->chan[i]) + fsl_dma_chan_remove(fdev->chan[i]); + } + irq_dispose_mapping(fdev->irq); + + iounmap(fdev->regs); + kfree(fdev); + + return 0; +} + +#ifdef CONFIG_PM +static int fsldma_suspend_late(struct device *dev) +{ + struct fsldma_device *fdev = dev_get_drvdata(dev); + struct fsldma_chan *chan; + int i; + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (!chan) + continue; + + spin_lock_bh(&chan->desc_lock); + if (unlikely(!chan->idle)) + goto out; + chan->regs_save.mr = get_mr(chan); + chan->pm_state = SUSPENDED; + spin_unlock_bh(&chan->desc_lock); + } + return 0; + +out: + for (; i >= 0; i--) { + chan = fdev->chan[i]; + if (!chan) + continue; + chan->pm_state = RUNNING; + spin_unlock_bh(&chan->desc_lock); + } + return -EBUSY; +} + +static int fsldma_resume_early(struct device *dev) +{ + struct fsldma_device *fdev = dev_get_drvdata(dev); + struct fsldma_chan *chan; + u32 mode; + int i; + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (!chan) + continue; + + spin_lock_bh(&chan->desc_lock); + mode = chan->regs_save.mr + & ~FSL_DMA_MR_CS & ~FSL_DMA_MR_CC & ~FSL_DMA_MR_CA; + set_mr(chan, mode); + chan->pm_state = RUNNING; + spin_unlock_bh(&chan->desc_lock); + } + + return 0; +} + +static const struct dev_pm_ops fsldma_pm_ops = { + .suspend_late = fsldma_suspend_late, + .resume_early = fsldma_resume_early, +}; +#endif + +static const struct of_device_id fsldma_of_ids[] = { + { .compatible = "fsl,elo3-dma", }, + { .compatible = "fsl,eloplus-dma", }, + { .compatible = "fsl,elo-dma", }, + {} +}; +MODULE_DEVICE_TABLE(of, fsldma_of_ids); + +static struct platform_driver fsldma_of_driver = { + .driver = { + .name = "fsl-elo-dma", + .of_match_table = fsldma_of_ids, +#ifdef CONFIG_PM + .pm = &fsldma_pm_ops, +#endif + }, + .probe = fsldma_of_probe, + .remove = fsldma_of_remove, +}; + +/*----------------------------------------------------------------------------*/ +/* Module Init / Exit */ +/*----------------------------------------------------------------------------*/ + +static __init int fsldma_init(void) +{ + pr_info("Freescale Elo series DMA driver\n"); + return platform_driver_register(&fsldma_of_driver); +} + +static void __exit fsldma_exit(void) +{ + platform_driver_unregister(&fsldma_of_driver); +} + +subsys_initcall(fsldma_init); +module_exit(fsldma_exit); + +MODULE_DESCRIPTION("Freescale Elo series DMA driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h new file mode 100644 index 0000000000..308bed0a56 --- /dev/null +++ b/drivers/dma/fsldma.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: + * Zhang Wei , Jul 2007 + * Ebony Zhu , May 2007 + */ +#ifndef __DMA_FSLDMA_H +#define __DMA_FSLDMA_H + +#include +#include +#include + +/* Define data structures needed by Freescale + * MPC8540 and MPC8349 DMA controller. + */ +#define FSL_DMA_MR_CS 0x00000001 +#define FSL_DMA_MR_CC 0x00000002 +#define FSL_DMA_MR_CA 0x00000008 +#define FSL_DMA_MR_EIE 0x00000040 +#define FSL_DMA_MR_XFE 0x00000020 +#define FSL_DMA_MR_EOLNIE 0x00000100 +#define FSL_DMA_MR_EOLSIE 0x00000080 +#define FSL_DMA_MR_EOSIE 0x00000200 +#define FSL_DMA_MR_CDSM 0x00000010 +#define FSL_DMA_MR_CTM 0x00000004 +#define FSL_DMA_MR_EMP_EN 0x00200000 +#define FSL_DMA_MR_EMS_EN 0x00040000 +#define FSL_DMA_MR_DAHE 0x00002000 +#define FSL_DMA_MR_SAHE 0x00001000 + +#define FSL_DMA_MR_SAHTS_MASK 0x0000C000 +#define FSL_DMA_MR_DAHTS_MASK 0x00030000 +#define FSL_DMA_MR_BWC_MASK 0x0f000000 + +/* + * Bandwidth/pause control determines how many bytes a given + * channel is allowed to transfer before the DMA engine pauses + * the current channel and switches to the next channel + */ +#define FSL_DMA_MR_BWC 0x0A000000 + +/* Special MR definition for MPC8349 */ +#define FSL_DMA_MR_EOTIE 0x00000080 +#define FSL_DMA_MR_PRC_RM 0x00000800 + +#define FSL_DMA_SR_CH 0x00000020 +#define FSL_DMA_SR_PE 0x00000010 +#define FSL_DMA_SR_CB 0x00000004 +#define FSL_DMA_SR_TE 0x00000080 +#define FSL_DMA_SR_EOSI 0x00000002 +#define FSL_DMA_SR_EOLSI 0x00000001 +#define FSL_DMA_SR_EOCDI 0x00000001 +#define FSL_DMA_SR_EOLNI 0x00000008 + +#define FSL_DMA_SATR_SBPATMU 0x20000000 +#define FSL_DMA_SATR_STRANSINT_RIO 0x00c00000 +#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ 0x00050000 +#define FSL_DMA_SATR_SREADTYPE_BP_IORH 0x00020000 +#define FSL_DMA_SATR_SREADTYPE_BP_NREAD 0x00040000 +#define FSL_DMA_SATR_SREADTYPE_BP_MREAD 0x00070000 + +#define FSL_DMA_DATR_DBPATMU 0x20000000 +#define FSL_DMA_DATR_DTRANSINT_RIO 0x00c00000 +#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE 0x00050000 +#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH 0x00010000 + +#define FSL_DMA_EOL ((u64)0x1) +#define FSL_DMA_SNEN ((u64)0x10) +#define FSL_DMA_EOSIE 0x8 +#define FSL_DMA_NLDA_MASK (~(u64)0x1f) + +#define FSL_DMA_BCR_MAX_CNT 0x03ffffffu + +#define FSL_DMA_DGSR_TE 0x80 +#define FSL_DMA_DGSR_CH 0x20 +#define FSL_DMA_DGSR_PE 0x10 +#define FSL_DMA_DGSR_EOLNI 0x08 +#define FSL_DMA_DGSR_CB 0x04 +#define FSL_DMA_DGSR_EOSI 0x02 +#define FSL_DMA_DGSR_EOLSI 0x01 + +#define FSL_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) +typedef u64 __bitwise v64; +typedef u32 __bitwise v32; + +struct fsl_dma_ld_hw { + v64 src_addr; + v64 dst_addr; + v64 next_ln_addr; + v32 count; + v32 reserve; +} __attribute__((aligned(32))); + +struct fsl_desc_sw { + struct fsl_dma_ld_hw hw; + struct list_head node; + struct list_head tx_list; + struct dma_async_tx_descriptor async_tx; +} __attribute__((aligned(32))); + +struct fsldma_chan_regs { + u32 mr; /* 0x00 - Mode Register */ + u32 sr; /* 0x04 - Status Register */ + u64 cdar; /* 0x08 - Current descriptor address register */ + u64 sar; /* 0x10 - Source Address Register */ + u64 dar; /* 0x18 - Destination Address Register */ + u32 bcr; /* 0x20 - Byte Count Register */ + u64 ndar; /* 0x24 - Next Descriptor Address Register */ +}; + +struct fsldma_chan; +#define FSL_DMA_MAX_CHANS_PER_DEVICE 8 + +struct fsldma_device { + void __iomem *regs; /* DGSR register base */ + struct device *dev; + struct dma_device common; + struct fsldma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE]; + u32 feature; /* The same as DMA channels */ + int irq; /* Channel IRQ */ +}; + +/* Define macros for fsldma_chan->feature property */ +#define FSL_DMA_LITTLE_ENDIAN 0x00000000 +#define FSL_DMA_BIG_ENDIAN 0x00000001 + +#define FSL_DMA_IP_MASK 0x00000ff0 +#define FSL_DMA_IP_85XX 0x00000010 +#define FSL_DMA_IP_83XX 0x00000020 + +#define FSL_DMA_CHAN_PAUSE_EXT 0x00001000 +#define FSL_DMA_CHAN_START_EXT 0x00002000 + +#ifdef CONFIG_PM +struct fsldma_chan_regs_save { + u32 mr; +}; + +enum fsldma_pm_state { + RUNNING = 0, + SUSPENDED, +}; +#endif + +struct fsldma_chan { + char name[8]; /* Channel name */ + struct fsldma_chan_regs __iomem *regs; + spinlock_t desc_lock; /* Descriptor operation lock */ + /* + * Descriptors which are queued to run, but have not yet been + * submitted to the hardware for execution + */ + struct list_head ld_pending; + /* + * Descriptors which are currently being executed by the hardware + */ + struct list_head ld_running; + /* + * Descriptors which have finished execution by the hardware. These + * descriptors have already had their cleanup actions run. They are + * waiting for the ACK bit to be set by the async_tx API. + */ + struct list_head ld_completed; /* Link descriptors queue */ + struct dma_chan common; /* DMA common channel */ + struct dma_pool *desc_pool; /* Descriptors pool */ + struct device *dev; /* Channel device */ + int irq; /* Channel IRQ */ + int id; /* Raw id of this channel */ + struct tasklet_struct tasklet; + u32 feature; + bool idle; /* DMA controller is idle */ +#ifdef CONFIG_PM + struct fsldma_chan_regs_save regs_save; + enum fsldma_pm_state pm_state; +#endif + + void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable); + void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable); + void (*set_src_loop_size)(struct fsldma_chan *fsl_chan, int size); + void (*set_dst_loop_size)(struct fsldma_chan *fsl_chan, int size); + void (*set_request_count)(struct fsldma_chan *fsl_chan, int size); +}; + +#define to_fsl_chan(chan) container_of(chan, struct fsldma_chan, common) +#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node) +#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx) + +#ifdef CONFIG_PPC +#define fsl_ioread32(p) in_le32(p) +#define fsl_ioread32be(p) in_be32(p) +#define fsl_iowrite32(v, p) out_le32(p, v) +#define fsl_iowrite32be(v, p) out_be32(p, v) + +#ifdef __powerpc64__ +#define fsl_ioread64(p) in_le64(p) +#define fsl_ioread64be(p) in_be64(p) +#define fsl_iowrite64(v, p) out_le64(p, v) +#define fsl_iowrite64be(v, p) out_be64(p, v) +#else +static u64 fsl_ioread64(const u64 __iomem *addr) +{ + u32 val_lo = in_le32((u32 __iomem *)addr); + u32 val_hi = in_le32((u32 __iomem *)addr + 1); + + return ((u64)val_hi << 32) + val_lo; +} + +static void fsl_iowrite64(u64 val, u64 __iomem *addr) +{ + out_le32((u32 __iomem *)addr + 1, val >> 32); + out_le32((u32 __iomem *)addr, (u32)val); +} + +static u64 fsl_ioread64be(const u64 __iomem *addr) +{ + u32 val_hi = in_be32((u32 __iomem *)addr); + u32 val_lo = in_be32((u32 __iomem *)addr + 1); + + return ((u64)val_hi << 32) + val_lo; +} + +static void fsl_iowrite64be(u64 val, u64 __iomem *addr) +{ + out_be32((u32 __iomem *)addr, val >> 32); + out_be32((u32 __iomem *)addr + 1, (u32)val); +} +#endif +#endif + +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) +#define fsl_ioread32(p) ioread32(p) +#define fsl_ioread32be(p) ioread32be(p) +#define fsl_iowrite32(v, p) iowrite32(v, p) +#define fsl_iowrite32be(v, p) iowrite32be(v, p) +#define fsl_ioread64(p) ioread64(p) +#define fsl_ioread64be(p) ioread64be(p) +#define fsl_iowrite64(v, p) iowrite64(v, p) +#define fsl_iowrite64be(v, p) iowrite64be(v, p) +#endif + +#define FSL_DMA_IN(fsl_dma, addr, width) \ + (((fsl_dma)->feature & FSL_DMA_BIG_ENDIAN) ? \ + fsl_ioread##width##be(addr) : fsl_ioread##width(addr)) + +#define FSL_DMA_OUT(fsl_dma, addr, val, width) \ + (((fsl_dma)->feature & FSL_DMA_BIG_ENDIAN) ? \ + fsl_iowrite##width##be(val, addr) : fsl_iowrite \ + ##width(val, addr)) + +#define DMA_TO_CPU(fsl_chan, d, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + be##width##_to_cpu((__force __be##width)(v##width)d) : \ + le##width##_to_cpu((__force __le##width)(v##width)d)) +#define CPU_TO_DMA(fsl_chan, c, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + (__force v##width)cpu_to_be##width(c) : \ + (__force v##width)cpu_to_le##width(c)) + +#endif /* __DMA_FSLDMA_H */ diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c new file mode 100644 index 0000000000..c1350a36fd --- /dev/null +++ b/drivers/dma/hisi_dma.c @@ -0,0 +1,1053 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2019-2022 HiSilicon Limited. */ + +#include +#include +#include +#include +#include +#include +#include +#include "virt-dma.h" + +/* HiSilicon DMA register common field define */ +#define HISI_DMA_Q_SQ_BASE_L 0x0 +#define HISI_DMA_Q_SQ_BASE_H 0x4 +#define HISI_DMA_Q_SQ_DEPTH 0x8 +#define HISI_DMA_Q_SQ_TAIL_PTR 0xc +#define HISI_DMA_Q_CQ_BASE_L 0x10 +#define HISI_DMA_Q_CQ_BASE_H 0x14 +#define HISI_DMA_Q_CQ_DEPTH 0x18 +#define HISI_DMA_Q_CQ_HEAD_PTR 0x1c +#define HISI_DMA_Q_CTRL0 0x20 +#define HISI_DMA_Q_CTRL0_QUEUE_EN BIT(0) +#define HISI_DMA_Q_CTRL0_QUEUE_PAUSE BIT(4) +#define HISI_DMA_Q_CTRL1 0x24 +#define HISI_DMA_Q_CTRL1_QUEUE_RESET BIT(0) +#define HISI_DMA_Q_FSM_STS 0x30 +#define HISI_DMA_Q_FSM_STS_MASK GENMASK(3, 0) +#define HISI_DMA_Q_ERR_INT_NUM0 0x84 +#define HISI_DMA_Q_ERR_INT_NUM1 0x88 +#define HISI_DMA_Q_ERR_INT_NUM2 0x8c + +/* HiSilicon IP08 DMA register and field define */ +#define HISI_DMA_HIP08_MODE 0x217C +#define HISI_DMA_HIP08_Q_BASE 0x0 +#define HISI_DMA_HIP08_Q_CTRL0_ERR_ABORT_EN BIT(2) +#define HISI_DMA_HIP08_Q_INT_STS 0x40 +#define HISI_DMA_HIP08_Q_INT_MSK 0x44 +#define HISI_DMA_HIP08_Q_INT_STS_MASK GENMASK(14, 0) +#define HISI_DMA_HIP08_Q_ERR_INT_NUM3 0x90 +#define HISI_DMA_HIP08_Q_ERR_INT_NUM4 0x94 +#define HISI_DMA_HIP08_Q_ERR_INT_NUM5 0x98 +#define HISI_DMA_HIP08_Q_ERR_INT_NUM6 0x48 +#define HISI_DMA_HIP08_Q_CTRL0_SQCQ_DRCT BIT(24) + +/* HiSilicon IP09 DMA register and field define */ +#define HISI_DMA_HIP09_DMA_FLR_DISABLE 0xA00 +#define HISI_DMA_HIP09_DMA_FLR_DISABLE_B BIT(0) +#define HISI_DMA_HIP09_Q_BASE 0x2000 +#define HISI_DMA_HIP09_Q_CTRL0_ERR_ABORT_EN GENMASK(31, 28) +#define HISI_DMA_HIP09_Q_CTRL0_SQ_DRCT BIT(26) +#define HISI_DMA_HIP09_Q_CTRL0_CQ_DRCT BIT(27) +#define HISI_DMA_HIP09_Q_CTRL1_VA_ENABLE BIT(2) +#define HISI_DMA_HIP09_Q_INT_STS 0x40 +#define HISI_DMA_HIP09_Q_INT_MSK 0x44 +#define HISI_DMA_HIP09_Q_INT_STS_MASK 0x1 +#define HISI_DMA_HIP09_Q_ERR_INT_STS 0x48 +#define HISI_DMA_HIP09_Q_ERR_INT_MSK 0x4C +#define HISI_DMA_HIP09_Q_ERR_INT_STS_MASK GENMASK(18, 1) +#define HISI_DMA_HIP09_PORT_CFG_REG(port_id) (0x800 + \ + (port_id) * 0x20) +#define HISI_DMA_HIP09_PORT_CFG_LINK_DOWN_MASK_B BIT(16) + +#define HISI_DMA_HIP09_MAX_PORT_NUM 16 + +#define HISI_DMA_HIP08_MSI_NUM 32 +#define HISI_DMA_HIP08_CHAN_NUM 30 +#define HISI_DMA_HIP09_MSI_NUM 4 +#define HISI_DMA_HIP09_CHAN_NUM 4 +#define HISI_DMA_REVISION_HIP08B 0x21 +#define HISI_DMA_REVISION_HIP09A 0x30 + +#define HISI_DMA_Q_OFFSET 0x100 +#define HISI_DMA_Q_DEPTH_VAL 1024 + +#define PCI_BAR_2 2 + +#define HISI_DMA_POLL_Q_STS_DELAY_US 10 +#define HISI_DMA_POLL_Q_STS_TIME_OUT_US 1000 + +#define HISI_DMA_MAX_DIR_NAME_LEN 128 + +/* + * The HIP08B(HiSilicon IP08) and HIP09A(HiSilicon IP09) are DMA iEPs, they + * have the same pci device id but different pci revision. + * Unfortunately, they have different register layouts, so two layout + * enumerations are defined. + */ +enum hisi_dma_reg_layout { + HISI_DMA_REG_LAYOUT_INVALID = 0, + HISI_DMA_REG_LAYOUT_HIP08, + HISI_DMA_REG_LAYOUT_HIP09 +}; + +enum hisi_dma_mode { + EP = 0, + RC, +}; + +enum hisi_dma_chan_status { + DISABLE = -1, + IDLE = 0, + RUN, + CPL, + PAUSE, + HALT, + ABORT, + WAIT, + BUFFCLR, +}; + +struct hisi_dma_sqe { + __le32 dw0; +#define OPCODE_MASK GENMASK(3, 0) +#define OPCODE_SMALL_PACKAGE 0x1 +#define OPCODE_M2M 0x4 +#define LOCAL_IRQ_EN BIT(8) +#define ATTR_SRC_MASK GENMASK(14, 12) + __le32 dw1; + __le32 dw2; +#define ATTR_DST_MASK GENMASK(26, 24) + __le32 length; + __le64 src_addr; + __le64 dst_addr; +}; + +struct hisi_dma_cqe { + __le32 rsv0; + __le32 rsv1; + __le16 sq_head; + __le16 rsv2; + __le16 rsv3; + __le16 w0; +#define STATUS_MASK GENMASK(15, 1) +#define STATUS_SUCC 0x0 +#define VALID_BIT BIT(0) +}; + +struct hisi_dma_desc { + struct virt_dma_desc vd; + struct hisi_dma_sqe sqe; +}; + +struct hisi_dma_chan { + struct virt_dma_chan vc; + struct hisi_dma_dev *hdma_dev; + struct hisi_dma_sqe *sq; + struct hisi_dma_cqe *cq; + dma_addr_t sq_dma; + dma_addr_t cq_dma; + u32 sq_tail; + u32 cq_head; + u32 qp_num; + enum hisi_dma_chan_status status; + struct hisi_dma_desc *desc; +}; + +struct hisi_dma_dev { + struct pci_dev *pdev; + void __iomem *base; + struct dma_device dma_dev; + u32 chan_num; + u32 chan_depth; + enum hisi_dma_reg_layout reg_layout; + void __iomem *queue_base; /* queue region start of register */ + struct hisi_dma_chan chan[]; +}; + +#ifdef CONFIG_DEBUG_FS + +static const struct debugfs_reg32 hisi_dma_comm_chan_regs[] = { + {"DMA_QUEUE_SQ_DEPTH ", 0x0008ull}, + {"DMA_QUEUE_SQ_TAIL_PTR ", 0x000Cull}, + {"DMA_QUEUE_CQ_DEPTH ", 0x0018ull}, + {"DMA_QUEUE_CQ_HEAD_PTR ", 0x001Cull}, + {"DMA_QUEUE_CTRL0 ", 0x0020ull}, + {"DMA_QUEUE_CTRL1 ", 0x0024ull}, + {"DMA_QUEUE_FSM_STS ", 0x0030ull}, + {"DMA_QUEUE_SQ_STS ", 0x0034ull}, + {"DMA_QUEUE_CQ_TAIL_PTR ", 0x003Cull}, + {"DMA_QUEUE_INT_STS ", 0x0040ull}, + {"DMA_QUEUE_INT_MSK ", 0x0044ull}, + {"DMA_QUEUE_INT_RO ", 0x006Cull}, +}; + +static const struct debugfs_reg32 hisi_dma_hip08_chan_regs[] = { + {"DMA_QUEUE_BYTE_CNT ", 0x0038ull}, + {"DMA_ERR_INT_NUM6 ", 0x0048ull}, + {"DMA_QUEUE_DESP0 ", 0x0050ull}, + {"DMA_QUEUE_DESP1 ", 0x0054ull}, + {"DMA_QUEUE_DESP2 ", 0x0058ull}, + {"DMA_QUEUE_DESP3 ", 0x005Cull}, + {"DMA_QUEUE_DESP4 ", 0x0074ull}, + {"DMA_QUEUE_DESP5 ", 0x0078ull}, + {"DMA_QUEUE_DESP6 ", 0x007Cull}, + {"DMA_QUEUE_DESP7 ", 0x0080ull}, + {"DMA_ERR_INT_NUM0 ", 0x0084ull}, + {"DMA_ERR_INT_NUM1 ", 0x0088ull}, + {"DMA_ERR_INT_NUM2 ", 0x008Cull}, + {"DMA_ERR_INT_NUM3 ", 0x0090ull}, + {"DMA_ERR_INT_NUM4 ", 0x0094ull}, + {"DMA_ERR_INT_NUM5 ", 0x0098ull}, + {"DMA_QUEUE_SQ_STS2 ", 0x00A4ull}, +}; + +static const struct debugfs_reg32 hisi_dma_hip09_chan_regs[] = { + {"DMA_QUEUE_ERR_INT_STS ", 0x0048ull}, + {"DMA_QUEUE_ERR_INT_MSK ", 0x004Cull}, + {"DFX_SQ_READ_ERR_PTR ", 0x0068ull}, + {"DFX_DMA_ERR_INT_NUM0 ", 0x0084ull}, + {"DFX_DMA_ERR_INT_NUM1 ", 0x0088ull}, + {"DFX_DMA_ERR_INT_NUM2 ", 0x008Cull}, + {"DFX_DMA_QUEUE_SQ_STS2 ", 0x00A4ull}, +}; + +static const struct debugfs_reg32 hisi_dma_hip08_comm_regs[] = { + {"DMA_ECC_ERR_ADDR ", 0x2004ull}, + {"DMA_ECC_ECC_CNT ", 0x2014ull}, + {"COMMON_AND_CH_ERR_STS ", 0x2030ull}, + {"LOCAL_CPL_ID_STS_0 ", 0x20E0ull}, + {"LOCAL_CPL_ID_STS_1 ", 0x20E4ull}, + {"LOCAL_CPL_ID_STS_2 ", 0x20E8ull}, + {"LOCAL_CPL_ID_STS_3 ", 0x20ECull}, + {"LOCAL_TLP_NUM ", 0x2158ull}, + {"SQCQ_TLP_NUM ", 0x2164ull}, + {"CPL_NUM ", 0x2168ull}, + {"INF_BACK_PRESS_STS ", 0x2170ull}, + {"DMA_CH_RAS_LEVEL ", 0x2184ull}, + {"DMA_CM_RAS_LEVEL ", 0x2188ull}, + {"DMA_CH_ERR_STS ", 0x2190ull}, + {"DMA_CH_DONE_STS ", 0x2194ull}, + {"DMA_SQ_TAG_STS_0 ", 0x21A0ull}, + {"DMA_SQ_TAG_STS_1 ", 0x21A4ull}, + {"DMA_SQ_TAG_STS_2 ", 0x21A8ull}, + {"DMA_SQ_TAG_STS_3 ", 0x21ACull}, + {"LOCAL_P_ID_STS_0 ", 0x21B0ull}, + {"LOCAL_P_ID_STS_1 ", 0x21B4ull}, + {"LOCAL_P_ID_STS_2 ", 0x21B8ull}, + {"LOCAL_P_ID_STS_3 ", 0x21BCull}, + {"DMA_PREBUFF_INFO_0 ", 0x2200ull}, + {"DMA_CM_TABLE_INFO_0 ", 0x2220ull}, + {"DMA_CM_CE_RO ", 0x2244ull}, + {"DMA_CM_NFE_RO ", 0x2248ull}, + {"DMA_CM_FE_RO ", 0x224Cull}, +}; + +static const struct debugfs_reg32 hisi_dma_hip09_comm_regs[] = { + {"COMMON_AND_CH_ERR_STS ", 0x0030ull}, + {"DMA_PORT_IDLE_STS ", 0x0150ull}, + {"DMA_CH_RAS_LEVEL ", 0x0184ull}, + {"DMA_CM_RAS_LEVEL ", 0x0188ull}, + {"DMA_CM_CE_RO ", 0x0244ull}, + {"DMA_CM_NFE_RO ", 0x0248ull}, + {"DMA_CM_FE_RO ", 0x024Cull}, + {"DFX_INF_BACK_PRESS_STS0 ", 0x1A40ull}, + {"DFX_INF_BACK_PRESS_STS1 ", 0x1A44ull}, + {"DFX_INF_BACK_PRESS_STS2 ", 0x1A48ull}, + {"DFX_DMA_WRR_DISABLE ", 0x1A4Cull}, + {"DFX_PA_REQ_TLP_NUM ", 0x1C00ull}, + {"DFX_PA_BACK_TLP_NUM ", 0x1C04ull}, + {"DFX_PA_RETRY_TLP_NUM ", 0x1C08ull}, + {"DFX_LOCAL_NP_TLP_NUM ", 0x1C0Cull}, + {"DFX_LOCAL_CPL_HEAD_TLP_NUM ", 0x1C10ull}, + {"DFX_LOCAL_CPL_DATA_TLP_NUM ", 0x1C14ull}, + {"DFX_LOCAL_CPL_EXT_DATA_TLP_NUM ", 0x1C18ull}, + {"DFX_LOCAL_P_HEAD_TLP_NUM ", 0x1C1Cull}, + {"DFX_LOCAL_P_ACK_TLP_NUM ", 0x1C20ull}, + {"DFX_BUF_ALOC_PORT_REQ_NUM ", 0x1C24ull}, + {"DFX_BUF_ALOC_PORT_RESULT_NUM ", 0x1C28ull}, + {"DFX_BUF_FAIL_SIZE_NUM ", 0x1C2Cull}, + {"DFX_BUF_ALOC_SIZE_NUM ", 0x1C30ull}, + {"DFX_BUF_NP_RELEASE_SIZE_NUM ", 0x1C34ull}, + {"DFX_BUF_P_RELEASE_SIZE_NUM ", 0x1C38ull}, + {"DFX_BUF_PORT_RELEASE_SIZE_NUM ", 0x1C3Cull}, + {"DFX_DMA_PREBUF_MEM0_ECC_ERR_ADDR ", 0x1CA8ull}, + {"DFX_DMA_PREBUF_MEM0_ECC_CNT ", 0x1CACull}, + {"DFX_DMA_LOC_NP_OSTB_ECC_ERR_ADDR ", 0x1CB0ull}, + {"DFX_DMA_LOC_NP_OSTB_ECC_CNT ", 0x1CB4ull}, + {"DFX_DMA_PREBUF_MEM1_ECC_ERR_ADDR ", 0x1CC0ull}, + {"DFX_DMA_PREBUF_MEM1_ECC_CNT ", 0x1CC4ull}, + {"DMA_CH_DONE_STS ", 0x02E0ull}, + {"DMA_CH_ERR_STS ", 0x0320ull}, +}; +#endif /* CONFIG_DEBUG_FS*/ + +static enum hisi_dma_reg_layout hisi_dma_get_reg_layout(struct pci_dev *pdev) +{ + if (pdev->revision == HISI_DMA_REVISION_HIP08B) + return HISI_DMA_REG_LAYOUT_HIP08; + else if (pdev->revision >= HISI_DMA_REVISION_HIP09A) + return HISI_DMA_REG_LAYOUT_HIP09; + + return HISI_DMA_REG_LAYOUT_INVALID; +} + +static u32 hisi_dma_get_chan_num(struct pci_dev *pdev) +{ + if (pdev->revision == HISI_DMA_REVISION_HIP08B) + return HISI_DMA_HIP08_CHAN_NUM; + + return HISI_DMA_HIP09_CHAN_NUM; +} + +static u32 hisi_dma_get_msi_num(struct pci_dev *pdev) +{ + if (pdev->revision == HISI_DMA_REVISION_HIP08B) + return HISI_DMA_HIP08_MSI_NUM; + + return HISI_DMA_HIP09_MSI_NUM; +} + +static u32 hisi_dma_get_queue_base(struct pci_dev *pdev) +{ + if (pdev->revision == HISI_DMA_REVISION_HIP08B) + return HISI_DMA_HIP08_Q_BASE; + + return HISI_DMA_HIP09_Q_BASE; +} + +static inline struct hisi_dma_chan *to_hisi_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct hisi_dma_chan, vc.chan); +} + +static inline struct hisi_dma_desc *to_hisi_dma_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct hisi_dma_desc, vd); +} + +static inline void hisi_dma_chan_write(void __iomem *base, u32 reg, u32 index, + u32 val) +{ + writel_relaxed(val, base + reg + index * HISI_DMA_Q_OFFSET); +} + +static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val) +{ + u32 tmp; + + tmp = readl_relaxed(addr); + tmp = val ? tmp | pos : tmp & ~pos; + writel_relaxed(tmp, addr); +} + +static void hisi_dma_pause_dma(struct hisi_dma_dev *hdma_dev, u32 index, + bool pause) +{ + void __iomem *addr; + + addr = hdma_dev->queue_base + HISI_DMA_Q_CTRL0 + + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL0_QUEUE_PAUSE, pause); +} + +static void hisi_dma_enable_dma(struct hisi_dma_dev *hdma_dev, u32 index, + bool enable) +{ + void __iomem *addr; + + addr = hdma_dev->queue_base + HISI_DMA_Q_CTRL0 + + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL0_QUEUE_EN, enable); +} + +static void hisi_dma_mask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index) +{ + void __iomem *q_base = hdma_dev->queue_base; + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_MSK, + qp_index, HISI_DMA_HIP08_Q_INT_STS_MASK); + else { + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_MSK, + qp_index, HISI_DMA_HIP09_Q_INT_STS_MASK); + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_MSK, + qp_index, + HISI_DMA_HIP09_Q_ERR_INT_STS_MASK); + } +} + +static void hisi_dma_unmask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index) +{ + void __iomem *q_base = hdma_dev->queue_base; + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) { + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_STS, + qp_index, HISI_DMA_HIP08_Q_INT_STS_MASK); + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_MSK, + qp_index, 0); + } else { + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_STS, + qp_index, HISI_DMA_HIP09_Q_INT_STS_MASK); + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_STS, + qp_index, + HISI_DMA_HIP09_Q_ERR_INT_STS_MASK); + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_MSK, + qp_index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_MSK, + qp_index, 0); + } +} + +static void hisi_dma_do_reset(struct hisi_dma_dev *hdma_dev, u32 index) +{ + void __iomem *addr; + + addr = hdma_dev->queue_base + + HISI_DMA_Q_CTRL1 + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL1_QUEUE_RESET, 1); +} + +static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index) +{ + void __iomem *q_base = hdma_dev->queue_base; + + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_TAIL_PTR, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, index, 0); +} + +static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan, + bool disable) +{ + struct hisi_dma_dev *hdma_dev = chan->hdma_dev; + u32 index = chan->qp_num, tmp; + void __iomem *addr; + int ret; + + hisi_dma_pause_dma(hdma_dev, index, true); + hisi_dma_enable_dma(hdma_dev, index, false); + hisi_dma_mask_irq(hdma_dev, index); + + addr = hdma_dev->queue_base + + HISI_DMA_Q_FSM_STS + index * HISI_DMA_Q_OFFSET; + + ret = readl_relaxed_poll_timeout(addr, tmp, + FIELD_GET(HISI_DMA_Q_FSM_STS_MASK, tmp) != RUN, + HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US); + if (ret) { + dev_err(&hdma_dev->pdev->dev, "disable channel timeout!\n"); + WARN_ON(1); + } + + hisi_dma_do_reset(hdma_dev, index); + hisi_dma_reset_qp_point(hdma_dev, index); + hisi_dma_pause_dma(hdma_dev, index, false); + + if (!disable) { + hisi_dma_enable_dma(hdma_dev, index, true); + hisi_dma_unmask_irq(hdma_dev, index); + } + + ret = readl_relaxed_poll_timeout(addr, tmp, + FIELD_GET(HISI_DMA_Q_FSM_STS_MASK, tmp) == IDLE, + HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US); + if (ret) { + dev_err(&hdma_dev->pdev->dev, "reset channel timeout!\n"); + WARN_ON(1); + } +} + +static void hisi_dma_free_chan_resources(struct dma_chan *c) +{ + struct hisi_dma_chan *chan = to_hisi_dma_chan(c); + struct hisi_dma_dev *hdma_dev = chan->hdma_dev; + + hisi_dma_reset_or_disable_hw_chan(chan, false); + vchan_free_chan_resources(&chan->vc); + + memset(chan->sq, 0, sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth); + memset(chan->cq, 0, sizeof(struct hisi_dma_cqe) * hdma_dev->chan_depth); + chan->sq_tail = 0; + chan->cq_head = 0; + chan->status = DISABLE; +} + +static void hisi_dma_desc_free(struct virt_dma_desc *vd) +{ + kfree(to_hisi_dma_desc(vd)); +} + +static struct dma_async_tx_descriptor * +hisi_dma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct hisi_dma_chan *chan = to_hisi_dma_chan(c); + struct hisi_dma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->sqe.length = cpu_to_le32(len); + desc->sqe.src_addr = cpu_to_le64(src); + desc->sqe.dst_addr = cpu_to_le64(dst); + + return vchan_tx_prep(&chan->vc, &desc->vd, flags); +} + +static enum dma_status +hisi_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(c, cookie, txstate); +} + +static void hisi_dma_start_transfer(struct hisi_dma_chan *chan) +{ + struct hisi_dma_sqe *sqe = chan->sq + chan->sq_tail; + struct hisi_dma_dev *hdma_dev = chan->hdma_dev; + struct hisi_dma_desc *desc; + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&chan->vc); + if (!vd) { + chan->desc = NULL; + return; + } + list_del(&vd->node); + desc = to_hisi_dma_desc(vd); + chan->desc = desc; + + memcpy(sqe, &desc->sqe, sizeof(struct hisi_dma_sqe)); + + /* update other field in sqe */ + sqe->dw0 = cpu_to_le32(FIELD_PREP(OPCODE_MASK, OPCODE_M2M)); + sqe->dw0 |= cpu_to_le32(LOCAL_IRQ_EN); + + /* make sure data has been updated in sqe */ + wmb(); + + /* update sq tail, point to new sqe position */ + chan->sq_tail = (chan->sq_tail + 1) % hdma_dev->chan_depth; + + /* update sq_tail to trigger a new task */ + hisi_dma_chan_write(hdma_dev->queue_base, HISI_DMA_Q_SQ_TAIL_PTR, + chan->qp_num, chan->sq_tail); +} + +static void hisi_dma_issue_pending(struct dma_chan *c) +{ + struct hisi_dma_chan *chan = to_hisi_dma_chan(c); + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + + if (vchan_issue_pending(&chan->vc) && !chan->desc) + hisi_dma_start_transfer(chan); + + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static int hisi_dma_terminate_all(struct dma_chan *c) +{ + struct hisi_dma_chan *chan = to_hisi_dma_chan(c); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vc.lock, flags); + + hisi_dma_pause_dma(chan->hdma_dev, chan->qp_num, true); + if (chan->desc) { + vchan_terminate_vdesc(&chan->desc->vd); + chan->desc = NULL; + } + + vchan_get_all_descriptors(&chan->vc, &head); + + spin_unlock_irqrestore(&chan->vc.lock, flags); + + vchan_dma_desc_free_list(&chan->vc, &head); + hisi_dma_pause_dma(chan->hdma_dev, chan->qp_num, false); + + return 0; +} + +static void hisi_dma_synchronize(struct dma_chan *c) +{ + struct hisi_dma_chan *chan = to_hisi_dma_chan(c); + + vchan_synchronize(&chan->vc); +} + +static int hisi_dma_alloc_qps_mem(struct hisi_dma_dev *hdma_dev) +{ + size_t sq_size = sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth; + size_t cq_size = sizeof(struct hisi_dma_cqe) * hdma_dev->chan_depth; + struct device *dev = &hdma_dev->pdev->dev; + struct hisi_dma_chan *chan; + int i; + + for (i = 0; i < hdma_dev->chan_num; i++) { + chan = &hdma_dev->chan[i]; + chan->sq = dmam_alloc_coherent(dev, sq_size, &chan->sq_dma, + GFP_KERNEL); + if (!chan->sq) + return -ENOMEM; + + chan->cq = dmam_alloc_coherent(dev, cq_size, &chan->cq_dma, + GFP_KERNEL); + if (!chan->cq) + return -ENOMEM; + } + + return 0; +} + +static void hisi_dma_init_hw_qp(struct hisi_dma_dev *hdma_dev, u32 index) +{ + struct hisi_dma_chan *chan = &hdma_dev->chan[index]; + void __iomem *q_base = hdma_dev->queue_base; + u32 hw_depth = hdma_dev->chan_depth - 1; + void __iomem *addr; + u32 tmp; + + /* set sq, cq base */ + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_BASE_L, index, + lower_32_bits(chan->sq_dma)); + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_BASE_H, index, + upper_32_bits(chan->sq_dma)); + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_BASE_L, index, + lower_32_bits(chan->cq_dma)); + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_BASE_H, index, + upper_32_bits(chan->cq_dma)); + + /* set sq, cq depth */ + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_DEPTH, index, hw_depth); + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_DEPTH, index, hw_depth); + + /* init sq tail and cq head */ + hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_TAIL_PTR, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, index, 0); + + /* init error interrupt stats */ + hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM0, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM1, index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM2, index, 0); + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) { + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM3, + index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM4, + index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM5, + index, 0); + hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM6, + index, 0); + /* + * init SQ/CQ direction selecting register. + * "0" is to local side and "1" is to remote side. + */ + addr = q_base + HISI_DMA_Q_CTRL0 + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_HIP08_Q_CTRL0_SQCQ_DRCT, 0); + + /* + * 0 - Continue to next descriptor if error occurs. + * 1 - Abort the DMA queue if error occurs. + */ + hisi_dma_update_bit(addr, + HISI_DMA_HIP08_Q_CTRL0_ERR_ABORT_EN, 0); + } else { + addr = q_base + HISI_DMA_Q_CTRL0 + index * HISI_DMA_Q_OFFSET; + + /* + * init SQ/CQ direction selecting register. + * "0" is to local side and "1" is to remote side. + */ + hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL0_SQ_DRCT, 0); + hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL0_CQ_DRCT, 0); + + /* + * 0 - Continue to next descriptor if error occurs. + * 1 - Abort the DMA queue if error occurs. + */ + + tmp = readl_relaxed(addr); + tmp &= ~HISI_DMA_HIP09_Q_CTRL0_ERR_ABORT_EN; + writel_relaxed(tmp, addr); + + /* + * 0 - dma should process FLR whith CPU. + * 1 - dma not process FLR, only cpu process FLR. + */ + addr = q_base + HISI_DMA_HIP09_DMA_FLR_DISABLE + + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_HIP09_DMA_FLR_DISABLE_B, 0); + + addr = q_base + HISI_DMA_Q_CTRL1 + index * HISI_DMA_Q_OFFSET; + hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL1_VA_ENABLE, 1); + } +} + +static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index) +{ + hisi_dma_init_hw_qp(hdma_dev, qp_index); + hisi_dma_unmask_irq(hdma_dev, qp_index); + hisi_dma_enable_dma(hdma_dev, qp_index, true); +} + +static void hisi_dma_disable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index) +{ + hisi_dma_reset_or_disable_hw_chan(&hdma_dev->chan[qp_index], true); +} + +static void hisi_dma_enable_qps(struct hisi_dma_dev *hdma_dev) +{ + int i; + + for (i = 0; i < hdma_dev->chan_num; i++) { + hdma_dev->chan[i].qp_num = i; + hdma_dev->chan[i].hdma_dev = hdma_dev; + hdma_dev->chan[i].vc.desc_free = hisi_dma_desc_free; + vchan_init(&hdma_dev->chan[i].vc, &hdma_dev->dma_dev); + hisi_dma_enable_qp(hdma_dev, i); + } +} + +static void hisi_dma_disable_qps(struct hisi_dma_dev *hdma_dev) +{ + int i; + + for (i = 0; i < hdma_dev->chan_num; i++) { + hisi_dma_disable_qp(hdma_dev, i); + tasklet_kill(&hdma_dev->chan[i].vc.task); + } +} + +static irqreturn_t hisi_dma_irq(int irq, void *data) +{ + struct hisi_dma_chan *chan = data; + struct hisi_dma_dev *hdma_dev = chan->hdma_dev; + struct hisi_dma_desc *desc; + struct hisi_dma_cqe *cqe; + void __iomem *q_base; + + spin_lock(&chan->vc.lock); + + desc = chan->desc; + cqe = chan->cq + chan->cq_head; + q_base = hdma_dev->queue_base; + if (desc) { + chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth; + hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, + chan->qp_num, chan->cq_head); + if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) { + vchan_cookie_complete(&desc->vd); + hisi_dma_start_transfer(chan); + } else { + dev_err(&hdma_dev->pdev->dev, "task error!\n"); + } + } + + spin_unlock(&chan->vc.lock); + + return IRQ_HANDLED; +} + +static int hisi_dma_request_qps_irq(struct hisi_dma_dev *hdma_dev) +{ + struct pci_dev *pdev = hdma_dev->pdev; + int i, ret; + + for (i = 0; i < hdma_dev->chan_num; i++) { + ret = devm_request_irq(&pdev->dev, pci_irq_vector(pdev, i), + hisi_dma_irq, IRQF_SHARED, "hisi_dma", + &hdma_dev->chan[i]); + if (ret) + return ret; + } + + return 0; +} + +/* This function enables all hw channels in a device */ +static int hisi_dma_enable_hw_channels(struct hisi_dma_dev *hdma_dev) +{ + int ret; + + ret = hisi_dma_alloc_qps_mem(hdma_dev); + if (ret) { + dev_err(&hdma_dev->pdev->dev, "fail to allocate qp memory!\n"); + return ret; + } + + ret = hisi_dma_request_qps_irq(hdma_dev); + if (ret) { + dev_err(&hdma_dev->pdev->dev, "fail to request qp irq!\n"); + return ret; + } + + hisi_dma_enable_qps(hdma_dev); + + return 0; +} + +static void hisi_dma_disable_hw_channels(void *data) +{ + hisi_dma_disable_qps(data); +} + +static void hisi_dma_set_mode(struct hisi_dma_dev *hdma_dev, + enum hisi_dma_mode mode) +{ + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) + writel_relaxed(mode == RC ? 1 : 0, + hdma_dev->base + HISI_DMA_HIP08_MODE); +} + +static void hisi_dma_init_hw(struct hisi_dma_dev *hdma_dev) +{ + void __iomem *addr; + int i; + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP09) { + for (i = 0; i < HISI_DMA_HIP09_MAX_PORT_NUM; i++) { + addr = hdma_dev->base + HISI_DMA_HIP09_PORT_CFG_REG(i); + hisi_dma_update_bit(addr, + HISI_DMA_HIP09_PORT_CFG_LINK_DOWN_MASK_B, 1); + } + } +} + +static void hisi_dma_init_dma_dev(struct hisi_dma_dev *hdma_dev) +{ + struct dma_device *dma_dev; + + dma_dev = &hdma_dev->dma_dev; + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources; + dma_dev->device_prep_dma_memcpy = hisi_dma_prep_dma_memcpy; + dma_dev->device_tx_status = hisi_dma_tx_status; + dma_dev->device_issue_pending = hisi_dma_issue_pending; + dma_dev->device_terminate_all = hisi_dma_terminate_all; + dma_dev->device_synchronize = hisi_dma_synchronize; + dma_dev->directions = BIT(DMA_MEM_TO_MEM); + dma_dev->dev = &hdma_dev->pdev->dev; + INIT_LIST_HEAD(&dma_dev->channels); +} + +/* --- debugfs implementation --- */ +#ifdef CONFIG_DEBUG_FS +#include +static struct debugfs_reg32 *hisi_dma_get_ch_regs(struct hisi_dma_dev *hdma_dev, + u32 *regs_sz) +{ + struct device *dev = &hdma_dev->pdev->dev; + struct debugfs_reg32 *regs; + u32 regs_sz_comm; + + regs_sz_comm = ARRAY_SIZE(hisi_dma_comm_chan_regs); + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) + *regs_sz = regs_sz_comm + ARRAY_SIZE(hisi_dma_hip08_chan_regs); + else + *regs_sz = regs_sz_comm + ARRAY_SIZE(hisi_dma_hip09_chan_regs); + + regs = devm_kcalloc(dev, *regs_sz, sizeof(struct debugfs_reg32), + GFP_KERNEL); + if (!regs) + return NULL; + memcpy(regs, hisi_dma_comm_chan_regs, sizeof(hisi_dma_comm_chan_regs)); + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) + memcpy(regs + regs_sz_comm, hisi_dma_hip08_chan_regs, + sizeof(hisi_dma_hip08_chan_regs)); + else + memcpy(regs + regs_sz_comm, hisi_dma_hip09_chan_regs, + sizeof(hisi_dma_hip09_chan_regs)); + + return regs; +} + +static int hisi_dma_create_chan_dir(struct hisi_dma_dev *hdma_dev) +{ + char dir_name[HISI_DMA_MAX_DIR_NAME_LEN]; + struct debugfs_regset32 *regsets; + struct debugfs_reg32 *regs; + struct dentry *chan_dir; + struct device *dev; + u32 regs_sz; + int ret; + int i; + + dev = &hdma_dev->pdev->dev; + + regsets = devm_kcalloc(dev, hdma_dev->chan_num, + sizeof(*regsets), GFP_KERNEL); + if (!regsets) + return -ENOMEM; + + regs = hisi_dma_get_ch_regs(hdma_dev, ®s_sz); + if (!regs) + return -ENOMEM; + + for (i = 0; i < hdma_dev->chan_num; i++) { + regsets[i].regs = regs; + regsets[i].nregs = regs_sz; + regsets[i].base = hdma_dev->queue_base + i * HISI_DMA_Q_OFFSET; + regsets[i].dev = dev; + + memset(dir_name, 0, HISI_DMA_MAX_DIR_NAME_LEN); + ret = sprintf(dir_name, "channel%d", i); + if (ret < 0) + return ret; + + chan_dir = debugfs_create_dir(dir_name, + hdma_dev->dma_dev.dbg_dev_root); + debugfs_create_regset32("regs", 0444, chan_dir, ®sets[i]); + } + + return 0; +} + +static void hisi_dma_create_debugfs(struct hisi_dma_dev *hdma_dev) +{ + struct debugfs_regset32 *regset; + struct device *dev; + int ret; + + dev = &hdma_dev->pdev->dev; + + if (hdma_dev->dma_dev.dbg_dev_root == NULL) + return; + + regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL); + if (!regset) + return; + + if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) { + regset->regs = hisi_dma_hip08_comm_regs; + regset->nregs = ARRAY_SIZE(hisi_dma_hip08_comm_regs); + } else { + regset->regs = hisi_dma_hip09_comm_regs; + regset->nregs = ARRAY_SIZE(hisi_dma_hip09_comm_regs); + } + regset->base = hdma_dev->base; + regset->dev = dev; + + debugfs_create_regset32("regs", 0444, + hdma_dev->dma_dev.dbg_dev_root, regset); + + ret = hisi_dma_create_chan_dir(hdma_dev); + if (ret < 0) + dev_info(&hdma_dev->pdev->dev, "fail to create debugfs for channels!\n"); +} +#else +static void hisi_dma_create_debugfs(struct hisi_dma_dev *hdma_dev) { } +#endif /* CONFIG_DEBUG_FS*/ +/* --- debugfs implementation --- */ + +static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + enum hisi_dma_reg_layout reg_layout; + struct device *dev = &pdev->dev; + struct hisi_dma_dev *hdma_dev; + struct dma_device *dma_dev; + u32 chan_num; + u32 msi_num; + int ret; + + reg_layout = hisi_dma_get_reg_layout(pdev); + if (reg_layout == HISI_DMA_REG_LAYOUT_INVALID) { + dev_err(dev, "unsupported device!\n"); + return -EINVAL; + } + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "failed to enable device mem!\n"); + return ret; + } + + ret = pcim_iomap_regions(pdev, 1 << PCI_BAR_2, pci_name(pdev)); + if (ret) { + dev_err(dev, "failed to remap I/O region!\n"); + return ret; + } + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + return ret; + + chan_num = hisi_dma_get_chan_num(pdev); + hdma_dev = devm_kzalloc(dev, struct_size(hdma_dev, chan, chan_num), + GFP_KERNEL); + if (!hdma_dev) + return -EINVAL; + + hdma_dev->base = pcim_iomap_table(pdev)[PCI_BAR_2]; + hdma_dev->pdev = pdev; + hdma_dev->chan_depth = HISI_DMA_Q_DEPTH_VAL; + hdma_dev->chan_num = chan_num; + hdma_dev->reg_layout = reg_layout; + hdma_dev->queue_base = hdma_dev->base + hisi_dma_get_queue_base(pdev); + + pci_set_drvdata(pdev, hdma_dev); + pci_set_master(pdev); + + msi_num = hisi_dma_get_msi_num(pdev); + + /* This will be freed by 'pcim_release()'. See 'pcim_enable_device()' */ + ret = pci_alloc_irq_vectors(pdev, msi_num, msi_num, PCI_IRQ_MSI); + if (ret < 0) { + dev_err(dev, "Failed to allocate MSI vectors!\n"); + return ret; + } + + hisi_dma_init_dma_dev(hdma_dev); + + hisi_dma_set_mode(hdma_dev, RC); + + hisi_dma_init_hw(hdma_dev); + + ret = hisi_dma_enable_hw_channels(hdma_dev); + if (ret < 0) { + dev_err(dev, "failed to enable hw channel!\n"); + return ret; + } + + ret = devm_add_action_or_reset(dev, hisi_dma_disable_hw_channels, + hdma_dev); + if (ret) + return ret; + + dma_dev = &hdma_dev->dma_dev; + ret = dmaenginem_async_device_register(dma_dev); + if (ret < 0) { + dev_err(dev, "failed to register device!\n"); + return ret; + } + + hisi_dma_create_debugfs(hdma_dev); + + return 0; +} + +static const struct pci_device_id hisi_dma_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa122) }, + { 0, } +}; + +static struct pci_driver hisi_dma_pci_driver = { + .name = "hisi_dma", + .id_table = hisi_dma_pci_tbl, + .probe = hisi_dma_probe, +}; + +module_pci_driver(hisi_dma_pci_driver); + +MODULE_AUTHOR("Zhou Wang "); +MODULE_AUTHOR("Zhenfa Qiu "); +MODULE_DESCRIPTION("HiSilicon Kunpeng DMA controller driver"); +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(pci, hisi_dma_pci_tbl); diff --git a/drivers/dma/hsu/Kconfig b/drivers/dma/hsu/Kconfig new file mode 100644 index 0000000000..af102baec1 --- /dev/null +++ b/drivers/dma/hsu/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# DMA engine configuration for hsu +config HSU_DMA + tristate + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + +config HSU_DMA_PCI + tristate + depends on HSU_DMA && PCI diff --git a/drivers/dma/hsu/Makefile b/drivers/dma/hsu/Makefile new file mode 100644 index 0000000000..61829b1de2 --- /dev/null +++ b/drivers/dma/hsu/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_HSU_DMA) += hsu_dma.o +hsu_dma-objs := hsu.o + +obj-$(CONFIG_HSU_DMA_PCI) += hsu_dma_pci.o +hsu_dma_pci-objs := pci.o diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c new file mode 100644 index 0000000000..af5a2e252c --- /dev/null +++ b/drivers/dma/hsu/hsu.c @@ -0,0 +1,512 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Core driver for the High Speed UART DMA + * + * Copyright (C) 2015 Intel Corporation + * Author: Andy Shevchenko + * + * Partially based on the bits found in drivers/tty/serial/mfd.c. + */ + +/* + * DMA channel allocation: + * 1. Even number chans are used for DMA Read (UART TX), odd chans for DMA + * Write (UART RX). + * 2. 0/1 channel are assigned to port 0, 2/3 chan to port 1, 4/5 chan to + * port 3, and so on. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hsu.h" + +#define HSU_DMA_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_16_BYTES) + +static inline void hsu_chan_disable(struct hsu_dma_chan *hsuc) +{ + hsu_chan_writel(hsuc, HSU_CH_CR, 0); +} + +static inline void hsu_chan_enable(struct hsu_dma_chan *hsuc) +{ + u32 cr = HSU_CH_CR_CHA; + + if (hsuc->direction == DMA_MEM_TO_DEV) + cr &= ~HSU_CH_CR_CHD; + else if (hsuc->direction == DMA_DEV_TO_MEM) + cr |= HSU_CH_CR_CHD; + + hsu_chan_writel(hsuc, HSU_CH_CR, cr); +} + +static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) +{ + struct dma_slave_config *config = &hsuc->config; + struct hsu_dma_desc *desc = hsuc->desc; + u32 bsr = 0, mtsr = 0; /* to shut the compiler up */ + u32 dcr = HSU_CH_DCR_CHSOE | HSU_CH_DCR_CHEI; + unsigned int i, count; + + if (hsuc->direction == DMA_MEM_TO_DEV) { + bsr = config->dst_maxburst; + mtsr = config->dst_addr_width; + } else if (hsuc->direction == DMA_DEV_TO_MEM) { + bsr = config->src_maxburst; + mtsr = config->src_addr_width; + } + + hsu_chan_disable(hsuc); + + hsu_chan_writel(hsuc, HSU_CH_DCR, 0); + hsu_chan_writel(hsuc, HSU_CH_BSR, bsr); + hsu_chan_writel(hsuc, HSU_CH_MTSR, mtsr); + + /* Set descriptors */ + count = desc->nents - desc->active; + for (i = 0; i < count && i < HSU_DMA_CHAN_NR_DESC; i++) { + hsu_chan_writel(hsuc, HSU_CH_DxSAR(i), desc->sg[i].addr); + hsu_chan_writel(hsuc, HSU_CH_DxTSR(i), desc->sg[i].len); + + /* Prepare value for DCR */ + dcr |= HSU_CH_DCR_DESCA(i); + dcr |= HSU_CH_DCR_CHTOI(i); /* timeout bit, see HSU Errata 1 */ + + desc->active++; + } + /* Only for the last descriptor in the chain */ + dcr |= HSU_CH_DCR_CHSOD(count - 1); + dcr |= HSU_CH_DCR_CHDI(count - 1); + + hsu_chan_writel(hsuc, HSU_CH_DCR, dcr); + + hsu_chan_enable(hsuc); +} + +static void hsu_dma_stop_channel(struct hsu_dma_chan *hsuc) +{ + hsu_chan_disable(hsuc); + hsu_chan_writel(hsuc, HSU_CH_DCR, 0); +} + +static void hsu_dma_start_channel(struct hsu_dma_chan *hsuc) +{ + hsu_dma_chan_start(hsuc); +} + +static void hsu_dma_start_transfer(struct hsu_dma_chan *hsuc) +{ + struct virt_dma_desc *vdesc; + + /* Get the next descriptor */ + vdesc = vchan_next_desc(&hsuc->vchan); + if (!vdesc) { + hsuc->desc = NULL; + return; + } + + list_del(&vdesc->node); + hsuc->desc = to_hsu_dma_desc(vdesc); + + /* Start the channel with a new descriptor */ + hsu_dma_start_channel(hsuc); +} + +/* + * hsu_dma_get_status() - get DMA channel status + * @chip: HSUART DMA chip + * @nr: DMA channel number + * @status: pointer for DMA Channel Status Register value + * + * Description: + * The function reads and clears the DMA Channel Status Register, checks + * if it was a timeout interrupt and returns a corresponding value. + * + * Caller should provide a valid pointer for the DMA Channel Status + * Register value that will be returned in @status. + * + * Return: + * 1 for DMA timeout status, 0 for other DMA status, or error code for + * invalid parameters or no interrupt pending. + */ +int hsu_dma_get_status(struct hsu_dma_chip *chip, unsigned short nr, + u32 *status) +{ + struct hsu_dma_chan *hsuc; + unsigned long flags; + u32 sr; + + /* Sanity check */ + if (nr >= chip->hsu->nr_channels) + return -EINVAL; + + hsuc = &chip->hsu->chan[nr]; + + /* + * No matter what situation, need read clear the IRQ status + * There is a bug, see Errata 5, HSD 2900918 + */ + spin_lock_irqsave(&hsuc->vchan.lock, flags); + sr = hsu_chan_readl(hsuc, HSU_CH_SR); + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); + + /* Check if any interrupt is pending */ + sr &= ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY); + if (!sr) + return -EIO; + + /* Timeout IRQ, need wait some time, see Errata 2 */ + if (sr & HSU_CH_SR_DESCTO_ANY) + udelay(2); + + /* + * At this point, at least one of Descriptor Time Out, Channel Error + * or Descriptor Done bits must be set. Clear the Descriptor Time Out + * bits and if sr is still non-zero, it must be channel error or + * descriptor done which are higher priority than timeout and handled + * in hsu_dma_do_irq(). Else, it must be a timeout. + */ + sr &= ~HSU_CH_SR_DESCTO_ANY; + + *status = sr; + + return sr ? 0 : 1; +} +EXPORT_SYMBOL_GPL(hsu_dma_get_status); + +/* + * hsu_dma_do_irq() - DMA interrupt handler + * @chip: HSUART DMA chip + * @nr: DMA channel number + * @status: Channel Status Register value + * + * Description: + * This function handles Channel Error and Descriptor Done interrupts. + * This function should be called after determining that the DMA interrupt + * is not a normal timeout interrupt, ie. hsu_dma_get_status() returned 0. + * + * Return: + * 0 for invalid channel number, 1 otherwise. + */ +int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status) +{ + struct dma_chan_percpu *stat; + struct hsu_dma_chan *hsuc; + struct hsu_dma_desc *desc; + unsigned long flags; + + /* Sanity check */ + if (nr >= chip->hsu->nr_channels) + return 0; + + hsuc = &chip->hsu->chan[nr]; + stat = this_cpu_ptr(hsuc->vchan.chan.local); + + spin_lock_irqsave(&hsuc->vchan.lock, flags); + desc = hsuc->desc; + if (desc) { + if (status & HSU_CH_SR_CHE) { + desc->status = DMA_ERROR; + } else if (desc->active < desc->nents) { + hsu_dma_start_channel(hsuc); + } else { + vchan_cookie_complete(&desc->vdesc); + desc->status = DMA_COMPLETE; + stat->bytes_transferred += desc->length; + hsu_dma_start_transfer(hsuc); + } + } + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); + + return 1; +} +EXPORT_SYMBOL_GPL(hsu_dma_do_irq); + +static struct hsu_dma_desc *hsu_dma_alloc_desc(unsigned int nents) +{ + struct hsu_dma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->sg = kcalloc(nents, sizeof(*desc->sg), GFP_NOWAIT); + if (!desc->sg) { + kfree(desc); + return NULL; + } + + return desc; +} + +static void hsu_dma_desc_free(struct virt_dma_desc *vdesc) +{ + struct hsu_dma_desc *desc = to_hsu_dma_desc(vdesc); + + kfree(desc->sg); + kfree(desc); +} + +static struct dma_async_tx_descriptor *hsu_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); + struct hsu_dma_desc *desc; + struct scatterlist *sg; + unsigned int i; + + desc = hsu_dma_alloc_desc(sg_len); + if (!desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + desc->sg[i].addr = sg_dma_address(sg); + desc->sg[i].len = sg_dma_len(sg); + + desc->length += sg_dma_len(sg); + } + + desc->nents = sg_len; + desc->direction = direction; + /* desc->active = 0 by kzalloc */ + desc->status = DMA_IN_PROGRESS; + + return vchan_tx_prep(&hsuc->vchan, &desc->vdesc, flags); +} + +static void hsu_dma_issue_pending(struct dma_chan *chan) +{ + struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&hsuc->vchan.lock, flags); + if (vchan_issue_pending(&hsuc->vchan) && !hsuc->desc) + hsu_dma_start_transfer(hsuc); + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); +} + +static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc) +{ + struct hsu_dma_desc *desc = hsuc->desc; + size_t bytes = 0; + int i; + + for (i = desc->active; i < desc->nents; i++) + bytes += desc->sg[i].len; + + i = HSU_DMA_CHAN_NR_DESC - 1; + do { + bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i)); + } while (--i >= 0); + + return bytes; +} + +static enum dma_status hsu_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); + struct virt_dma_desc *vdesc; + enum dma_status status; + size_t bytes; + unsigned long flags; + + status = dma_cookie_status(chan, cookie, state); + if (status == DMA_COMPLETE) + return status; + + spin_lock_irqsave(&hsuc->vchan.lock, flags); + vdesc = vchan_find_desc(&hsuc->vchan, cookie); + if (hsuc->desc && cookie == hsuc->desc->vdesc.tx.cookie) { + bytes = hsu_dma_active_desc_size(hsuc); + dma_set_residue(state, bytes); + status = hsuc->desc->status; + } else if (vdesc) { + bytes = to_hsu_dma_desc(vdesc)->length; + dma_set_residue(state, bytes); + } + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); + + return status; +} + +static int hsu_dma_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); + + memcpy(&hsuc->config, config, sizeof(hsuc->config)); + + return 0; +} + +static int hsu_dma_pause(struct dma_chan *chan) +{ + struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&hsuc->vchan.lock, flags); + if (hsuc->desc && hsuc->desc->status == DMA_IN_PROGRESS) { + hsu_chan_disable(hsuc); + hsuc->desc->status = DMA_PAUSED; + } + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); + + return 0; +} + +static int hsu_dma_resume(struct dma_chan *chan) +{ + struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&hsuc->vchan.lock, flags); + if (hsuc->desc && hsuc->desc->status == DMA_PAUSED) { + hsuc->desc->status = DMA_IN_PROGRESS; + hsu_chan_enable(hsuc); + } + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); + + return 0; +} + +static int hsu_dma_terminate_all(struct dma_chan *chan) +{ + struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&hsuc->vchan.lock, flags); + + hsu_dma_stop_channel(hsuc); + if (hsuc->desc) { + hsu_dma_desc_free(&hsuc->desc->vdesc); + hsuc->desc = NULL; + } + + vchan_get_all_descriptors(&hsuc->vchan, &head); + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); + vchan_dma_desc_free_list(&hsuc->vchan, &head); + + return 0; +} + +static void hsu_dma_free_chan_resources(struct dma_chan *chan) +{ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +static void hsu_dma_synchronize(struct dma_chan *chan) +{ + struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); + + vchan_synchronize(&hsuc->vchan); +} + +int hsu_dma_probe(struct hsu_dma_chip *chip) +{ + struct hsu_dma *hsu; + void __iomem *addr = chip->regs + chip->offset; + unsigned short i; + int ret; + + hsu = devm_kzalloc(chip->dev, sizeof(*hsu), GFP_KERNEL); + if (!hsu) + return -ENOMEM; + + chip->hsu = hsu; + + /* Calculate nr_channels from the IO space length */ + hsu->nr_channels = (chip->length - chip->offset) / HSU_DMA_CHAN_LENGTH; + + hsu->chan = devm_kcalloc(chip->dev, hsu->nr_channels, + sizeof(*hsu->chan), GFP_KERNEL); + if (!hsu->chan) + return -ENOMEM; + + INIT_LIST_HEAD(&hsu->dma.channels); + for (i = 0; i < hsu->nr_channels; i++) { + struct hsu_dma_chan *hsuc = &hsu->chan[i]; + + hsuc->vchan.desc_free = hsu_dma_desc_free; + vchan_init(&hsuc->vchan, &hsu->dma); + + hsuc->direction = (i & 0x1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; + hsuc->reg = addr + i * HSU_DMA_CHAN_LENGTH; + } + + dma_cap_set(DMA_SLAVE, hsu->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, hsu->dma.cap_mask); + + hsu->dma.device_free_chan_resources = hsu_dma_free_chan_resources; + + hsu->dma.device_prep_slave_sg = hsu_dma_prep_slave_sg; + + hsu->dma.device_issue_pending = hsu_dma_issue_pending; + hsu->dma.device_tx_status = hsu_dma_tx_status; + + hsu->dma.device_config = hsu_dma_slave_config; + hsu->dma.device_pause = hsu_dma_pause; + hsu->dma.device_resume = hsu_dma_resume; + hsu->dma.device_terminate_all = hsu_dma_terminate_all; + hsu->dma.device_synchronize = hsu_dma_synchronize; + + hsu->dma.src_addr_widths = HSU_DMA_BUSWIDTHS; + hsu->dma.dst_addr_widths = HSU_DMA_BUSWIDTHS; + hsu->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + hsu->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + hsu->dma.dev = chip->dev; + + dma_set_max_seg_size(hsu->dma.dev, HSU_CH_DxTSR_MASK); + + ret = dma_async_device_register(&hsu->dma); + if (ret) + return ret; + + dev_info(chip->dev, "Found HSU DMA, %d channels\n", hsu->nr_channels); + return 0; +} +EXPORT_SYMBOL_GPL(hsu_dma_probe); + +int hsu_dma_remove(struct hsu_dma_chip *chip) +{ + struct hsu_dma *hsu = chip->hsu; + unsigned short i; + + dma_async_device_unregister(&hsu->dma); + + for (i = 0; i < hsu->nr_channels; i++) { + struct hsu_dma_chan *hsuc = &hsu->chan[i]; + + tasklet_kill(&hsuc->vchan.task); + } + + return 0; +} +EXPORT_SYMBOL_GPL(hsu_dma_remove); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("High Speed UART DMA core driver"); +MODULE_AUTHOR("Andy Shevchenko "); diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h new file mode 100644 index 0000000000..3bca577b98 --- /dev/null +++ b/drivers/dma/hsu/hsu.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Driver for the High Speed UART DMA + * + * Copyright (C) 2015 Intel Corporation + * + * Partially based on the bits found in drivers/tty/serial/mfd.c. + */ + +#ifndef __DMA_HSU_H__ +#define __DMA_HSU_H__ + +#include +#include +#include +#include + +#include + +#include "../virt-dma.h" + +#define HSU_CH_SR 0x00 /* channel status */ +#define HSU_CH_CR 0x04 /* channel control */ +#define HSU_CH_DCR 0x08 /* descriptor control */ +#define HSU_CH_BSR 0x10 /* FIFO buffer size */ +#define HSU_CH_MTSR 0x14 /* minimum transfer size */ +#define HSU_CH_DxSAR(x) (0x20 + 8 * (x)) /* desc start addr */ +#define HSU_CH_DxTSR(x) (0x24 + 8 * (x)) /* desc transfer size */ +#define HSU_CH_D0SAR 0x20 /* desc 0 start addr */ +#define HSU_CH_D0TSR 0x24 /* desc 0 transfer size */ +#define HSU_CH_D1SAR 0x28 +#define HSU_CH_D1TSR 0x2c +#define HSU_CH_D2SAR 0x30 +#define HSU_CH_D2TSR 0x34 +#define HSU_CH_D3SAR 0x38 +#define HSU_CH_D3TSR 0x3c + +#define HSU_DMA_CHAN_NR_DESC 4 +#define HSU_DMA_CHAN_LENGTH 0x40 + +/* Bits in HSU_CH_SR */ +#define HSU_CH_SR_DESCTO(x) BIT(8 + (x)) +#define HSU_CH_SR_DESCTO_ANY GENMASK(11, 8) +#define HSU_CH_SR_CHE BIT(15) +#define HSU_CH_SR_DESCE(x) BIT(16 + (x)) +#define HSU_CH_SR_DESCE_ANY GENMASK(19, 16) +#define HSU_CH_SR_CDESC_ANY GENMASK(31, 30) + +/* Bits in HSU_CH_CR */ +#define HSU_CH_CR_CHA BIT(0) +#define HSU_CH_CR_CHD BIT(1) + +/* Bits in HSU_CH_DCR */ +#define HSU_CH_DCR_DESCA(x) BIT(0 + (x)) +#define HSU_CH_DCR_CHSOD(x) BIT(8 + (x)) +#define HSU_CH_DCR_CHSOTO BIT(14) +#define HSU_CH_DCR_CHSOE BIT(15) +#define HSU_CH_DCR_CHDI(x) BIT(16 + (x)) +#define HSU_CH_DCR_CHEI BIT(23) +#define HSU_CH_DCR_CHTOI(x) BIT(24 + (x)) + +/* Bits in HSU_CH_DxTSR */ +#define HSU_CH_DxTSR_MASK GENMASK(15, 0) +#define HSU_CH_DxTSR_TSR(x) ((x) & HSU_CH_DxTSR_MASK) + +struct hsu_dma_sg { + dma_addr_t addr; + unsigned int len; +}; + +struct hsu_dma_desc { + struct virt_dma_desc vdesc; + enum dma_transfer_direction direction; + struct hsu_dma_sg *sg; + unsigned int nents; + size_t length; + unsigned int active; + enum dma_status status; +}; + +static inline struct hsu_dma_desc *to_hsu_dma_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct hsu_dma_desc, vdesc); +} + +struct hsu_dma_chan { + struct virt_dma_chan vchan; + + void __iomem *reg; + + /* hardware configuration */ + enum dma_transfer_direction direction; + struct dma_slave_config config; + + struct hsu_dma_desc *desc; +}; + +static inline struct hsu_dma_chan *to_hsu_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct hsu_dma_chan, vchan.chan); +} + +static inline u32 hsu_chan_readl(struct hsu_dma_chan *hsuc, int offset) +{ + return readl(hsuc->reg + offset); +} + +static inline void hsu_chan_writel(struct hsu_dma_chan *hsuc, int offset, + u32 value) +{ + writel(value, hsuc->reg + offset); +} + +struct hsu_dma { + struct dma_device dma; + + /* channels */ + struct hsu_dma_chan *chan; + unsigned short nr_channels; +}; + +static inline struct hsu_dma *to_hsu_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct hsu_dma, dma); +} + +#endif /* __DMA_HSU_H__ */ diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c new file mode 100644 index 0000000000..0fcc0c0c22 --- /dev/null +++ b/drivers/dma/hsu/pci.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PCI driver for the High Speed UART DMA + * + * Copyright (C) 2015 Intel Corporation + * Author: Andy Shevchenko + * + * Partially based on the bits found in drivers/tty/serial/mfd.c. + */ + +#include +#include +#include +#include +#include + +#include "hsu.h" + +#define HSU_PCI_DMASR 0x00 +#define HSU_PCI_DMAISR 0x04 + +#define HSU_PCI_CHAN_OFFSET 0x100 + +#define PCI_DEVICE_ID_INTEL_MFLD_HSU_DMA 0x081e +#define PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA 0x1192 + +static irqreturn_t hsu_pci_irq(int irq, void *dev) +{ + struct hsu_dma_chip *chip = dev; + unsigned long dmaisr; + unsigned short i; + u32 status; + int ret = 0; + int err; + + dmaisr = readl(chip->regs + HSU_PCI_DMAISR); + for_each_set_bit(i, &dmaisr, chip->hsu->nr_channels) { + err = hsu_dma_get_status(chip, i, &status); + if (err > 0) + ret |= 1; + else if (err == 0) + ret |= hsu_dma_do_irq(chip, i, status); + } + + return IRQ_RETVAL(ret); +} + +static void hsu_pci_dma_remove(void *chip) +{ + hsu_dma_remove(chip); +} + +static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct hsu_dma_chip *chip; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (ret) { + dev_err(&pdev->dev, "I/O memory remapping failed\n"); + return ret; + } + + pci_set_master(pdev); + pci_try_set_mwi(pdev); + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (ret < 0) + return ret; + + chip->dev = &pdev->dev; + chip->regs = pcim_iomap_table(pdev)[0]; + chip->length = pci_resource_len(pdev, 0); + chip->offset = HSU_PCI_CHAN_OFFSET; + chip->irq = pci_irq_vector(pdev, 0); + + ret = hsu_dma_probe(chip); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, hsu_pci_dma_remove, chip); + if (ret) + return ret; + + ret = devm_request_irq(dev, chip->irq, hsu_pci_irq, 0, "hsu_dma_pci", chip); + if (ret) + return ret; + + /* + * On Intel Tangier B0 and Anniedale the interrupt line, disregarding + * to have different numbers, is shared between HSU DMA and UART IPs. + * Thus on such SoCs we are expecting that IRQ handler is called in + * UART driver only. Instead of handling the spurious interrupt + * from HSU DMA here and waste CPU time and delay HSU UART interrupt + * handling, disable the interrupt entirely. + */ + if (pdev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA) + disable_irq_nosync(chip->irq); + + pci_set_drvdata(pdev, chip); + + return 0; +} + +static const struct pci_device_id hsu_pci_id_table[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MFLD_HSU_DMA), 0 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA), 0 }, + { } +}; +MODULE_DEVICE_TABLE(pci, hsu_pci_id_table); + +static struct pci_driver hsu_pci_driver = { + .name = "hsu_dma_pci", + .id_table = hsu_pci_id_table, + .probe = hsu_pci_probe, +}; + +module_pci_driver(hsu_pci_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("High Speed UART DMA PCI driver"); +MODULE_AUTHOR("Andy Shevchenko "); diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c new file mode 100644 index 0000000000..0ac634a51c --- /dev/null +++ b/drivers/dma/idma64.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Core driver for the Intel integrated DMA 64-bit + * + * Copyright (C) 2015 Intel Corporation + * Author: Andy Shevchenko + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "idma64.h" + +/* For now we support only two channels */ +#define IDMA64_NR_CHAN 2 + +/* ---------------------------------------------------------------------- */ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_off(struct idma64 *idma64) +{ + unsigned short count = 100; + + dma_writel(idma64, CFG, 0); + + channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(BLOCK), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(SRC_TRAN), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(DST_TRAN), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask); + + do { + cpu_relax(); + } while (dma_readl(idma64, CFG) & IDMA64_CFG_DMA_EN && --count); +} + +static void idma64_on(struct idma64 *idma64) +{ + dma_writel(idma64, CFG, IDMA64_CFG_DMA_EN); +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_chan_init(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + u32 cfghi = IDMA64C_CFGH_SRC_PER(1) | IDMA64C_CFGH_DST_PER(0); + u32 cfglo = 0; + + /* Set default burst alignment */ + cfglo |= IDMA64C_CFGL_DST_BURST_ALIGN | IDMA64C_CFGL_SRC_BURST_ALIGN; + + channel_writel(idma64c, CFG_LO, cfglo); + channel_writel(idma64c, CFG_HI, cfghi); + + /* Enable interrupts */ + channel_set_bit(idma64, MASK(XFER), idma64c->mask); + channel_set_bit(idma64, MASK(ERROR), idma64c->mask); + + /* + * Enforce the controller to be turned on. + * + * The iDMA is turned off in ->probe() and looses context during system + * suspend / resume cycle. That's why we have to enable it each time we + * use it. + */ + idma64_on(idma64); +} + +static void idma64_chan_stop(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + channel_clear_bit(idma64, CH_EN, idma64c->mask); +} + +static void idma64_chan_start(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + struct idma64_desc *desc = idma64c->desc; + struct idma64_hw_desc *hw = &desc->hw[0]; + + channel_writeq(idma64c, SAR, 0); + channel_writeq(idma64c, DAR, 0); + + channel_writel(idma64c, CTL_HI, IDMA64C_CTLH_BLOCK_TS(~0UL)); + channel_writel(idma64c, CTL_LO, IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN); + + channel_writeq(idma64c, LLP, hw->llp); + + channel_set_bit(idma64, CH_EN, idma64c->mask); +} + +static void idma64_stop_transfer(struct idma64_chan *idma64c) +{ + struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device); + + idma64_chan_stop(idma64, idma64c); +} + +static void idma64_start_transfer(struct idma64_chan *idma64c) +{ + struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device); + struct virt_dma_desc *vdesc; + + /* Get the next descriptor */ + vdesc = vchan_next_desc(&idma64c->vchan); + if (!vdesc) { + idma64c->desc = NULL; + return; + } + + list_del(&vdesc->node); + idma64c->desc = to_idma64_desc(vdesc); + + /* Configure the channel */ + idma64_chan_init(idma64, idma64c); + + /* Start the channel with a new descriptor */ + idma64_chan_start(idma64, idma64c); +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_chan_irq(struct idma64 *idma64, unsigned short c, + u32 status_err, u32 status_xfer) +{ + struct idma64_chan *idma64c = &idma64->chan[c]; + struct dma_chan_percpu *stat; + struct idma64_desc *desc; + + stat = this_cpu_ptr(idma64c->vchan.chan.local); + + spin_lock(&idma64c->vchan.lock); + desc = idma64c->desc; + if (desc) { + if (status_err & (1 << c)) { + dma_writel(idma64, CLEAR(ERROR), idma64c->mask); + desc->status = DMA_ERROR; + } else if (status_xfer & (1 << c)) { + dma_writel(idma64, CLEAR(XFER), idma64c->mask); + desc->status = DMA_COMPLETE; + vchan_cookie_complete(&desc->vdesc); + stat->bytes_transferred += desc->length; + idma64_start_transfer(idma64c); + } + + /* idma64_start_transfer() updates idma64c->desc */ + if (idma64c->desc == NULL || desc->status == DMA_ERROR) + idma64_stop_transfer(idma64c); + } + spin_unlock(&idma64c->vchan.lock); +} + +static irqreturn_t idma64_irq(int irq, void *dev) +{ + struct idma64 *idma64 = dev; + u32 status = dma_readl(idma64, STATUS_INT); + u32 status_xfer; + u32 status_err; + unsigned short i; + + dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status); + + /* Check if we have any interrupt from the DMA controller */ + if (!status) + return IRQ_NONE; + + status_xfer = dma_readl(idma64, RAW(XFER)); + status_err = dma_readl(idma64, RAW(ERROR)); + + for (i = 0; i < idma64->dma.chancnt; i++) + idma64_chan_irq(idma64, i, status_err, status_xfer); + + return IRQ_HANDLED; +} + +/* ---------------------------------------------------------------------- */ + +static struct idma64_desc *idma64_alloc_desc(unsigned int ndesc) +{ + struct idma64_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->hw = kcalloc(ndesc, sizeof(*desc->hw), GFP_NOWAIT); + if (!desc->hw) { + kfree(desc); + return NULL; + } + + return desc; +} + +static void idma64_desc_free(struct idma64_chan *idma64c, + struct idma64_desc *desc) +{ + struct idma64_hw_desc *hw; + + if (desc->ndesc) { + unsigned int i = desc->ndesc; + + do { + hw = &desc->hw[--i]; + dma_pool_free(idma64c->pool, hw->lli, hw->llp); + } while (i); + } + + kfree(desc->hw); + kfree(desc); +} + +static void idma64_vdesc_free(struct virt_dma_desc *vdesc) +{ + struct idma64_chan *idma64c = to_idma64_chan(vdesc->tx.chan); + + idma64_desc_free(idma64c, to_idma64_desc(vdesc)); +} + +static void idma64_hw_desc_fill(struct idma64_hw_desc *hw, + struct dma_slave_config *config, + enum dma_transfer_direction direction, u64 llp) +{ + struct idma64_lli *lli = hw->lli; + u64 sar, dar; + u32 ctlhi = IDMA64C_CTLH_BLOCK_TS(hw->len); + u32 ctllo = IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN; + u32 src_width, dst_width; + + if (direction == DMA_MEM_TO_DEV) { + sar = hw->phys; + dar = config->dst_addr; + ctllo |= IDMA64C_CTLL_DST_FIX | IDMA64C_CTLL_SRC_INC | + IDMA64C_CTLL_FC_M2P; + src_width = __ffs(sar | hw->len | 4); + dst_width = __ffs(config->dst_addr_width); + } else { /* DMA_DEV_TO_MEM */ + sar = config->src_addr; + dar = hw->phys; + ctllo |= IDMA64C_CTLL_DST_INC | IDMA64C_CTLL_SRC_FIX | + IDMA64C_CTLL_FC_P2M; + src_width = __ffs(config->src_addr_width); + dst_width = __ffs(dar | hw->len | 4); + } + + lli->sar = sar; + lli->dar = dar; + + lli->ctlhi = ctlhi; + lli->ctllo = ctllo | + IDMA64C_CTLL_SRC_MSIZE(config->src_maxburst) | + IDMA64C_CTLL_DST_MSIZE(config->dst_maxburst) | + IDMA64C_CTLL_DST_WIDTH(dst_width) | + IDMA64C_CTLL_SRC_WIDTH(src_width); + + lli->llp = llp; +} + +static void idma64_desc_fill(struct idma64_chan *idma64c, + struct idma64_desc *desc) +{ + struct dma_slave_config *config = &idma64c->config; + unsigned int i = desc->ndesc; + struct idma64_hw_desc *hw = &desc->hw[i - 1]; + struct idma64_lli *lli = hw->lli; + u64 llp = 0; + + /* Fill the hardware descriptors and link them to a list */ + do { + hw = &desc->hw[--i]; + idma64_hw_desc_fill(hw, config, desc->direction, llp); + llp = hw->llp; + desc->length += hw->len; + } while (i); + + /* Trigger an interrupt after the last block is transfered */ + lli->ctllo |= IDMA64C_CTLL_INT_EN; + + /* Disable LLP transfer in the last block */ + lli->ctllo &= ~(IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN); +} + +static struct dma_async_tx_descriptor *idma64_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + struct idma64_desc *desc; + struct scatterlist *sg; + unsigned int i; + + desc = idma64_alloc_desc(sg_len); + if (!desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + struct idma64_hw_desc *hw = &desc->hw[i]; + + /* Allocate DMA capable memory for hardware descriptor */ + hw->lli = dma_pool_alloc(idma64c->pool, GFP_NOWAIT, &hw->llp); + if (!hw->lli) { + desc->ndesc = i; + idma64_desc_free(idma64c, desc); + return NULL; + } + + hw->phys = sg_dma_address(sg); + hw->len = sg_dma_len(sg); + } + + desc->ndesc = sg_len; + desc->direction = direction; + desc->status = DMA_IN_PROGRESS; + + idma64_desc_fill(idma64c, desc); + return vchan_tx_prep(&idma64c->vchan, &desc->vdesc, flags); +} + +static void idma64_issue_pending(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (vchan_issue_pending(&idma64c->vchan) && !idma64c->desc) + idma64_start_transfer(idma64c); + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); +} + +static size_t idma64_active_desc_size(struct idma64_chan *idma64c) +{ + struct idma64_desc *desc = idma64c->desc; + struct idma64_hw_desc *hw; + size_t bytes = desc->length; + u64 llp = channel_readq(idma64c, LLP); + u32 ctlhi = channel_readl(idma64c, CTL_HI); + unsigned int i = 0; + + do { + hw = &desc->hw[i]; + if (hw->llp == llp) + break; + bytes -= hw->len; + } while (++i < desc->ndesc); + + if (!i) + return bytes; + + /* The current chunk is not fully transfered yet */ + bytes += desc->hw[--i].len; + + return bytes - IDMA64C_CTLH_BLOCK_TS(ctlhi); +} + +static enum dma_status idma64_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + struct virt_dma_desc *vdesc; + enum dma_status status; + size_t bytes; + unsigned long flags; + + status = dma_cookie_status(chan, cookie, state); + if (status == DMA_COMPLETE) + return status; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + vdesc = vchan_find_desc(&idma64c->vchan, cookie); + if (idma64c->desc && cookie == idma64c->desc->vdesc.tx.cookie) { + bytes = idma64_active_desc_size(idma64c); + dma_set_residue(state, bytes); + status = idma64c->desc->status; + } else if (vdesc) { + bytes = to_idma64_desc(vdesc)->length; + dma_set_residue(state, bytes); + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return status; +} + +static void convert_burst(u32 *maxburst) +{ + if (*maxburst) + *maxburst = __fls(*maxburst); + else + *maxburst = 0; +} + +static int idma64_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + memcpy(&idma64c->config, config, sizeof(idma64c->config)); + + convert_burst(&idma64c->config.src_maxburst); + convert_burst(&idma64c->config.dst_maxburst); + + return 0; +} + +static void idma64_chan_deactivate(struct idma64_chan *idma64c, bool drain) +{ + unsigned short count = 100; + u32 cfglo; + + cfglo = channel_readl(idma64c, CFG_LO); + if (drain) + cfglo |= IDMA64C_CFGL_CH_DRAIN; + else + cfglo &= ~IDMA64C_CFGL_CH_DRAIN; + + channel_writel(idma64c, CFG_LO, cfglo | IDMA64C_CFGL_CH_SUSP); + do { + udelay(1); + cfglo = channel_readl(idma64c, CFG_LO); + } while (!(cfglo & IDMA64C_CFGL_FIFO_EMPTY) && --count); +} + +static void idma64_chan_activate(struct idma64_chan *idma64c) +{ + u32 cfglo; + + cfglo = channel_readl(idma64c, CFG_LO); + channel_writel(idma64c, CFG_LO, cfglo & ~IDMA64C_CFGL_CH_SUSP); +} + +static int idma64_pause(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (idma64c->desc && idma64c->desc->status == DMA_IN_PROGRESS) { + idma64_chan_deactivate(idma64c, false); + idma64c->desc->status = DMA_PAUSED; + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return 0; +} + +static int idma64_resume(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (idma64c->desc && idma64c->desc->status == DMA_PAUSED) { + idma64c->desc->status = DMA_IN_PROGRESS; + idma64_chan_activate(idma64c); + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return 0; +} + +static int idma64_terminate_all(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + idma64_chan_deactivate(idma64c, true); + idma64_stop_transfer(idma64c); + if (idma64c->desc) { + idma64_vdesc_free(&idma64c->desc->vdesc); + idma64c->desc = NULL; + } + vchan_get_all_descriptors(&idma64c->vchan, &head); + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + vchan_dma_desc_free_list(&idma64c->vchan, &head); + return 0; +} + +static void idma64_synchronize(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + vchan_synchronize(&idma64c->vchan); +} + +static int idma64_alloc_chan_resources(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + /* Create a pool of consistent memory blocks for hardware descriptors */ + idma64c->pool = dma_pool_create(dev_name(chan2dev(chan)), + chan->device->dev, + sizeof(struct idma64_lli), 8, 0); + if (!idma64c->pool) { + dev_err(chan2dev(chan), "No memory for descriptors\n"); + return -ENOMEM; + } + + return 0; +} + +static void idma64_free_chan_resources(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + vchan_free_chan_resources(to_virt_chan(chan)); + dma_pool_destroy(idma64c->pool); + idma64c->pool = NULL; +} + +/* ---------------------------------------------------------------------- */ + +#define IDMA64_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) + +static int idma64_probe(struct idma64_chip *chip) +{ + struct idma64 *idma64; + unsigned short nr_chan = IDMA64_NR_CHAN; + unsigned short i; + int ret; + + idma64 = devm_kzalloc(chip->dev, sizeof(*idma64), GFP_KERNEL); + if (!idma64) + return -ENOMEM; + + idma64->regs = chip->regs; + chip->idma64 = idma64; + + idma64->chan = devm_kcalloc(chip->dev, nr_chan, sizeof(*idma64->chan), + GFP_KERNEL); + if (!idma64->chan) + return -ENOMEM; + + idma64->all_chan_mask = (1 << nr_chan) - 1; + + /* Turn off iDMA controller */ + idma64_off(idma64); + + ret = devm_request_irq(chip->dev, chip->irq, idma64_irq, IRQF_SHARED, + dev_name(chip->dev), idma64); + if (ret) + return ret; + + INIT_LIST_HEAD(&idma64->dma.channels); + for (i = 0; i < nr_chan; i++) { + struct idma64_chan *idma64c = &idma64->chan[i]; + + idma64c->vchan.desc_free = idma64_vdesc_free; + vchan_init(&idma64c->vchan, &idma64->dma); + + idma64c->regs = idma64->regs + i * IDMA64_CH_LENGTH; + idma64c->mask = BIT(i); + } + + dma_cap_set(DMA_SLAVE, idma64->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, idma64->dma.cap_mask); + + idma64->dma.device_alloc_chan_resources = idma64_alloc_chan_resources; + idma64->dma.device_free_chan_resources = idma64_free_chan_resources; + + idma64->dma.device_prep_slave_sg = idma64_prep_slave_sg; + + idma64->dma.device_issue_pending = idma64_issue_pending; + idma64->dma.device_tx_status = idma64_tx_status; + + idma64->dma.device_config = idma64_slave_config; + idma64->dma.device_pause = idma64_pause; + idma64->dma.device_resume = idma64_resume; + idma64->dma.device_terminate_all = idma64_terminate_all; + idma64->dma.device_synchronize = idma64_synchronize; + + idma64->dma.src_addr_widths = IDMA64_BUSWIDTHS; + idma64->dma.dst_addr_widths = IDMA64_BUSWIDTHS; + idma64->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + idma64->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + idma64->dma.dev = chip->sysdev; + + dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK); + + ret = dma_async_device_register(&idma64->dma); + if (ret) + return ret; + + dev_info(chip->dev, "Found Intel integrated DMA 64-bit\n"); + return 0; +} + +static void idma64_remove(struct idma64_chip *chip) +{ + struct idma64 *idma64 = chip->idma64; + unsigned short i; + + dma_async_device_unregister(&idma64->dma); + + /* + * Explicitly call devm_request_irq() to avoid the side effects with + * the scheduled tasklets. + */ + devm_free_irq(chip->dev, chip->irq, idma64); + + for (i = 0; i < idma64->dma.chancnt; i++) { + struct idma64_chan *idma64c = &idma64->chan[i]; + + tasklet_kill(&idma64c->vchan.task); + } +} + +/* ---------------------------------------------------------------------- */ + +static int idma64_platform_probe(struct platform_device *pdev) +{ + struct idma64_chip *chip; + struct device *dev = &pdev->dev; + struct device *sysdev = dev->parent; + int ret; + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->irq = platform_get_irq(pdev, 0); + if (chip->irq < 0) + return chip->irq; + + chip->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(chip->regs)) + return PTR_ERR(chip->regs); + + ret = dma_coerce_mask_and_coherent(sysdev, DMA_BIT_MASK(64)); + if (ret) + return ret; + + chip->dev = dev; + chip->sysdev = sysdev; + + ret = idma64_probe(chip); + if (ret) + return ret; + + platform_set_drvdata(pdev, chip); + return 0; +} + +static int idma64_platform_remove(struct platform_device *pdev) +{ + struct idma64_chip *chip = platform_get_drvdata(pdev); + + idma64_remove(chip); + + return 0; +} + +static int __maybe_unused idma64_pm_suspend(struct device *dev) +{ + struct idma64_chip *chip = dev_get_drvdata(dev); + + idma64_off(chip->idma64); + return 0; +} + +static int __maybe_unused idma64_pm_resume(struct device *dev) +{ + struct idma64_chip *chip = dev_get_drvdata(dev); + + idma64_on(chip->idma64); + return 0; +} + +static const struct dev_pm_ops idma64_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(idma64_pm_suspend, idma64_pm_resume) +}; + +static struct platform_driver idma64_platform_driver = { + .probe = idma64_platform_probe, + .remove = idma64_platform_remove, + .driver = { + .name = LPSS_IDMA64_DRIVER_NAME, + .pm = &idma64_dev_pm_ops, + }, +}; + +module_platform_driver(idma64_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("iDMA64 core driver"); +MODULE_AUTHOR("Andy Shevchenko "); +MODULE_ALIAS("platform:" LPSS_IDMA64_DRIVER_NAME); diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h new file mode 100644 index 0000000000..d013b54356 --- /dev/null +++ b/drivers/dma/idma64.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Driver for the Intel integrated DMA 64-bit + * + * Copyright (C) 2015 Intel Corporation + */ + +#ifndef __DMA_IDMA64_H__ +#define __DMA_IDMA64_H__ + +#include +#include +#include +#include + +#include + +#include "virt-dma.h" + +/* Channel registers */ + +#define IDMA64_CH_SAR 0x00 /* Source Address Register */ +#define IDMA64_CH_DAR 0x08 /* Destination Address Register */ +#define IDMA64_CH_LLP 0x10 /* Linked List Pointer */ +#define IDMA64_CH_CTL_LO 0x18 /* Control Register Low */ +#define IDMA64_CH_CTL_HI 0x1c /* Control Register High */ +#define IDMA64_CH_SSTAT 0x20 +#define IDMA64_CH_DSTAT 0x28 +#define IDMA64_CH_SSTATAR 0x30 +#define IDMA64_CH_DSTATAR 0x38 +#define IDMA64_CH_CFG_LO 0x40 /* Configuration Register Low */ +#define IDMA64_CH_CFG_HI 0x44 /* Configuration Register High */ +#define IDMA64_CH_SGR 0x48 +#define IDMA64_CH_DSR 0x50 + +#define IDMA64_CH_LENGTH 0x58 + +/* Bitfields in CTL_LO */ +#define IDMA64C_CTLL_INT_EN (1 << 0) /* irqs enabled? */ +#define IDMA64C_CTLL_DST_WIDTH(x) ((x) << 1) /* bytes per element */ +#define IDMA64C_CTLL_SRC_WIDTH(x) ((x) << 4) +#define IDMA64C_CTLL_DST_INC (0 << 8) /* DAR update/not */ +#define IDMA64C_CTLL_DST_FIX (1 << 8) +#define IDMA64C_CTLL_SRC_INC (0 << 10) /* SAR update/not */ +#define IDMA64C_CTLL_SRC_FIX (1 << 10) +#define IDMA64C_CTLL_DST_MSIZE(x) ((x) << 11) /* burst, #elements */ +#define IDMA64C_CTLL_SRC_MSIZE(x) ((x) << 14) +#define IDMA64C_CTLL_FC_M2P (1 << 20) /* mem-to-periph */ +#define IDMA64C_CTLL_FC_P2M (2 << 20) /* periph-to-mem */ +#define IDMA64C_CTLL_LLP_D_EN (1 << 27) /* dest block chain */ +#define IDMA64C_CTLL_LLP_S_EN (1 << 28) /* src block chain */ + +/* Bitfields in CTL_HI */ +#define IDMA64C_CTLH_BLOCK_TS_MASK ((1 << 17) - 1) +#define IDMA64C_CTLH_BLOCK_TS(x) ((x) & IDMA64C_CTLH_BLOCK_TS_MASK) +#define IDMA64C_CTLH_DONE (1 << 17) + +/* Bitfields in CFG_LO */ +#define IDMA64C_CFGL_DST_BURST_ALIGN (1 << 0) /* dst burst align */ +#define IDMA64C_CFGL_SRC_BURST_ALIGN (1 << 1) /* src burst align */ +#define IDMA64C_CFGL_CH_SUSP (1 << 8) +#define IDMA64C_CFGL_FIFO_EMPTY (1 << 9) +#define IDMA64C_CFGL_CH_DRAIN (1 << 10) /* drain FIFO */ +#define IDMA64C_CFGL_DST_OPT_BL (1 << 20) /* optimize dst burst length */ +#define IDMA64C_CFGL_SRC_OPT_BL (1 << 21) /* optimize src burst length */ + +/* Bitfields in CFG_HI */ +#define IDMA64C_CFGH_SRC_PER(x) ((x) << 0) /* src peripheral */ +#define IDMA64C_CFGH_DST_PER(x) ((x) << 4) /* dst peripheral */ +#define IDMA64C_CFGH_RD_ISSUE_THD(x) ((x) << 8) +#define IDMA64C_CFGH_WR_ISSUE_THD(x) ((x) << 18) + +/* Interrupt registers */ + +#define IDMA64_INT_XFER 0x00 +#define IDMA64_INT_BLOCK 0x08 +#define IDMA64_INT_SRC_TRAN 0x10 +#define IDMA64_INT_DST_TRAN 0x18 +#define IDMA64_INT_ERROR 0x20 + +#define IDMA64_RAW(x) (0x2c0 + IDMA64_INT_##x) /* r */ +#define IDMA64_STATUS(x) (0x2e8 + IDMA64_INT_##x) /* r (raw & mask) */ +#define IDMA64_MASK(x) (0x310 + IDMA64_INT_##x) /* rw (set = irq enabled) */ +#define IDMA64_CLEAR(x) (0x338 + IDMA64_INT_##x) /* w (ack, affects "raw") */ + +/* Common registers */ + +#define IDMA64_STATUS_INT 0x360 /* r */ +#define IDMA64_CFG 0x398 +#define IDMA64_CH_EN 0x3a0 + +/* Bitfields in CFG */ +#define IDMA64_CFG_DMA_EN (1 << 0) + +/* Hardware descriptor for Linked LIst transfers */ +struct idma64_lli { + u64 sar; + u64 dar; + u64 llp; + u32 ctllo; + u32 ctlhi; + u32 sstat; + u32 dstat; +}; + +struct idma64_hw_desc { + struct idma64_lli *lli; + dma_addr_t llp; + dma_addr_t phys; + unsigned int len; +}; + +struct idma64_desc { + struct virt_dma_desc vdesc; + enum dma_transfer_direction direction; + struct idma64_hw_desc *hw; + unsigned int ndesc; + size_t length; + enum dma_status status; +}; + +static inline struct idma64_desc *to_idma64_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct idma64_desc, vdesc); +} + +struct idma64_chan { + struct virt_dma_chan vchan; + + void __iomem *regs; + + /* hardware configuration */ + enum dma_transfer_direction direction; + unsigned int mask; + struct dma_slave_config config; + + void *pool; + struct idma64_desc *desc; +}; + +static inline struct idma64_chan *to_idma64_chan(struct dma_chan *chan) +{ + return container_of(chan, struct idma64_chan, vchan.chan); +} + +#define channel_set_bit(idma64, reg, mask) \ + dma_writel(idma64, reg, ((mask) << 8) | (mask)) +#define channel_clear_bit(idma64, reg, mask) \ + dma_writel(idma64, reg, ((mask) << 8) | 0) + +static inline u32 idma64c_readl(struct idma64_chan *idma64c, int offset) +{ + return readl(idma64c->regs + offset); +} + +static inline void idma64c_writel(struct idma64_chan *idma64c, int offset, + u32 value) +{ + writel(value, idma64c->regs + offset); +} + +#define channel_readl(idma64c, reg) \ + idma64c_readl(idma64c, IDMA64_CH_##reg) +#define channel_writel(idma64c, reg, value) \ + idma64c_writel(idma64c, IDMA64_CH_##reg, (value)) + +static inline u64 idma64c_readq(struct idma64_chan *idma64c, int offset) +{ + return lo_hi_readq(idma64c->regs + offset); +} + +static inline void idma64c_writeq(struct idma64_chan *idma64c, int offset, + u64 value) +{ + lo_hi_writeq(value, idma64c->regs + offset); +} + +#define channel_readq(idma64c, reg) \ + idma64c_readq(idma64c, IDMA64_CH_##reg) +#define channel_writeq(idma64c, reg, value) \ + idma64c_writeq(idma64c, IDMA64_CH_##reg, (value)) + +struct idma64 { + struct dma_device dma; + + void __iomem *regs; + + /* channels */ + unsigned short all_chan_mask; + struct idma64_chan *chan; +}; + +static inline struct idma64 *to_idma64(struct dma_device *ddev) +{ + return container_of(ddev, struct idma64, dma); +} + +static inline u32 idma64_readl(struct idma64 *idma64, int offset) +{ + return readl(idma64->regs + offset); +} + +static inline void idma64_writel(struct idma64 *idma64, int offset, u32 value) +{ + writel(value, idma64->regs + offset); +} + +#define dma_readl(idma64, reg) \ + idma64_readl(idma64, IDMA64_##reg) +#define dma_writel(idma64, reg, value) \ + idma64_writel(idma64, IDMA64_##reg, (value)) + +/** + * struct idma64_chip - representation of iDMA 64-bit controller hardware + * @dev: struct device of the DMA controller + * @sysdev: struct device of the physical device that does DMA + * @irq: irq line + * @regs: memory mapped I/O space + * @idma64: struct idma64 that is filed by idma64_probe() + */ +struct idma64_chip { + struct device *dev; + struct device *sysdev; + int irq; + void __iomem *regs; + struct idma64 *idma64; +}; + +#endif /* __DMA_IDMA64_H__ */ diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile new file mode 100644 index 0000000000..c5e679070e --- /dev/null +++ b/drivers/dma/idxd/Makefile @@ -0,0 +1,12 @@ +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD + +obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o +idxd_bus-y := bus.o + +obj-$(CONFIG_INTEL_IDXD) += idxd.o +idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o debugfs.o + +idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o + +obj-$(CONFIG_INTEL_IDXD_COMPAT) += idxd_compat.o +idxd_compat-y := compat.o diff --git a/drivers/dma/idxd/bus.c b/drivers/dma/idxd/bus.c new file mode 100644 index 0000000000..6f84621053 --- /dev/null +++ b/drivers/dma/idxd/bus.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include "idxd.h" + + +int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &idxd_drv->drv; + + if (!idxd_drv->type) { + pr_debug("driver type not set (%ps)\n", __builtin_return_address(0)); + return -EINVAL; + } + + drv->name = idxd_drv->name; + drv->bus = &dsa_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} +EXPORT_SYMBOL_GPL(__idxd_driver_register); + +void idxd_driver_unregister(struct idxd_device_driver *idxd_drv) +{ + driver_unregister(&idxd_drv->drv); +} +EXPORT_SYMBOL_GPL(idxd_driver_unregister); + +static int idxd_config_bus_match(struct device *dev, + struct device_driver *drv) +{ + struct idxd_device_driver *idxd_drv = + container_of(drv, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + int i = 0; + + while (idxd_drv->type[i] != IDXD_DEV_NONE) { + if (idxd_dev->type == idxd_drv->type[i]) + return 1; + i++; + } + + return 0; +} + +static int idxd_config_bus_probe(struct device *dev) +{ + struct idxd_device_driver *idxd_drv = + container_of(dev->driver, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return idxd_drv->probe(idxd_dev); +} + +static void idxd_config_bus_remove(struct device *dev) +{ + struct idxd_device_driver *idxd_drv = + container_of(dev->driver, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + idxd_drv->remove(idxd_dev); +} + +struct bus_type dsa_bus_type = { + .name = "dsa", + .match = idxd_config_bus_match, + .probe = idxd_config_bus_probe, + .remove = idxd_config_bus_remove, +}; +EXPORT_SYMBOL_GPL(dsa_bus_type); + +static int __init dsa_bus_init(void) +{ + return bus_register(&dsa_bus_type); +} +module_init(dsa_bus_init); + +static void __exit dsa_bus_exit(void) +{ + bus_unregister(&dsa_bus_type); +} +module_exit(dsa_bus_exit); + +MODULE_DESCRIPTION("IDXD driver dsa_bus_type driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c new file mode 100644 index 0000000000..d32deb9b4e --- /dev/null +++ b/drivers/dma/idxd/cdev.c @@ -0,0 +1,692 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registers.h" +#include "idxd.h" + +struct idxd_cdev_context { + const char *name; + dev_t devt; + struct ida minor_ida; +}; + +/* + * Since user file names are global in DSA devices, define their ida's as + * global to avoid conflict file names. + */ +static DEFINE_IDA(file_ida); +static DEFINE_MUTEX(ida_lock); + +/* + * ictx is an array based off of accelerator types. enum idxd_type + * is used as index + */ +static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = { + { .name = "dsa" }, + { .name = "iax" } +}; + +struct idxd_user_context { + struct idxd_wq *wq; + struct task_struct *task; + unsigned int pasid; + struct mm_struct *mm; + unsigned int flags; + struct iommu_sva *sva; + struct idxd_dev idxd_dev; + u64 counters[COUNTER_MAX]; + int id; + pid_t pid; +}; + +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid); +static void idxd_xa_pasid_remove(struct idxd_user_context *ctx); + +static inline struct idxd_user_context *dev_to_uctx(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_user_context, idxd_dev); +} + +static ssize_t cr_faults_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULTS]); +} +static DEVICE_ATTR_RO(cr_faults); + +static ssize_t cr_fault_failures_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULT_FAILS]); +} +static DEVICE_ATTR_RO(cr_fault_failures); + +static ssize_t pid_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%u\n", ctx->pid); +} +static DEVICE_ATTR_RO(pid); + +static struct attribute *cdev_file_attributes[] = { + &dev_attr_cr_faults.attr, + &dev_attr_cr_fault_failures.attr, + &dev_attr_pid.attr, + NULL +}; + +static umode_t cdev_file_attr_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct idxd_user_context *ctx = dev_to_uctx(dev); + struct idxd_wq *wq = ctx->wq; + + if (!wq_pasid_enabled(wq)) + return 0; + + return a->mode; +} + +static const struct attribute_group cdev_file_attribute_group = { + .attrs = cdev_file_attributes, + .is_visible = cdev_file_attr_visible, +}; + +static const struct attribute_group *cdev_file_attribute_groups[] = { + &cdev_file_attribute_group, + NULL +}; + +static void idxd_file_dev_release(struct device *dev) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + struct idxd_wq *wq = ctx->wq; + struct idxd_device *idxd = wq->idxd; + int rc; + + mutex_lock(&ida_lock); + ida_free(&file_ida, ctx->id); + mutex_unlock(&ida_lock); + + /* Wait for in-flight operations to complete. */ + if (wq_shared(wq)) { + idxd_device_drain_pasid(idxd, ctx->pasid); + } else { + if (device_user_pasid_enabled(idxd)) { + /* The wq disable in the disable pasid function will drain the wq */ + rc = idxd_wq_disable_pasid(wq); + if (rc < 0) + dev_err(dev, "wq disable pasid failed.\n"); + } else { + idxd_wq_drain(wq); + } + } + + if (ctx->sva) { + idxd_cdev_evl_drain_pasid(wq, ctx->pasid); + iommu_sva_unbind_device(ctx->sva); + idxd_xa_pasid_remove(ctx); + } + kfree(ctx); + mutex_lock(&wq->wq_lock); + idxd_wq_put(wq); + mutex_unlock(&wq->wq_lock); +} + +static struct device_type idxd_cdev_file_type = { + .name = "idxd_file", + .release = idxd_file_dev_release, + .groups = cdev_file_attribute_groups, +}; + +static void idxd_cdev_dev_release(struct device *dev) +{ + struct idxd_cdev *idxd_cdev = dev_to_cdev(dev); + struct idxd_cdev_context *cdev_ctx; + struct idxd_wq *wq = idxd_cdev->wq; + + cdev_ctx = &ictx[wq->idxd->data->type]; + ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor); + kfree(idxd_cdev); +} + +static struct device_type idxd_cdev_device_type = { + .name = "idxd_cdev", + .release = idxd_cdev_dev_release, +}; + +static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode) +{ + struct cdev *cdev = inode->i_cdev; + + return container_of(cdev, struct idxd_cdev, cdev); +} + +static inline struct idxd_wq *inode_wq(struct inode *inode) +{ + struct idxd_cdev *idxd_cdev = inode_idxd_cdev(inode); + + return idxd_cdev->wq; +} + +static void idxd_xa_pasid_remove(struct idxd_user_context *ctx) +{ + struct idxd_wq *wq = ctx->wq; + void *ptr; + + mutex_lock(&wq->uc_lock); + ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL); + if (ptr != (void *)ctx) + dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n", + ctx->pasid); + mutex_unlock(&wq->uc_lock); +} + +void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index) +{ + struct idxd_user_context *ctx; + + if (index >= COUNTER_MAX) + return; + + mutex_lock(&wq->uc_lock); + ctx = xa_load(&wq->upasid_xa, pasid); + if (!ctx) { + mutex_unlock(&wq->uc_lock); + return; + } + ctx->counters[index]++; + mutex_unlock(&wq->uc_lock); +} + +static int idxd_cdev_open(struct inode *inode, struct file *filp) +{ + struct idxd_user_context *ctx; + struct idxd_device *idxd; + struct idxd_wq *wq; + struct device *dev, *fdev; + int rc = 0; + struct iommu_sva *sva; + unsigned int pasid; + struct idxd_cdev *idxd_cdev; + + wq = inode_wq(inode); + idxd = wq->idxd; + dev = &idxd->pdev->dev; + + dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq)); + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + mutex_lock(&wq->wq_lock); + + if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq)) { + rc = -EBUSY; + goto failed; + } + + ctx->wq = wq; + filp->private_data = ctx; + ctx->pid = current->pid; + + if (device_user_pasid_enabled(idxd)) { + sva = iommu_sva_bind_device(dev, current->mm); + if (IS_ERR(sva)) { + rc = PTR_ERR(sva); + dev_err(dev, "pasid allocation failed: %d\n", rc); + goto failed; + } + + pasid = iommu_sva_get_pasid(sva); + if (pasid == IOMMU_PASID_INVALID) { + rc = -EINVAL; + goto failed_get_pasid; + } + + ctx->sva = sva; + ctx->pasid = pasid; + ctx->mm = current->mm; + + mutex_lock(&wq->uc_lock); + rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL); + mutex_unlock(&wq->uc_lock); + if (rc < 0) + dev_warn(dev, "PASID entry already exist in xarray.\n"); + + if (wq_dedicated(wq)) { + rc = idxd_wq_set_pasid(wq, pasid); + if (rc < 0) { + dev_err(dev, "wq set pasid failed: %d\n", rc); + goto failed_set_pasid; + } + } + } + + idxd_cdev = wq->idxd_cdev; + mutex_lock(&ida_lock); + ctx->id = ida_alloc(&file_ida, GFP_KERNEL); + mutex_unlock(&ida_lock); + if (ctx->id < 0) { + dev_warn(dev, "ida alloc failure\n"); + goto failed_ida; + } + ctx->idxd_dev.type = IDXD_DEV_CDEV_FILE; + fdev = user_ctx_dev(ctx); + device_initialize(fdev); + fdev->parent = cdev_dev(idxd_cdev); + fdev->bus = &dsa_bus_type; + fdev->type = &idxd_cdev_file_type; + + rc = dev_set_name(fdev, "file%d", ctx->id); + if (rc < 0) { + dev_warn(dev, "set name failure\n"); + goto failed_dev_name; + } + + rc = device_add(fdev); + if (rc < 0) { + dev_warn(dev, "file device add failure\n"); + goto failed_dev_add; + } + + idxd_wq_get(wq); + mutex_unlock(&wq->wq_lock); + return 0; + +failed_dev_add: +failed_dev_name: + put_device(fdev); +failed_ida: +failed_set_pasid: + if (device_user_pasid_enabled(idxd)) + idxd_xa_pasid_remove(ctx); +failed_get_pasid: + if (device_user_pasid_enabled(idxd)) + iommu_sva_unbind_device(sva); +failed: + mutex_unlock(&wq->wq_lock); + kfree(ctx); + return rc; +} + +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg status; + u16 h, t, size; + int ent_size = evl_ent_size(idxd); + struct __evl_entry *entry_head; + + if (!evl) + return; + + spin_lock(&evl->lock); + status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = status.tail; + h = evl->head; + size = evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id) + set_bit(h, evl->bmap); + h = (h + 1) % size; + } + spin_unlock(&evl->lock); + + drain_workqueue(wq->wq); +} + +static int idxd_cdev_release(struct inode *node, struct file *filep) +{ + struct idxd_user_context *ctx = filep->private_data; + struct idxd_wq *wq = ctx->wq; + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + + dev_dbg(dev, "%s called\n", __func__); + filep->private_data = NULL; + + device_unregister(user_ctx_dev(ctx)); + + return 0; +} + +static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma, + const char *func) +{ + struct device *dev = &wq->idxd->pdev->dev; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + dev_info_ratelimited(dev, + "%s: %s: mapping too large: %lu\n", + current->comm, func, + vma->vm_end - vma->vm_start); + return -EINVAL; + } + + return 0; +} + +static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct idxd_user_context *ctx = filp->private_data; + struct idxd_wq *wq = ctx->wq; + struct idxd_device *idxd = wq->idxd; + struct pci_dev *pdev = idxd->pdev; + phys_addr_t base = pci_resource_start(pdev, IDXD_WQ_BAR); + unsigned long pfn; + int rc; + + dev_dbg(&pdev->dev, "%s called\n", __func__); + rc = check_vma(wq, vma, __func__); + if (rc < 0) + return rc; + + vm_flags_set(vma, VM_DONTCOPY); + pfn = (base + idxd_get_wq_portal_full_offset(wq->id, + IDXD_PORTAL_LIMITED)) >> PAGE_SHIFT; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_private_data = ctx; + + return io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, + vma->vm_page_prot); +} + +static __poll_t idxd_cdev_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct idxd_user_context *ctx = filp->private_data; + struct idxd_wq *wq = ctx->wq; + struct idxd_device *idxd = wq->idxd; + __poll_t out = 0; + + poll_wait(filp, &wq->err_queue, wait); + spin_lock(&idxd->dev_lock); + if (idxd->sw_err.valid) + out = EPOLLIN | EPOLLRDNORM; + spin_unlock(&idxd->dev_lock); + + return out; +} + +static const struct file_operations idxd_cdev_fops = { + .owner = THIS_MODULE, + .open = idxd_cdev_open, + .release = idxd_cdev_release, + .mmap = idxd_cdev_mmap, + .poll = idxd_cdev_poll, +}; + +int idxd_cdev_get_major(struct idxd_device *idxd) +{ + return MAJOR(ictx[idxd->data->type].devt); +} + +int idxd_wq_add_cdev(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_cdev *idxd_cdev; + struct cdev *cdev; + struct device *dev; + struct idxd_cdev_context *cdev_ctx; + int rc, minor; + + idxd_cdev = kzalloc(sizeof(*idxd_cdev), GFP_KERNEL); + if (!idxd_cdev) + return -ENOMEM; + + idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV; + idxd_cdev->wq = wq; + cdev = &idxd_cdev->cdev; + dev = cdev_dev(idxd_cdev); + cdev_ctx = &ictx[wq->idxd->data->type]; + minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL); + if (minor < 0) { + kfree(idxd_cdev); + return minor; + } + idxd_cdev->minor = minor; + + device_initialize(dev); + dev->parent = wq_confdev(wq); + dev->bus = &dsa_bus_type; + dev->type = &idxd_cdev_device_type; + dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor); + + rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id); + if (rc < 0) + goto err; + + wq->idxd_cdev = idxd_cdev; + cdev_init(cdev, &idxd_cdev_fops); + rc = cdev_device_add(cdev, dev); + if (rc) { + dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc); + goto err; + } + + return 0; + + err: + put_device(dev); + wq->idxd_cdev = NULL; + return rc; +} + +void idxd_wq_del_cdev(struct idxd_wq *wq) +{ + struct idxd_cdev *idxd_cdev; + + idxd_cdev = wq->idxd_cdev; + ida_destroy(&file_ida); + wq->idxd_cdev = NULL; + cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev)); + put_device(cdev_dev(idxd_cdev)); +} + +static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + struct idxd_device *idxd = wq->idxd; + int rc; + + if (idxd->state != IDXD_DEV_ENABLED) + return -ENXIO; + + /* + * User type WQ is enabled only when SVA is enabled for two reasons: + * - If no IOMMU or IOMMU Passthrough without SVA, userspace + * can directly access physical address through the WQ. + * - The IDXD cdev driver does not provide any ways to pin + * user pages and translate the address from user VA to IOVA or + * PA without IOMMU SVA. Therefore the application has no way + * to instruct the device to perform DMA function. This makes + * the cdev not usable for normal application usage. + */ + if (!device_user_pasid_enabled(idxd)) { + idxd->cmd_status = IDXD_SCMD_WQ_USER_NO_IOMMU; + dev_dbg(&idxd->pdev->dev, + "User type WQ cannot be enabled without SVA.\n"); + + return -EOPNOTSUPP; + } + + mutex_lock(&wq->wq_lock); + + wq->wq = create_workqueue(dev_name(wq_confdev(wq))); + if (!wq->wq) { + rc = -ENOMEM; + goto wq_err; + } + + wq->type = IDXD_WQT_USER; + rc = drv_enable_wq(wq); + if (rc < 0) + goto err; + + rc = idxd_wq_add_cdev(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_CDEV_ERR; + goto err_cdev; + } + + idxd->cmd_status = 0; + mutex_unlock(&wq->wq_lock); + return 0; + +err_cdev: + drv_disable_wq(wq); +err: + destroy_workqueue(wq->wq); + wq->type = IDXD_WQT_NONE; +wq_err: + mutex_unlock(&wq->wq_lock); + return rc; +} + +static void idxd_user_drv_remove(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + + mutex_lock(&wq->wq_lock); + idxd_wq_del_cdev(wq); + drv_disable_wq(wq); + wq->type = IDXD_WQT_NONE; + destroy_workqueue(wq->wq); + wq->wq = NULL; + mutex_unlock(&wq->wq_lock); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_WQ, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_user_drv = { + .probe = idxd_user_drv_probe, + .remove = idxd_user_drv_remove, + .name = "user", + .type = dev_types, +}; +EXPORT_SYMBOL_GPL(idxd_user_drv); + +int idxd_cdev_register(void) +{ + int rc, i; + + for (i = 0; i < IDXD_TYPE_MAX; i++) { + ida_init(&ictx[i].minor_ida); + rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK, + ictx[i].name); + if (rc) + goto err_free_chrdev_region; + } + + return 0; + +err_free_chrdev_region: + for (i--; i >= 0; i--) + unregister_chrdev_region(ictx[i].devt, MINORMASK); + + return rc; +} + +void idxd_cdev_remove(void) +{ + int i; + + for (i = 0; i < IDXD_TYPE_MAX; i++) { + unregister_chrdev_region(ictx[i].devt, MINORMASK); + ida_destroy(&ictx[i].minor_ida); + } +} + +/** + * idxd_copy_cr - copy completion record to user address space found by wq and + * PASID + * @wq: work queue + * @pasid: PASID + * @addr: user fault address to write + * @cr: completion record + * @len: number of bytes to copy + * + * This is called by a work that handles completion record fault. + * + * Return: number of bytes copied. + */ +int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, + void *cr, int len) +{ + struct device *dev = &wq->idxd->pdev->dev; + int left = len, status_size = 1; + struct idxd_user_context *ctx; + struct mm_struct *mm; + + mutex_lock(&wq->uc_lock); + + ctx = xa_load(&wq->upasid_xa, pasid); + if (!ctx) { + dev_warn(dev, "No user context\n"); + goto out; + } + + mm = ctx->mm; + /* + * The completion record fault handling work is running in kernel + * thread context. It temporarily switches to the mm to copy cr + * to addr in the mm. + */ + kthread_use_mm(mm); + left = copy_to_user((void __user *)addr + status_size, cr + status_size, + len - status_size); + /* + * Copy status only after the rest of completion record is copied + * successfully so that the user gets the complete completion record + * when a non-zero status is polled. + */ + if (!left) { + u8 status; + + /* + * Ensure that the completion record's status field is written + * after the rest of the completion record has been written. + * This ensures that the user receives the correct completion + * record information once polling for a non-zero status. + */ + wmb(); + status = *(u8 *)cr; + if (put_user(status, (u8 __user *)addr)) + left += status_size; + } else { + left += status_size; + } + kthread_unuse_mm(mm); + +out: + mutex_unlock(&wq->uc_lock); + + return len - left; +} diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c new file mode 100644 index 0000000000..5fd38d1b9d --- /dev/null +++ b/drivers/dma/idxd/compat.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include "idxd.h" + +extern int device_driver_attach(struct device_driver *drv, struct device *dev); +extern void device_driver_detach(struct device *dev); + +#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ + struct driver_attribute driver_attr_##_name = \ + __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) + +static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count) +{ + const struct bus_type *bus = drv->bus; + struct device *dev; + int rc = -ENODEV; + + dev = bus_find_device_by_name(bus, NULL, buf); + if (dev && dev->driver) { + device_driver_detach(dev); + rc = count; + } + + return rc; +} +static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store); + +static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count) +{ + const struct bus_type *bus = drv->bus; + struct device *dev; + struct device_driver *alt_drv = NULL; + int rc = -ENODEV; + struct idxd_dev *idxd_dev; + + dev = bus_find_device_by_name(bus, NULL, buf); + if (!dev || dev->driver || drv != &dsa_drv.drv) + return -ENODEV; + + idxd_dev = confdev_to_idxd_dev(dev); + if (is_idxd_dev(idxd_dev)) { + alt_drv = driver_find("idxd", bus); + } else if (is_idxd_wq_dev(idxd_dev)) { + struct idxd_wq *wq = confdev_to_wq(dev); + + if (is_idxd_wq_kernel(wq)) + alt_drv = driver_find("dmaengine", bus); + else if (is_idxd_wq_user(wq)) + alt_drv = driver_find("user", bus); + } + if (!alt_drv) + return -ENODEV; + + rc = device_driver_attach(alt_drv, dev); + if (rc < 0) + return rc; + + return count; +} +static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store); + +static struct attribute *dsa_drv_compat_attrs[] = { + &driver_attr_bind.attr, + &driver_attr_unbind.attr, + NULL, +}; + +static const struct attribute_group dsa_drv_compat_attr_group = { + .attrs = dsa_drv_compat_attrs, +}; + +static const struct attribute_group *dsa_drv_compat_groups[] = { + &dsa_drv_compat_attr_group, + NULL, +}; + +static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev) +{ + return -ENODEV; +} + +static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev) +{ +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_NONE, +}; + +struct idxd_device_driver dsa_drv = { + .name = "dsa", + .probe = idxd_dsa_drv_probe, + .remove = idxd_dsa_drv_remove, + .type = dev_types, + .drv = { + .suppress_bind_attrs = true, + .groups = dsa_drv_compat_groups, + }, +}; + +module_idxd_driver(dsa_drv); +MODULE_IMPORT_NS(IDXD); diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c new file mode 100644 index 0000000000..9cfbd9b14c --- /dev/null +++ b/drivers/dma/idxd/debugfs.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include "idxd.h" +#include "registers.h" + +static struct dentry *idxd_debugfs_dir; + +static void dump_event_entry(struct idxd_device *idxd, struct seq_file *s, + u16 index, int *count, bool processed) +{ + struct idxd_evl *evl = idxd->evl; + struct dsa_evl_entry *entry; + struct dsa_completion_record *cr; + u64 *raw; + int i; + int evl_strides = evl_ent_size(idxd) / sizeof(u64); + + entry = (struct dsa_evl_entry *)evl->log + index; + + if (!entry->e.desc_valid) + return; + + seq_printf(s, "Event Log entry %d (real index %u) processed: %u\n", + *count, index, processed); + + seq_printf(s, "desc valid %u wq idx valid %u\n" + "batch %u fault rw %u priv %u error 0x%x\n" + "wq idx %u op %#x pasid %u batch idx %u\n" + "fault addr %#llx\n", + entry->e.desc_valid, entry->e.wq_idx_valid, + entry->e.batch, entry->e.fault_rw, entry->e.priv, + entry->e.error, entry->e.wq_idx, entry->e.operation, + entry->e.pasid, entry->e.batch_idx, entry->e.fault_addr); + + cr = &entry->cr; + seq_printf(s, "status %#x result %#x fault_info %#x bytes_completed %u\n" + "fault addr %#llx inv flags %#x\n\n", + cr->status, cr->result, cr->fault_info, cr->bytes_completed, + cr->fault_addr, cr->invalid_flags); + + raw = (u64 *)entry; + + for (i = 0; i < evl_strides; i++) + seq_printf(s, "entry[%d] = %#llx\n", i, raw[i]); + + seq_puts(s, "\n"); + *count += 1; +} + +static int debugfs_evl_show(struct seq_file *s, void *d) +{ + struct idxd_device *idxd = s->private; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg evl_status; + u16 h, t, evl_size, i; + int count = 0; + bool processed = true; + + if (!evl || !evl->log) + return 0; + + spin_lock(&evl->lock); + + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + evl_size = evl->size; + + seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n", + evl_status.head, evl_status.tail, evl_status.int_pending); + + i = t; + while (1) { + i = (i + 1) % evl_size; + if (i == t) + break; + + if (processed && i == h) + processed = false; + dump_event_entry(idxd, s, i, &count, processed); + } + + spin_unlock(&evl->lock); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(debugfs_evl); + +int idxd_device_init_debugfs(struct idxd_device *idxd) +{ + if (IS_ERR_OR_NULL(idxd_debugfs_dir)) + return 0; + + idxd->dbgfs_dir = debugfs_create_dir(dev_name(idxd_confdev(idxd)), idxd_debugfs_dir); + if (IS_ERR(idxd->dbgfs_dir)) + return PTR_ERR(idxd->dbgfs_dir); + + if (idxd->evl) { + idxd->dbgfs_evl_file = debugfs_create_file("event_log", 0400, + idxd->dbgfs_dir, idxd, + &debugfs_evl_fops); + if (IS_ERR(idxd->dbgfs_evl_file)) { + debugfs_remove_recursive(idxd->dbgfs_dir); + idxd->dbgfs_dir = NULL; + return PTR_ERR(idxd->dbgfs_evl_file); + } + } + + return 0; +} + +void idxd_device_remove_debugfs(struct idxd_device *idxd) +{ + debugfs_remove_recursive(idxd->dbgfs_dir); +} + +int idxd_init_debugfs(void) +{ + if (!debugfs_initialized()) + return 0; + + idxd_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (IS_ERR(idxd_debugfs_dir)) + return PTR_ERR(idxd_debugfs_dir); + return 0; +} + +void idxd_remove_debugfs(void) +{ + debugfs_remove_recursive(idxd_debugfs_dir); +} diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c new file mode 100644 index 0000000000..fa0f880bea --- /dev/null +++ b/drivers/dma/idxd/device.c @@ -0,0 +1,1607 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "../dmaengine.h" +#include "idxd.h" +#include "registers.h" + +static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, + u32 *status); +static void idxd_device_wqs_clear_state(struct idxd_device *idxd); +static void idxd_wq_disable_cleanup(struct idxd_wq *wq); + +/* Interrupt control bits */ +void idxd_unmask_error_interrupts(struct idxd_device *idxd) +{ + union genctrl_reg genctrl; + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.softerr_int_en = 1; + genctrl.halt_int_en = 1; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); +} + +void idxd_mask_error_interrupts(struct idxd_device *idxd) +{ + union genctrl_reg genctrl; + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.softerr_int_en = 0; + genctrl.halt_int_en = 0; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); +} + +static void free_hw_descs(struct idxd_wq *wq) +{ + int i; + + for (i = 0; i < wq->num_descs; i++) + kfree(wq->hw_descs[i]); + + kfree(wq->hw_descs); +} + +static int alloc_hw_descs(struct idxd_wq *wq, int num) +{ + struct device *dev = &wq->idxd->pdev->dev; + int i; + int node = dev_to_node(dev); + + wq->hw_descs = kcalloc_node(num, sizeof(struct dsa_hw_desc *), + GFP_KERNEL, node); + if (!wq->hw_descs) + return -ENOMEM; + + for (i = 0; i < num; i++) { + wq->hw_descs[i] = kzalloc_node(sizeof(*wq->hw_descs[i]), + GFP_KERNEL, node); + if (!wq->hw_descs[i]) { + free_hw_descs(wq); + return -ENOMEM; + } + } + + return 0; +} + +static void free_descs(struct idxd_wq *wq) +{ + int i; + + for (i = 0; i < wq->num_descs; i++) + kfree(wq->descs[i]); + + kfree(wq->descs); +} + +static int alloc_descs(struct idxd_wq *wq, int num) +{ + struct device *dev = &wq->idxd->pdev->dev; + int i; + int node = dev_to_node(dev); + + wq->descs = kcalloc_node(num, sizeof(struct idxd_desc *), + GFP_KERNEL, node); + if (!wq->descs) + return -ENOMEM; + + for (i = 0; i < num; i++) { + wq->descs[i] = kzalloc_node(sizeof(*wq->descs[i]), + GFP_KERNEL, node); + if (!wq->descs[i]) { + free_descs(wq); + return -ENOMEM; + } + } + + return 0; +} + +/* WQ control bits */ +int idxd_wq_alloc_resources(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + int rc, num_descs, i; + + if (wq->type != IDXD_WQT_KERNEL) + return 0; + + num_descs = wq_dedicated(wq) ? wq->size : wq->threshold; + wq->num_descs = num_descs; + + rc = alloc_hw_descs(wq, num_descs); + if (rc < 0) + return rc; + + wq->compls_size = num_descs * idxd->data->compl_size; + wq->compls = dma_alloc_coherent(dev, wq->compls_size, &wq->compls_addr, GFP_KERNEL); + if (!wq->compls) { + rc = -ENOMEM; + goto fail_alloc_compls; + } + + rc = alloc_descs(wq, num_descs); + if (rc < 0) + goto fail_alloc_descs; + + rc = sbitmap_queue_init_node(&wq->sbq, num_descs, -1, false, GFP_KERNEL, + dev_to_node(dev)); + if (rc < 0) + goto fail_sbitmap_init; + + for (i = 0; i < num_descs; i++) { + struct idxd_desc *desc = wq->descs[i]; + + desc->hw = wq->hw_descs[i]; + if (idxd->data->type == IDXD_TYPE_DSA) + desc->completion = &wq->compls[i]; + else if (idxd->data->type == IDXD_TYPE_IAX) + desc->iax_completion = &wq->iax_compls[i]; + desc->compl_dma = wq->compls_addr + idxd->data->compl_size * i; + desc->id = i; + desc->wq = wq; + desc->cpu = -1; + } + + return 0; + + fail_sbitmap_init: + free_descs(wq); + fail_alloc_descs: + dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); + fail_alloc_compls: + free_hw_descs(wq); + return rc; +} + +void idxd_wq_free_resources(struct idxd_wq *wq) +{ + struct device *dev = &wq->idxd->pdev->dev; + + if (wq->type != IDXD_WQT_KERNEL) + return; + + free_hw_descs(wq); + free_descs(wq); + dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); + sbitmap_queue_free(&wq->sbq); +} + +int idxd_wq_enable(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + u32 status; + + if (wq->state == IDXD_WQ_ENABLED) { + dev_dbg(dev, "WQ %d already enabled\n", wq->id); + return 0; + } + + idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_WQ, wq->id, &status); + + if (status != IDXD_CMDSTS_SUCCESS && + status != IDXD_CMDSTS_ERR_WQ_ENABLED) { + dev_dbg(dev, "WQ enable failed: %#x\n", status); + return -ENXIO; + } + + wq->state = IDXD_WQ_ENABLED; + set_bit(wq->id, idxd->wq_enable_map); + dev_dbg(dev, "WQ %d enabled\n", wq->id); + return 0; +} + +int idxd_wq_disable(struct idxd_wq *wq, bool reset_config) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + u32 status, operand; + + dev_dbg(dev, "Disabling WQ %d\n", wq->id); + + if (wq->state != IDXD_WQ_ENABLED) { + dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state); + return 0; + } + + operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); + idxd_cmd_exec(idxd, IDXD_CMD_DISABLE_WQ, operand, &status); + + if (status != IDXD_CMDSTS_SUCCESS) { + dev_dbg(dev, "WQ disable failed: %#x\n", status); + return -ENXIO; + } + + if (reset_config) + idxd_wq_disable_cleanup(wq); + clear_bit(wq->id, idxd->wq_enable_map); + wq->state = IDXD_WQ_DISABLED; + dev_dbg(dev, "WQ %d disabled\n", wq->id); + return 0; +} + +void idxd_wq_drain(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + u32 operand; + + if (wq->state != IDXD_WQ_ENABLED) { + dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state); + return; + } + + dev_dbg(dev, "Draining WQ %d\n", wq->id); + operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); + idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_WQ, operand, NULL); +} + +void idxd_wq_reset(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + u32 operand; + + if (wq->state != IDXD_WQ_ENABLED) { + dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state); + return; + } + + operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); + idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL); + idxd_wq_disable_cleanup(wq); +} + +int idxd_wq_map_portal(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + resource_size_t start; + + start = pci_resource_start(pdev, IDXD_WQ_BAR); + start += idxd_get_wq_portal_full_offset(wq->id, IDXD_PORTAL_LIMITED); + + wq->portal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE); + if (!wq->portal) + return -ENOMEM; + + return 0; +} + +void idxd_wq_unmap_portal(struct idxd_wq *wq) +{ + struct device *dev = &wq->idxd->pdev->dev; + + devm_iounmap(dev, wq->portal); + wq->portal = NULL; + wq->portal_offset = 0; +} + +void idxd_wqs_unmap_portal(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + if (wq->portal) + idxd_wq_unmap_portal(wq); + } +} + +static void __idxd_wq_set_pasid_locked(struct idxd_wq *wq, int pasid) +{ + struct idxd_device *idxd = wq->idxd; + union wqcfg wqcfg; + unsigned int offset; + + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); + spin_lock(&idxd->dev_lock); + wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); + wqcfg.pasid_en = 1; + wqcfg.pasid = pasid; + wq->wqcfg->bits[WQCFG_PASID_IDX] = wqcfg.bits[WQCFG_PASID_IDX]; + iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); + spin_unlock(&idxd->dev_lock); +} + +int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) +{ + int rc; + + rc = idxd_wq_disable(wq, false); + if (rc < 0) + return rc; + + __idxd_wq_set_pasid_locked(wq, pasid); + + rc = idxd_wq_enable(wq); + if (rc < 0) + return rc; + + return 0; +} + +int idxd_wq_disable_pasid(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + int rc; + union wqcfg wqcfg; + unsigned int offset; + + rc = idxd_wq_disable(wq, false); + if (rc < 0) + return rc; + + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); + spin_lock(&idxd->dev_lock); + wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); + wqcfg.pasid_en = 0; + wqcfg.pasid = 0; + iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); + spin_unlock(&idxd->dev_lock); + + rc = idxd_wq_enable(wq); + if (rc < 0) + return rc; + + return 0; +} + +static void idxd_wq_disable_cleanup(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + + lockdep_assert_held(&wq->wq_lock); + wq->state = IDXD_WQ_DISABLED; + memset(wq->wqcfg, 0, idxd->wqcfg_size); + wq->type = IDXD_WQT_NONE; + wq->threshold = 0; + wq->priority = 0; + wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; + wq->flags = 0; + memset(wq->name, 0, WQ_NAME_SIZE); + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH); + if (wq->opcap_bmap) + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); +} + +static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) +{ + lockdep_assert_held(&wq->wq_lock); + + wq->size = 0; + wq->group = NULL; +} + +static void idxd_wq_ref_release(struct percpu_ref *ref) +{ + struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active); + + complete(&wq->wq_dead); +} + +int idxd_wq_init_percpu_ref(struct idxd_wq *wq) +{ + int rc; + + memset(&wq->wq_active, 0, sizeof(wq->wq_active)); + rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); + if (rc < 0) + return rc; + reinit_completion(&wq->wq_dead); + reinit_completion(&wq->wq_resurrect); + return 0; +} + +void __idxd_wq_quiesce(struct idxd_wq *wq) +{ + lockdep_assert_held(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); + percpu_ref_kill(&wq->wq_active); + complete_all(&wq->wq_resurrect); + wait_for_completion(&wq->wq_dead); +} + +void idxd_wq_quiesce(struct idxd_wq *wq) +{ + mutex_lock(&wq->wq_lock); + __idxd_wq_quiesce(wq); + mutex_unlock(&wq->wq_lock); +} + +/* Device control bits */ +static inline bool idxd_is_enabled(struct idxd_device *idxd) +{ + union gensts_reg gensts; + + gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); + + if (gensts.state == IDXD_DEVICE_STATE_ENABLED) + return true; + return false; +} + +static inline bool idxd_device_is_halted(struct idxd_device *idxd) +{ + union gensts_reg gensts; + + gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); + + return (gensts.state == IDXD_DEVICE_STATE_HALT); +} + +/* + * This is function is only used for reset during probe and will + * poll for completion. Once the device is setup with interrupts, + * all commands will be done via interrupt completion. + */ +int idxd_device_init_reset(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + union idxd_command_reg cmd; + + if (idxd_device_is_halted(idxd)) { + dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); + return -ENXIO; + } + + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = IDXD_CMD_RESET_DEVICE; + dev_dbg(dev, "%s: sending reset for init.\n", __func__); + spin_lock(&idxd->cmd_lock); + iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); + + while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & + IDXD_CMDSTS_ACTIVE) + cpu_relax(); + spin_unlock(&idxd->cmd_lock); + return 0; +} + +static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, + u32 *status) +{ + union idxd_command_reg cmd; + DECLARE_COMPLETION_ONSTACK(done); + u32 stat; + unsigned long flags; + + if (idxd_device_is_halted(idxd)) { + dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); + if (status) + *status = IDXD_CMDSTS_HW_ERR; + return; + } + + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = cmd_code; + cmd.operand = operand; + cmd.int_req = 1; + + spin_lock_irqsave(&idxd->cmd_lock, flags); + wait_event_lock_irq(idxd->cmd_waitq, + !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags), + idxd->cmd_lock); + + dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n", + __func__, cmd_code, operand); + + idxd->cmd_status = 0; + __set_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags); + idxd->cmd_done = &done; + iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); + + /* + * After command submitted, release lock and go to sleep until + * the command completes via interrupt. + */ + spin_unlock_irqrestore(&idxd->cmd_lock, flags); + wait_for_completion(&done); + stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); + spin_lock(&idxd->cmd_lock); + if (status) + *status = stat; + idxd->cmd_status = stat & GENMASK(7, 0); + + __clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags); + /* Wake up other pending commands */ + wake_up(&idxd->cmd_waitq); + spin_unlock(&idxd->cmd_lock); +} + +int idxd_device_enable(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + u32 status; + + if (idxd_is_enabled(idxd)) { + dev_dbg(dev, "Device already enabled\n"); + return -ENXIO; + } + + idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_DEVICE, 0, &status); + + /* If the command is successful or if the device was enabled */ + if (status != IDXD_CMDSTS_SUCCESS && + status != IDXD_CMDSTS_ERR_DEV_ENABLED) { + dev_dbg(dev, "%s: err_code: %#x\n", __func__, status); + return -ENXIO; + } + + idxd->state = IDXD_DEV_ENABLED; + return 0; +} + +int idxd_device_disable(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + u32 status; + + if (!idxd_is_enabled(idxd)) { + dev_dbg(dev, "Device is not enabled\n"); + return 0; + } + + idxd_cmd_exec(idxd, IDXD_CMD_DISABLE_DEVICE, 0, &status); + + /* If the command is successful or if the device was disabled */ + if (status != IDXD_CMDSTS_SUCCESS && + !(status & IDXD_CMDSTS_ERR_DIS_DEV_EN)) { + dev_dbg(dev, "%s: err_code: %#x\n", __func__, status); + return -ENXIO; + } + + idxd_device_clear_state(idxd); + return 0; +} + +void idxd_device_reset(struct idxd_device *idxd) +{ + idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL); + idxd_device_clear_state(idxd); + spin_lock(&idxd->dev_lock); + idxd_unmask_error_interrupts(idxd); + spin_unlock(&idxd->dev_lock); +} + +void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) +{ + struct device *dev = &idxd->pdev->dev; + u32 operand; + + operand = pasid; + dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_DRAIN_PASID, operand); + idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_PASID, operand, NULL); + dev_dbg(dev, "pasid %d drained\n", pasid); +} + +int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle, + enum idxd_interrupt_type irq_type) +{ + struct device *dev = &idxd->pdev->dev; + u32 operand, status; + + if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))) + return -EOPNOTSUPP; + + dev_dbg(dev, "get int handle, idx %d\n", idx); + + operand = idx & GENMASK(15, 0); + if (irq_type == IDXD_IRQ_IMS) + operand |= CMD_INT_HANDLE_IMS; + + dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_REQUEST_INT_HANDLE, operand); + + idxd_cmd_exec(idxd, IDXD_CMD_REQUEST_INT_HANDLE, operand, &status); + + if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) { + dev_dbg(dev, "request int handle failed: %#x\n", status); + return -ENXIO; + } + + *handle = (status >> IDXD_CMDSTS_RES_SHIFT) & GENMASK(15, 0); + + dev_dbg(dev, "int handle acquired: %u\n", *handle); + return 0; +} + +int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, + enum idxd_interrupt_type irq_type) +{ + struct device *dev = &idxd->pdev->dev; + u32 operand, status; + union idxd_command_reg cmd; + + if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))) + return -EOPNOTSUPP; + + dev_dbg(dev, "release int handle, handle %d\n", handle); + + memset(&cmd, 0, sizeof(cmd)); + operand = handle & GENMASK(15, 0); + + if (irq_type == IDXD_IRQ_IMS) + operand |= CMD_INT_HANDLE_IMS; + + cmd.cmd = IDXD_CMD_RELEASE_INT_HANDLE; + cmd.operand = operand; + + dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand); + + spin_lock(&idxd->cmd_lock); + iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); + + while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE) + cpu_relax(); + status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); + spin_unlock(&idxd->cmd_lock); + + if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) { + dev_dbg(dev, "release int handle failed: %#x\n", status); + return -ENXIO; + } + + dev_dbg(dev, "int handle released.\n"); + return 0; +} + +/* Device configuration bits */ +static void idxd_engines_clear_state(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + int i; + + lockdep_assert_held(&idxd->dev_lock); + for (i = 0; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + engine->group = NULL; + } +} + +static void idxd_groups_clear_state(struct idxd_device *idxd) +{ + struct idxd_group *group; + int i; + + lockdep_assert_held(&idxd->dev_lock); + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + memset(&group->grpcfg, 0, sizeof(group->grpcfg)); + group->num_engines = 0; + group->num_wqs = 0; + group->use_rdbuf_limit = false; + /* + * The default value is the same as the value of + * total read buffers in GRPCAP. + */ + group->rdbufs_allowed = idxd->max_rdbufs; + group->rdbufs_reserved = 0; + if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) { + group->tc_a = 1; + group->tc_b = 1; + } else { + group->tc_a = -1; + group->tc_b = -1; + } + group->desc_progress_limit = 0; + group->batch_progress_limit = 0; + } +} + +static void idxd_device_wqs_clear_state(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + mutex_lock(&wq->wq_lock); + idxd_wq_disable_cleanup(wq); + idxd_wq_device_reset_cleanup(wq); + mutex_unlock(&wq->wq_lock); + } +} + +void idxd_device_clear_state(struct idxd_device *idxd) +{ + /* IDXD is always disabled. Other states are cleared only when IDXD is configurable. */ + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + /* + * Clearing wq state is protected by wq lock. + * So no need to be protected by device lock. + */ + idxd_device_wqs_clear_state(idxd); + + spin_lock(&idxd->dev_lock); + idxd_groups_clear_state(idxd); + idxd_engines_clear_state(idxd); + } else { + spin_lock(&idxd->dev_lock); + } + + idxd->state = IDXD_DEV_DISABLED; + spin_unlock(&idxd->dev_lock); +} + +static int idxd_device_evl_setup(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union evlcfg_reg evlcfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + void *addr; + dma_addr_t dma_addr; + int size; + struct idxd_evl *evl = idxd->evl; + unsigned long *bmap; + int rc; + + if (!evl) + return 0; + + size = evl_size(idxd); + + bmap = bitmap_zalloc(size, GFP_KERNEL); + if (!bmap) { + rc = -ENOMEM; + goto err_bmap; + } + + /* + * Address needs to be page aligned. However, dma_alloc_coherent() provides + * at minimal page size aligned address. No manual alignment required. + */ + addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); + if (!addr) { + rc = -ENOMEM; + goto err_alloc; + } + + spin_lock(&evl->lock); + evl->log = addr; + evl->dma = dma_addr; + evl->log_size = size; + evl->bmap = bmap; + + memset(&evlcfg, 0, sizeof(evlcfg)); + evlcfg.bits[0] = dma_addr & GENMASK(63, 12); + evlcfg.size = evl->size; + + iowrite64(evlcfg.bits[0], idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(evlcfg.bits[1], idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 1; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + gencfg.evl_en = 1; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + spin_unlock(&evl->lock); + return 0; + +err_alloc: + bitmap_free(bmap); +err_bmap: + return rc; +} + +static void idxd_device_evl_free(struct idxd_device *idxd) +{ + void *evl_log; + unsigned int evl_log_size; + dma_addr_t evl_dma; + union gencfg_reg gencfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + if (!gencfg.evl_en) + return; + + spin_lock(&evl->lock); + gencfg.evl_en = 0; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 0; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + bitmap_free(evl->bmap); + evl_log = evl->log; + evl_log_size = evl->log_size; + evl_dma = evl->dma; + evl->log = NULL; + evl->size = IDXD_EVL_SIZE_MIN; + spin_unlock(&evl->lock); + + dma_free_coherent(dev, evl_log_size, evl_log, evl_dma); +} + +static void idxd_group_config_write(struct idxd_group *group) +{ + struct idxd_device *idxd = group->idxd; + struct device *dev = &idxd->pdev->dev; + int i; + u32 grpcfg_offset; + + dev_dbg(dev, "Writing group %d cfg registers\n", group->id); + + /* setup GRPWQCFG */ + for (i = 0; i < GRPWQCFG_STRIDES; i++) { + grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i); + iowrite64(group->grpcfg.wqs[i], idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n", + group->id, i, grpcfg_offset, + ioread64(idxd->reg_base + grpcfg_offset)); + } + + /* setup GRPENGCFG */ + grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id); + iowrite64(group->grpcfg.engines, idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id, + grpcfg_offset, ioread64(idxd->reg_base + grpcfg_offset)); + + /* setup GRPFLAGS */ + grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); + iowrite64(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n", + group->id, grpcfg_offset, + ioread64(idxd->reg_base + grpcfg_offset)); +} + +static int idxd_groups_config_write(struct idxd_device *idxd) + +{ + union gencfg_reg reg; + int i; + struct device *dev = &idxd->pdev->dev; + + /* Setup bandwidth rdbuf limit */ + if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) { + reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + reg.rdbuf_limit = idxd->rdbuf_limit; + iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + } + + dev_dbg(dev, "GENCFG(%#x): %#x\n", IDXD_GENCFG_OFFSET, + ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET)); + + for (i = 0; i < idxd->max_groups; i++) { + struct idxd_group *group = idxd->groups[i]; + + idxd_group_config_write(group); + } + + return 0; +} + +static bool idxd_device_pasid_priv_enabled(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + + if (pdev->pasid_enabled && (pdev->pasid_features & PCI_PASID_CAP_PRIV)) + return true; + return false; +} + +static int idxd_wq_config_write(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + u32 wq_offset; + int i, n; + + if (!wq->group) + return 0; + + /* + * Instead of memset the entire shadow copy of WQCFG, copy from the hardware after + * wq reset. This will copy back the sticky values that are present on some devices. + */ + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); + wq->wqcfg->bits[i] |= ioread32(idxd->reg_base + wq_offset); + } + + if (wq->size == 0 && wq->type != IDXD_WQT_NONE) + wq->size = WQ_DEFAULT_QUEUE_DEPTH; + + /* byte 0-3 */ + wq->wqcfg->wq_size = wq->size; + + /* bytes 4-7 */ + wq->wqcfg->wq_thresh = wq->threshold; + + /* byte 8-11 */ + if (wq_dedicated(wq)) + wq->wqcfg->mode = 1; + + /* + * The WQ priv bit is set depending on the WQ type. priv = 1 if the + * WQ type is kernel to indicate privileged access. This setting only + * matters for dedicated WQ. According to the DSA spec: + * If the WQ is in dedicated mode, WQ PASID Enable is 1, and the + * Privileged Mode Enable field of the PCI Express PASID capability + * is 0, this field must be 0. + * + * In the case of a dedicated kernel WQ that is not able to support + * the PASID cap, then the configuration will be rejected. + */ + if (wq_dedicated(wq) && wq->wqcfg->pasid_en && + !idxd_device_pasid_priv_enabled(idxd) && + wq->type == IDXD_WQT_KERNEL) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_PRIV; + return -EOPNOTSUPP; + } + + wq->wqcfg->priority = wq->priority; + + if (idxd->hw.gen_cap.block_on_fault && + test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags) && + !test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)) + wq->wqcfg->bof = 1; + + if (idxd->hw.wq_cap.wq_ats_support) + wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); + + if (idxd->hw.wq_cap.wq_prs_support) + wq->wqcfg->wq_prs_disable = test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + + /* bytes 12-15 */ + wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); + idxd_wqcfg_set_max_batch_shift(idxd->data->type, wq->wqcfg, ilog2(wq->max_batch_size)); + + /* bytes 32-63 */ + if (idxd->hw.wq_cap.op_config && wq->opcap_bmap) { + memset(wq->wqcfg->op_config, 0, IDXD_MAX_OPCAP_BITS / 8); + for_each_set_bit(n, wq->opcap_bmap, IDXD_MAX_OPCAP_BITS) { + int pos = n % BITS_PER_LONG_LONG; + int idx = n / BITS_PER_LONG_LONG; + + wq->wqcfg->op_config[idx] |= BIT(pos); + } + } + + dev_dbg(dev, "WQ %d CFGs\n", wq->id); + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); + iowrite32(wq->wqcfg->bits[i], idxd->reg_base + wq_offset); + dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", + wq->id, i, wq_offset, + ioread32(idxd->reg_base + wq_offset)); + } + + return 0; +} + +static int idxd_wqs_config_write(struct idxd_device *idxd) +{ + int i, rc; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + rc = idxd_wq_config_write(wq); + if (rc < 0) + return rc; + } + + return 0; +} + +static void idxd_group_flags_setup(struct idxd_device *idxd) +{ + int i; + + /* TC-A 0 and TC-B 1 should be defaults */ + for (i = 0; i < idxd->max_groups; i++) { + struct idxd_group *group = idxd->groups[i]; + + if (group->tc_a == -1) + group->tc_a = group->grpcfg.flags.tc_a = 0; + else + group->grpcfg.flags.tc_a = group->tc_a; + if (group->tc_b == -1) + group->tc_b = group->grpcfg.flags.tc_b = 1; + else + group->grpcfg.flags.tc_b = group->tc_b; + group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit; + group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved; + group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed; + group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit; + group->grpcfg.flags.batch_progress_limit = group->batch_progress_limit; + } +} + +static int idxd_engines_setup(struct idxd_device *idxd) +{ + int i, engines = 0; + struct idxd_engine *eng; + struct idxd_group *group; + + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + group->grpcfg.engines = 0; + } + + for (i = 0; i < idxd->max_engines; i++) { + eng = idxd->engines[i]; + group = eng->group; + + if (!group) + continue; + + group->grpcfg.engines |= BIT(eng->id); + engines++; + } + + if (!engines) + return -EINVAL; + + return 0; +} + +static int idxd_wqs_setup(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + struct idxd_group *group; + int i, j, configured = 0; + struct device *dev = &idxd->pdev->dev; + + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + for (j = 0; j < 4; j++) + group->grpcfg.wqs[j] = 0; + } + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + group = wq->group; + + if (!wq->group) + continue; + + if (wq_shared(wq) && !wq_shared_supported(wq)) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; + dev_warn(dev, "No shared wq support but configured.\n"); + return -EINVAL; + } + + group->grpcfg.wqs[wq->id / 64] |= BIT(wq->id % 64); + configured++; + } + + if (configured == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NONE_CONFIGURED; + return -EINVAL; + } + + return 0; +} + +int idxd_device_config(struct idxd_device *idxd) +{ + int rc; + + lockdep_assert_held(&idxd->dev_lock); + rc = idxd_wqs_setup(idxd); + if (rc < 0) + return rc; + + rc = idxd_engines_setup(idxd); + if (rc < 0) + return rc; + + idxd_group_flags_setup(idxd); + + rc = idxd_wqs_config_write(idxd); + if (rc < 0) + return rc; + + rc = idxd_groups_config_write(idxd); + if (rc < 0) + return rc; + + return 0; +} + +static int idxd_wq_load_config(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + int wqcfg_offset; + int i; + + wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, 0); + memcpy_fromio(wq->wqcfg, idxd->reg_base + wqcfg_offset, idxd->wqcfg_size); + + wq->size = wq->wqcfg->wq_size; + wq->threshold = wq->wqcfg->wq_thresh; + + /* The driver does not support shared WQ mode in read-only config yet */ + if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en) + return -EOPNOTSUPP; + + set_bit(WQ_FLAG_DEDICATED, &wq->flags); + + wq->priority = wq->wqcfg->priority; + + wq->max_xfer_bytes = 1ULL << wq->wqcfg->max_xfer_shift; + idxd_wq_set_max_batch_size(idxd->data->type, wq, 1U << wq->wqcfg->max_batch_shift); + + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i); + dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wqcfg_offset, wq->wqcfg->bits[i]); + } + + return 0; +} + +static void idxd_group_load_config(struct idxd_group *group) +{ + struct idxd_device *idxd = group->idxd; + struct device *dev = &idxd->pdev->dev; + int i, j, grpcfg_offset; + + /* + * Load WQS bit fields + * Iterate through all 256 bits 64 bits at a time + */ + for (i = 0; i < GRPWQCFG_STRIDES; i++) { + struct idxd_wq *wq; + + grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i); + group->grpcfg.wqs[i] = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n", + group->id, i, grpcfg_offset, group->grpcfg.wqs[i]); + + if (i * 64 >= idxd->max_wqs) + break; + + /* Iterate through all 64 bits and check for wq set */ + for (j = 0; j < 64; j++) { + int id = i * 64 + j; + + /* No need to check beyond max wqs */ + if (id >= idxd->max_wqs) + break; + + /* Set group assignment for wq if wq bit is set */ + if (group->grpcfg.wqs[i] & BIT(j)) { + wq = idxd->wqs[id]; + wq->group = group; + } + } + } + + grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id); + group->grpcfg.engines = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id, + grpcfg_offset, group->grpcfg.engines); + + /* Iterate through all 64 bits to check engines set */ + for (i = 0; i < 64; i++) { + if (i >= idxd->max_engines) + break; + + if (group->grpcfg.engines & BIT(i)) { + struct idxd_engine *engine = idxd->engines[i]; + + engine->group = group; + } + } + + grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); + group->grpcfg.flags.bits = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n", + group->id, grpcfg_offset, group->grpcfg.flags.bits); +} + +int idxd_device_load_config(struct idxd_device *idxd) +{ + union gencfg_reg reg; + int i, rc; + + reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + idxd->rdbuf_limit = reg.rdbuf_limit; + + for (i = 0; i < idxd->max_groups; i++) { + struct idxd_group *group = idxd->groups[i]; + + idxd_group_load_config(group); + } + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + rc = idxd_wq_load_config(wq); + if (rc < 0) + return rc; + } + + return 0; +} + +static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) +{ + struct idxd_desc *desc, *itr; + struct llist_node *head; + LIST_HEAD(flist); + enum idxd_complete_type ctype; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(desc, itr, head, llnode) + list_add_tail(&desc->list, &ie->work_list); + } + + list_for_each_entry_safe(desc, itr, &ie->work_list, list) + list_move_tail(&desc->list, &flist); + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(desc, itr, &flist, list) { + struct dma_async_tx_descriptor *tx; + + list_del(&desc->list); + ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; + /* + * wq is being disabled. Any remaining descriptors are + * likely to be stuck and can be dropped. callback could + * point to code that is no longer accessible, for example + * if dmatest module has been unloaded. + */ + tx = &desc->txd; + tx->callback = NULL; + tx->callback_result = NULL; + idxd_dma_complete_txd(desc, ctype, true); + } +} + +static void idxd_device_set_perm_entry(struct idxd_device *idxd, + struct idxd_irq_entry *ie) +{ + union msix_perm mperm; + + if (ie->pasid == IOMMU_PASID_INVALID) + return; + + mperm.bits = 0; + mperm.pasid = ie->pasid; + mperm.pasid_en = 1; + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8); +} + +static void idxd_device_clear_perm_entry(struct idxd_device *idxd, + struct idxd_irq_entry *ie) +{ + iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8); +} + +void idxd_wq_free_irq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = &wq->ie; + + if (wq->type != IDXD_WQT_KERNEL) + return; + + free_irq(ie->vector, ie); + idxd_flush_pending_descs(ie); + if (idxd->request_int_handles) + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + idxd_device_clear_perm_entry(idxd, ie); + ie->vector = -1; + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = IOMMU_PASID_INVALID; +} + +int idxd_wq_request_irq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct idxd_irq_entry *ie; + int rc; + + if (wq->type != IDXD_WQT_KERNEL) + return 0; + + ie = &wq->ie; + ie->vector = pci_irq_vector(pdev, ie->id); + ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : IOMMU_PASID_INVALID; + idxd_device_set_perm_entry(idxd, ie); + + rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); + if (rc < 0) { + dev_err(dev, "Failed to request irq %d.\n", ie->vector); + goto err_irq; + } + + if (idxd->request_int_handles) { + rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle, + IDXD_IRQ_MSIX); + if (rc < 0) + goto err_int_handle; + } else { + ie->int_handle = ie->id; + } + + return 0; + +err_int_handle: + ie->int_handle = INVALID_INT_HANDLE; + free_irq(ie->vector, ie); +err_irq: + idxd_device_clear_perm_entry(idxd, ie); + ie->pasid = IOMMU_PASID_INVALID; + return rc; +} + +int drv_enable_wq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + int rc = -ENXIO; + + lockdep_assert_held(&wq->wq_lock); + + if (idxd->state != IDXD_DEV_ENABLED) { + idxd->cmd_status = IDXD_SCMD_DEV_NOT_ENABLED; + goto err; + } + + if (wq->state != IDXD_WQ_DISABLED) { + dev_dbg(dev, "wq %d already enabled.\n", wq->id); + idxd->cmd_status = IDXD_SCMD_WQ_ENABLED; + rc = -EBUSY; + goto err; + } + + if (!wq->group) { + dev_dbg(dev, "wq %d not attached to group.\n", wq->id); + idxd->cmd_status = IDXD_SCMD_WQ_NO_GRP; + goto err; + } + + if (strlen(wq->name) == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_NAME; + dev_dbg(dev, "wq %d name not set.\n", wq->id); + goto err; + } + + /* Shared WQ checks */ + if (wq_shared(wq)) { + if (!wq_shared_supported(wq)) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM; + dev_dbg(dev, "PASID not enabled and shared wq.\n"); + goto err; + } + /* + * Shared wq with the threshold set to 0 means the user + * did not set the threshold or transitioned from a + * dedicated wq but did not set threshold. A value + * of 0 would effectively disable the shared wq. The + * driver does not allow a value of 0 to be set for + * threshold via sysfs. + */ + if (wq->threshold == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_THRESH; + dev_dbg(dev, "Shared wq and threshold 0.\n"); + goto err; + } + } + + /* + * In the event that the WQ is configurable for pasid, the driver + * should setup the pasid, pasid_en bit. This is true for both kernel + * and user shared workqueues. There is no need to setup priv bit in + * that in-kernel DMA will also do user privileged requests. + * A dedicated wq that is not 'kernel' type will configure pasid and + * pasid_en later on so there is no need to setup. + */ + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + if (wq_pasid_enabled(wq)) { + if (is_idxd_wq_kernel(wq) || wq_shared(wq)) { + u32 pasid = wq_dedicated(wq) ? idxd->pasid : 0; + + __idxd_wq_set_pasid_locked(wq, pasid); + } + } + } + + rc = 0; + spin_lock(&idxd->dev_lock); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); + spin_unlock(&idxd->dev_lock); + if (rc < 0) { + dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc); + goto err; + } + + rc = idxd_wq_enable(wq); + if (rc < 0) { + dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc); + goto err; + } + + rc = idxd_wq_map_portal(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_PORTAL_ERR; + dev_dbg(dev, "wq %d portal mapping failed: %d\n", wq->id, rc); + goto err_map_portal; + } + + wq->client_count = 0; + + rc = idxd_wq_request_irq(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR; + dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc); + goto err_irq; + } + + rc = idxd_wq_alloc_resources(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR; + dev_dbg(dev, "WQ resource alloc failed\n"); + goto err_res_alloc; + } + + rc = idxd_wq_init_percpu_ref(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_PERCPU_ERR; + dev_dbg(dev, "percpu_ref setup failed\n"); + goto err_ref; + } + + return 0; + +err_ref: + idxd_wq_free_resources(wq); +err_res_alloc: + idxd_wq_free_irq(wq); +err_irq: + idxd_wq_unmap_portal(wq); +err_map_portal: + if (idxd_wq_disable(wq, false)) + dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq))); +err: + return rc; +} + +void drv_disable_wq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + + lockdep_assert_held(&wq->wq_lock); + + if (idxd_wq_refcount(wq)) + dev_warn(dev, "Clients has claim on wq %d: %d\n", + wq->id, idxd_wq_refcount(wq)); + + idxd_wq_unmap_portal(wq); + idxd_wq_drain(wq); + idxd_wq_free_irq(wq); + idxd_wq_reset(wq); + idxd_wq_free_resources(wq); + percpu_ref_exit(&wq->wq_active); + wq->type = IDXD_WQT_NONE; + wq->client_count = 0; +} + +int idxd_device_drv_probe(struct idxd_dev *idxd_dev) +{ + struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); + int rc = 0; + + /* + * Device should be in disabled state for the idxd_drv to load. If it's in + * enabled state, then the device was altered outside of driver's control. + * If the state is in halted state, then we don't want to proceed. + */ + if (idxd->state != IDXD_DEV_DISABLED) { + idxd->cmd_status = IDXD_SCMD_DEV_ENABLED; + return -ENXIO; + } + + /* Device configuration */ + spin_lock(&idxd->dev_lock); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); + spin_unlock(&idxd->dev_lock); + if (rc < 0) + return -ENXIO; + + /* + * System PASID is preserved across device disable/enable cycle, but + * genconfig register content gets cleared during device reset. We + * need to re-enable user interrupts for kernel work queue completion + * IRQ to function. + */ + if (idxd->pasid != IOMMU_PASID_INVALID) + idxd_set_user_intr(idxd, 1); + + rc = idxd_device_evl_setup(idxd); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_DEV_EVL_ERR; + return rc; + } + + /* Start device */ + rc = idxd_device_enable(idxd); + if (rc < 0) { + idxd_device_evl_free(idxd); + return rc; + } + + /* Setup DMA device without channels */ + rc = idxd_register_dma_device(idxd); + if (rc < 0) { + idxd_device_disable(idxd); + idxd_device_evl_free(idxd); + idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR; + return rc; + } + + idxd->cmd_status = 0; + return 0; +} + +void idxd_device_drv_remove(struct idxd_dev *idxd_dev) +{ + struct device *dev = &idxd_dev->conf_dev; + struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + struct device *wq_dev = wq_confdev(wq); + + if (wq->state == IDXD_WQ_DISABLED) + continue; + dev_warn(dev, "Active wq %d on disable %s.\n", i, dev_name(wq_dev)); + device_release_driver(wq_dev); + } + + idxd_unregister_dma_device(idxd); + idxd_device_disable(idxd); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + idxd_device_reset(idxd); + idxd_device_evl_free(idxd); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_DSA, + IDXD_DEV_IAX, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_drv = { + .type = dev_types, + .probe = idxd_device_drv_probe, + .remove = idxd_device_drv_remove, + .name = "idxd", +}; +EXPORT_SYMBOL_GPL(idxd_drv); diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c new file mode 100644 index 0000000000..07623fb0f5 --- /dev/null +++ b/drivers/dma/idxd/dma.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "../dmaengine.h" +#include "registers.h" +#include "idxd.h" + +static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c) +{ + struct idxd_dma_chan *idxd_chan; + + idxd_chan = container_of(c, struct idxd_dma_chan, chan); + return idxd_chan->wq; +} + +void idxd_dma_complete_txd(struct idxd_desc *desc, + enum idxd_complete_type comp_type, + bool free_desc) +{ + struct idxd_device *idxd = desc->wq->idxd; + struct dma_async_tx_descriptor *tx; + struct dmaengine_result res; + int complete = 1; + + if (desc->completion->status == DSA_COMP_SUCCESS) { + res.result = DMA_TRANS_NOERROR; + } else if (desc->completion->status) { + if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT && + desc->completion->status == DSA_COMP_INT_HANDLE_INVAL && + idxd_queue_int_handle_resubmit(desc)) + return; + res.result = DMA_TRANS_WRITE_FAILED; + } else if (comp_type == IDXD_COMPLETE_ABORT) { + res.result = DMA_TRANS_ABORTED; + } else { + complete = 0; + } + + tx = &desc->txd; + if (complete && tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + dmaengine_desc_get_callback_invoke(tx, &res); + tx->callback = NULL; + tx->callback_result = NULL; + } + + if (free_desc) + idxd_free_desc(desc->wq, desc); +} + +static void op_flag_setup(unsigned long flags, u32 *desc_flags) +{ + *desc_flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR; + if (flags & DMA_PREP_INTERRUPT) + *desc_flags |= IDXD_OP_FLAG_RCI; +} + +static inline void idxd_prep_desc_common(struct idxd_wq *wq, + struct dsa_hw_desc *hw, char opcode, + u64 addr_f1, u64 addr_f2, u64 len, + u64 compl, u32 flags) +{ + hw->flags = flags; + hw->opcode = opcode; + hw->src_addr = addr_f1; + hw->dst_addr = addr_f2; + hw->xfer_size = len; + /* + * For dedicated WQ, this field is ignored and HW will use the WQCFG.priv + * field instead. This field should be set to 0 for kernel descriptors + * since kernel DMA on VT-d supports "user" privilege only. + */ + hw->priv = 0; + hw->completion_addr = compl; +} + +static struct dma_async_tx_descriptor * +idxd_dma_prep_interrupt(struct dma_chan *c, unsigned long flags) +{ + struct idxd_wq *wq = to_idxd_wq(c); + u32 desc_flags; + struct idxd_desc *desc; + + if (wq->state != IDXD_WQ_ENABLED) + return NULL; + + op_flag_setup(flags, &desc_flags); + desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(desc)) + return NULL; + + idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_NOOP, + 0, 0, 0, desc->compl_dma, desc_flags); + desc->txd.flags = flags; + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +idxd_dma_submit_memcpy(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct idxd_wq *wq = to_idxd_wq(c); + u32 desc_flags; + struct idxd_device *idxd = wq->idxd; + struct idxd_desc *desc; + + if (wq->state != IDXD_WQ_ENABLED) + return NULL; + + if (len > idxd->max_xfer_bytes) + return NULL; + + op_flag_setup(flags, &desc_flags); + desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(desc)) + return NULL; + + idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_MEMMOVE, + dma_src, dma_dest, len, desc->compl_dma, + desc_flags); + + desc->txd.flags = flags; + + return &desc->txd; +} + +static int idxd_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct idxd_wq *wq = to_idxd_wq(chan); + struct device *dev = &wq->idxd->pdev->dev; + + idxd_wq_get(wq); + dev_dbg(dev, "%s: client_count: %d\n", __func__, + idxd_wq_refcount(wq)); + return 0; +} + +static void idxd_dma_free_chan_resources(struct dma_chan *chan) +{ + struct idxd_wq *wq = to_idxd_wq(chan); + struct device *dev = &wq->idxd->pdev->dev; + + idxd_wq_put(wq); + dev_dbg(dev, "%s: client_count: %d\n", __func__, + idxd_wq_refcount(wq)); +} + +static enum dma_status idxd_dma_tx_status(struct dma_chan *dma_chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return DMA_OUT_OF_ORDER; +} + +/* + * issue_pending() does not need to do anything since tx_submit() does the job + * already. + */ +static void idxd_dma_issue_pending(struct dma_chan *dma_chan) +{ +} + +static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct idxd_wq *wq = to_idxd_wq(c); + dma_cookie_t cookie; + int rc; + struct idxd_desc *desc = container_of(tx, struct idxd_desc, txd); + + cookie = dma_cookie_assign(tx); + + rc = idxd_submit_desc(wq, desc); + if (rc < 0) { + idxd_free_desc(wq, desc); + return rc; + } + + return cookie; +} + +static void idxd_dma_release(struct dma_device *device) +{ + struct idxd_dma_dev *idxd_dma = container_of(device, struct idxd_dma_dev, dma); + + kfree(idxd_dma); +} + +int idxd_register_dma_device(struct idxd_device *idxd) +{ + struct idxd_dma_dev *idxd_dma; + struct dma_device *dma; + struct device *dev = &idxd->pdev->dev; + int rc; + + idxd_dma = kzalloc_node(sizeof(*idxd_dma), GFP_KERNEL, dev_to_node(dev)); + if (!idxd_dma) + return -ENOMEM; + + dma = &idxd_dma->dma; + INIT_LIST_HEAD(&dma->channels); + dma->dev = dev; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask); + dma->device_release = idxd_dma_release; + + dma->device_prep_dma_interrupt = idxd_dma_prep_interrupt; + if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) { + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->device_prep_dma_memcpy = idxd_dma_submit_memcpy; + } + + dma->device_tx_status = idxd_dma_tx_status; + dma->device_issue_pending = idxd_dma_issue_pending; + dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources; + dma->device_free_chan_resources = idxd_dma_free_chan_resources; + + rc = dma_async_device_register(dma); + if (rc < 0) { + kfree(idxd_dma); + return rc; + } + + idxd_dma->idxd = idxd; + /* + * This pointer is protected by the refs taken by the dma_chan. It will remain valid + * as long as there are outstanding channels. + */ + idxd->idxd_dma = idxd_dma; + return 0; +} + +void idxd_unregister_dma_device(struct idxd_device *idxd) +{ + dma_async_device_unregister(&idxd->idxd_dma->dma); +} + +static int idxd_register_dma_channel(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct dma_device *dma = &idxd->idxd_dma->dma; + struct device *dev = &idxd->pdev->dev; + struct idxd_dma_chan *idxd_chan; + struct dma_chan *chan; + int rc, i; + + idxd_chan = kzalloc_node(sizeof(*idxd_chan), GFP_KERNEL, dev_to_node(dev)); + if (!idxd_chan) + return -ENOMEM; + + chan = &idxd_chan->chan; + chan->device = dma; + list_add_tail(&chan->device_node, &dma->channels); + + for (i = 0; i < wq->num_descs; i++) { + struct idxd_desc *desc = wq->descs[i]; + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = idxd_dma_tx_submit; + } + + rc = dma_async_device_channel_register(dma, chan); + if (rc < 0) { + kfree(idxd_chan); + return rc; + } + + wq->idxd_chan = idxd_chan; + idxd_chan->wq = wq; + get_device(wq_confdev(wq)); + + return 0; +} + +static void idxd_unregister_dma_channel(struct idxd_wq *wq) +{ + struct idxd_dma_chan *idxd_chan = wq->idxd_chan; + struct dma_chan *chan = &idxd_chan->chan; + struct idxd_dma_dev *idxd_dma = wq->idxd->idxd_dma; + + dma_async_device_channel_unregister(&idxd_dma->dma, chan); + list_del(&chan->device_node); + kfree(wq->idxd_chan); + wq->idxd_chan = NULL; + put_device(wq_confdev(wq)); +} + +static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) +{ + struct device *dev = &idxd_dev->conf_dev; + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + struct idxd_device *idxd = wq->idxd; + int rc; + + if (idxd->state != IDXD_DEV_ENABLED) + return -ENXIO; + + mutex_lock(&wq->wq_lock); + wq->type = IDXD_WQT_KERNEL; + + rc = drv_enable_wq(wq); + if (rc < 0) { + dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc); + rc = -ENXIO; + goto err; + } + + rc = idxd_register_dma_channel(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_DMA_CHAN_ERR; + dev_dbg(dev, "Failed to register dma channel\n"); + goto err_dma; + } + + idxd->cmd_status = 0; + mutex_unlock(&wq->wq_lock); + return 0; + +err_dma: + drv_disable_wq(wq); +err: + wq->type = IDXD_WQT_NONE; + mutex_unlock(&wq->wq_lock); + return rc; +} + +static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + + mutex_lock(&wq->wq_lock); + __idxd_wq_quiesce(wq); + idxd_unregister_dma_channel(wq); + drv_disable_wq(wq); + mutex_unlock(&wq->wq_lock); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_WQ, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_dmaengine_drv = { + .probe = idxd_dmaengine_drv_probe, + .remove = idxd_dmaengine_drv_remove, + .name = "dmaengine", + .type = dev_types, +}; +EXPORT_SYMBOL_GPL(idxd_dmaengine_drv); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h new file mode 100644 index 0000000000..e269ca1f48 --- /dev/null +++ b/drivers/dma/idxd/idxd.h @@ -0,0 +1,752 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#ifndef _IDXD_H_ +#define _IDXD_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registers.h" + +#define IDXD_DRIVER_VERSION "1.00" + +extern struct kmem_cache *idxd_desc_pool; +extern bool tc_override; + +struct idxd_wq; +struct idxd_dev; + +enum idxd_dev_type { + IDXD_DEV_NONE = -1, + IDXD_DEV_DSA = 0, + IDXD_DEV_IAX, + IDXD_DEV_WQ, + IDXD_DEV_GROUP, + IDXD_DEV_ENGINE, + IDXD_DEV_CDEV, + IDXD_DEV_CDEV_FILE, + IDXD_DEV_MAX_TYPE, +}; + +struct idxd_dev { + struct device conf_dev; + enum idxd_dev_type type; +}; + +#define IDXD_REG_TIMEOUT 50 +#define IDXD_DRAIN_TIMEOUT 5000 + +enum idxd_type { + IDXD_TYPE_UNKNOWN = -1, + IDXD_TYPE_DSA = 0, + IDXD_TYPE_IAX, + IDXD_TYPE_MAX, +}; + +#define IDXD_NAME_SIZE 128 +#define IDXD_PMU_EVENT_MAX 64 + +#define IDXD_ENQCMDS_RETRIES 32 +#define IDXD_ENQCMDS_MAX_RETRIES 64 + +struct idxd_device_driver { + const char *name; + enum idxd_dev_type *type; + int (*probe)(struct idxd_dev *idxd_dev); + void (*remove)(struct idxd_dev *idxd_dev); + struct device_driver drv; +}; + +extern struct idxd_device_driver dsa_drv; +extern struct idxd_device_driver idxd_drv; +extern struct idxd_device_driver idxd_dmaengine_drv; +extern struct idxd_device_driver idxd_user_drv; + +#define INVALID_INT_HANDLE -1 +struct idxd_irq_entry { + int id; + int vector; + struct llist_head pending_llist; + struct list_head work_list; + /* + * Lock to protect access between irq thread process descriptor + * and irq thread processing error descriptor. + */ + spinlock_t list_lock; + int int_handle; + ioasid_t pasid; +}; + +struct idxd_group { + struct idxd_dev idxd_dev; + struct idxd_device *idxd; + struct grpcfg grpcfg; + int id; + int num_engines; + int num_wqs; + bool use_rdbuf_limit; + u8 rdbufs_allowed; + u8 rdbufs_reserved; + int tc_a; + int tc_b; + int desc_progress_limit; + int batch_progress_limit; +}; + +struct idxd_pmu { + struct idxd_device *idxd; + + struct perf_event *event_list[IDXD_PMU_EVENT_MAX]; + int n_events; + + DECLARE_BITMAP(used_mask, IDXD_PMU_EVENT_MAX); + + struct pmu pmu; + char name[IDXD_NAME_SIZE]; + int cpu; + + int n_counters; + int counter_width; + int n_event_categories; + + bool per_counter_caps_supported; + unsigned long supported_event_categories; + + unsigned long supported_filters; + int n_filters; + + struct hlist_node cpuhp_node; +}; + +#define IDXD_MAX_PRIORITY 0xf + +enum { + COUNTER_FAULTS = 0, + COUNTER_FAULT_FAILS, + COUNTER_MAX +}; + +enum idxd_wq_state { + IDXD_WQ_DISABLED = 0, + IDXD_WQ_ENABLED, +}; + +enum idxd_wq_flag { + WQ_FLAG_DEDICATED = 0, + WQ_FLAG_BLOCK_ON_FAULT, + WQ_FLAG_ATS_DISABLE, + WQ_FLAG_PRS_DISABLE, +}; + +enum idxd_wq_type { + IDXD_WQT_NONE = 0, + IDXD_WQT_KERNEL, + IDXD_WQT_USER, +}; + +struct idxd_cdev { + struct idxd_wq *wq; + struct cdev cdev; + struct idxd_dev idxd_dev; + int minor; +}; + +#define IDXD_ALLOCATED_BATCH_SIZE 128U +#define WQ_NAME_SIZE 1024 +#define WQ_TYPE_SIZE 10 + +#define WQ_DEFAULT_QUEUE_DEPTH 16 +#define WQ_DEFAULT_MAX_XFER SZ_2M +#define WQ_DEFAULT_MAX_BATCH 32 + +enum idxd_op_type { + IDXD_OP_BLOCK = 0, + IDXD_OP_NONBLOCK = 1, +}; + +enum idxd_complete_type { + IDXD_COMPLETE_NORMAL = 0, + IDXD_COMPLETE_ABORT, + IDXD_COMPLETE_DEV_FAIL, +}; + +struct idxd_dma_chan { + struct dma_chan chan; + struct idxd_wq *wq; +}; + +struct idxd_wq { + void __iomem *portal; + u32 portal_offset; + unsigned int enqcmds_retries; + struct percpu_ref wq_active; + struct completion wq_dead; + struct completion wq_resurrect; + struct idxd_dev idxd_dev; + struct idxd_cdev *idxd_cdev; + struct wait_queue_head err_queue; + struct workqueue_struct *wq; + struct idxd_device *idxd; + int id; + struct idxd_irq_entry ie; + enum idxd_wq_type type; + struct idxd_group *group; + int client_count; + struct mutex wq_lock; /* mutex for workqueue */ + u32 size; + u32 threshold; + u32 priority; + enum idxd_wq_state state; + unsigned long flags; + union wqcfg *wqcfg; + unsigned long *opcap_bmap; + + struct dsa_hw_desc **hw_descs; + int num_descs; + union { + struct dsa_completion_record *compls; + struct iax_completion_record *iax_compls; + }; + dma_addr_t compls_addr; + int compls_size; + struct idxd_desc **descs; + struct sbitmap_queue sbq; + struct idxd_dma_chan *idxd_chan; + char name[WQ_NAME_SIZE + 1]; + u64 max_xfer_bytes; + u32 max_batch_size; + + /* Lock to protect upasid_xa access. */ + struct mutex uc_lock; + struct xarray upasid_xa; +}; + +struct idxd_engine { + struct idxd_dev idxd_dev; + int id; + struct idxd_group *group; + struct idxd_device *idxd; +}; + +/* shadow registers */ +struct idxd_hw { + u32 version; + union gen_cap_reg gen_cap; + union wq_cap_reg wq_cap; + union group_cap_reg group_cap; + union engine_cap_reg engine_cap; + struct opcap opcap; + u32 cmd_cap; + union iaa_cap_reg iaa_cap; +}; + +enum idxd_device_state { + IDXD_DEV_HALTED = -1, + IDXD_DEV_DISABLED = 0, + IDXD_DEV_ENABLED, +}; + +enum idxd_device_flag { + IDXD_FLAG_CONFIGURABLE = 0, + IDXD_FLAG_CMD_RUNNING, + IDXD_FLAG_PASID_ENABLED, + IDXD_FLAG_USER_PASID_ENABLED, +}; + +struct idxd_dma_dev { + struct idxd_device *idxd; + struct dma_device dma; +}; + +struct idxd_driver_data { + const char *name_prefix; + enum idxd_type type; + struct device_type *dev_type; + int compl_size; + int align; + int evl_cr_off; + int cr_status_off; + int cr_result_off; +}; + +struct idxd_evl { + /* Lock to protect event log access. */ + spinlock_t lock; + void *log; + dma_addr_t dma; + /* Total size of event log = number of entries * entry size. */ + unsigned int log_size; + /* The number of entries in the event log. */ + u16 size; + u16 head; + unsigned long *bmap; + bool batch_fail[IDXD_MAX_BATCH_IDENT]; +}; + +struct idxd_evl_fault { + struct work_struct work; + struct idxd_wq *wq; + u8 status; + + /* make this last member always */ + struct __evl_entry entry[]; +}; + +struct idxd_device { + struct idxd_dev idxd_dev; + struct idxd_driver_data *data; + struct list_head list; + struct idxd_hw hw; + enum idxd_device_state state; + unsigned long flags; + int id; + int major; + u32 cmd_status; + struct idxd_irq_entry ie; /* misc irq, msix 0 */ + + struct pci_dev *pdev; + void __iomem *reg_base; + + spinlock_t dev_lock; /* spinlock for device */ + spinlock_t cmd_lock; /* spinlock for device commands */ + struct completion *cmd_done; + struct idxd_group **groups; + struct idxd_wq **wqs; + struct idxd_engine **engines; + + struct iommu_sva *sva; + unsigned int pasid; + + int num_groups; + int irq_cnt; + bool request_int_handles; + + u32 msix_perm_offset; + u32 wqcfg_offset; + u32 grpcfg_offset; + u32 perfmon_offset; + + u64 max_xfer_bytes; + u32 max_batch_size; + int max_groups; + int max_engines; + int max_rdbufs; + int max_wqs; + int max_wq_size; + int rdbuf_limit; + int nr_rdbufs; /* non-reserved read buffers */ + unsigned int wqcfg_size; + unsigned long *wq_enable_map; + + union sw_err_reg sw_err; + wait_queue_head_t cmd_waitq; + + struct idxd_dma_dev *idxd_dma; + struct workqueue_struct *wq; + struct work_struct work; + + struct idxd_pmu *idxd_pmu; + + unsigned long *opcap_bmap; + struct idxd_evl *evl; + struct kmem_cache *evl_cache; + + struct dentry *dbgfs_dir; + struct dentry *dbgfs_evl_file; +}; + +static inline unsigned int evl_ent_size(struct idxd_device *idxd) +{ + return idxd->hw.gen_cap.evl_support ? + (32 * (1 << idxd->hw.gen_cap.evl_support)) : 0; +} + +static inline unsigned int evl_size(struct idxd_device *idxd) +{ + return idxd->evl->size * evl_ent_size(idxd); +} + +/* IDXD software descriptor */ +struct idxd_desc { + union { + struct dsa_hw_desc *hw; + struct iax_hw_desc *iax_hw; + }; + dma_addr_t desc_dma; + union { + struct dsa_completion_record *completion; + struct iax_completion_record *iax_completion; + }; + dma_addr_t compl_dma; + struct dma_async_tx_descriptor txd; + struct llist_node llnode; + struct list_head list; + int id; + int cpu; + struct idxd_wq *wq; +}; + +/* + * This is software defined error for the completion status. We overload the error code + * that will never appear in completion status and only SWERR register. + */ +enum idxd_completion_status { + IDXD_COMP_DESC_ABORT = 0xff, +}; + +#define idxd_confdev(idxd) &idxd->idxd_dev.conf_dev +#define wq_confdev(wq) &wq->idxd_dev.conf_dev +#define engine_confdev(engine) &engine->idxd_dev.conf_dev +#define group_confdev(group) &group->idxd_dev.conf_dev +#define cdev_dev(cdev) &cdev->idxd_dev.conf_dev +#define user_ctx_dev(ctx) (&(ctx)->idxd_dev.conf_dev) + +#define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev) +#define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev) +#define idxd_dev_to_wq(idxd_dev) container_of(idxd_dev, struct idxd_wq, idxd_dev) + +static inline struct idxd_device *confdev_to_idxd(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return idxd_dev_to_idxd(idxd_dev); +} + +static inline struct idxd_wq *confdev_to_wq(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return idxd_dev_to_wq(idxd_dev); +} + +static inline struct idxd_engine *confdev_to_engine(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_engine, idxd_dev); +} + +static inline struct idxd_group *confdev_to_group(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_group, idxd_dev); +} + +static inline struct idxd_cdev *dev_to_cdev(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_cdev, idxd_dev); +} + +static inline void idxd_dev_set_type(struct idxd_dev *idev, int type) +{ + if (type >= IDXD_DEV_MAX_TYPE) { + idev->type = IDXD_DEV_NONE; + return; + } + + idev->type = type; +} + +static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx) +{ + return (idx == 0) ? &idxd->ie : &idxd->wqs[idx - 1]->ie; +} + +static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie) +{ + return container_of(ie, struct idxd_wq, ie); +} + +static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie) +{ + return container_of(ie, struct idxd_device, ie); +} + +static inline void idxd_set_user_intr(struct idxd_device *idxd, bool enable) +{ + union gencfg_reg reg; + + reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + reg.user_int_en = enable; + iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); +} + +extern struct bus_type dsa_bus_type; + +extern bool support_enqcmd; +extern struct ida idxd_ida; +extern struct device_type dsa_device_type; +extern struct device_type iax_device_type; +extern struct device_type idxd_wq_device_type; +extern struct device_type idxd_engine_device_type; +extern struct device_type idxd_group_device_type; + +static inline bool is_dsa_dev(struct idxd_dev *idxd_dev) +{ + return idxd_dev->type == IDXD_DEV_DSA; +} + +static inline bool is_iax_dev(struct idxd_dev *idxd_dev) +{ + return idxd_dev->type == IDXD_DEV_IAX; +} + +static inline bool is_idxd_dev(struct idxd_dev *idxd_dev) +{ + return is_dsa_dev(idxd_dev) || is_iax_dev(idxd_dev); +} + +static inline bool is_idxd_wq_dev(struct idxd_dev *idxd_dev) +{ + return idxd_dev->type == IDXD_DEV_WQ; +} + +static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) +{ + if (wq->type == IDXD_WQT_KERNEL && strcmp(wq->name, "dmaengine") == 0) + return true; + return false; +} + +static inline bool is_idxd_wq_user(struct idxd_wq *wq) +{ + return wq->type == IDXD_WQT_USER; +} + +static inline bool is_idxd_wq_kernel(struct idxd_wq *wq) +{ + return wq->type == IDXD_WQT_KERNEL; +} + +static inline bool wq_dedicated(struct idxd_wq *wq) +{ + return test_bit(WQ_FLAG_DEDICATED, &wq->flags); +} + +static inline bool wq_shared(struct idxd_wq *wq) +{ + return !test_bit(WQ_FLAG_DEDICATED, &wq->flags); +} + +static inline bool device_pasid_enabled(struct idxd_device *idxd) +{ + return test_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); +} + +static inline bool device_user_pasid_enabled(struct idxd_device *idxd) +{ + return test_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); +} + +static inline bool wq_pasid_enabled(struct idxd_wq *wq) +{ + return (is_idxd_wq_kernel(wq) && device_pasid_enabled(wq->idxd)) || + (is_idxd_wq_user(wq) && device_user_pasid_enabled(wq->idxd)); +} + +static inline bool wq_shared_supported(struct idxd_wq *wq) +{ + return (support_enqcmd && wq_pasid_enabled(wq)); +} + +enum idxd_portal_prot { + IDXD_PORTAL_UNLIMITED = 0, + IDXD_PORTAL_LIMITED, +}; + +enum idxd_interrupt_type { + IDXD_IRQ_MSIX = 0, + IDXD_IRQ_IMS, +}; + +static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot) +{ + return prot * 0x1000; +} + +static inline int idxd_get_wq_portal_full_offset(int wq_id, + enum idxd_portal_prot prot) +{ + return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot); +} + +#define IDXD_PORTAL_MASK (PAGE_SIZE - 1) + +/* + * Even though this function can be accessed by multiple threads, it is safe to use. + * At worst the address gets used more than once before it gets incremented. We don't + * hit a threshold until iops becomes many million times a second. So the occasional + * reuse of the same address is tolerable compare to using an atomic variable. This is + * safe on a system that has atomic load/store for 32bit integers. Given that this is an + * Intel iEP device, that should not be a problem. + */ +static inline void __iomem *idxd_wq_portal_addr(struct idxd_wq *wq) +{ + int ofs = wq->portal_offset; + + wq->portal_offset = (ofs + sizeof(struct dsa_raw_desc)) & IDXD_PORTAL_MASK; + return wq->portal + ofs; +} + +static inline void idxd_wq_get(struct idxd_wq *wq) +{ + wq->client_count++; +} + +static inline void idxd_wq_put(struct idxd_wq *wq) +{ + wq->client_count--; +} + +static inline int idxd_wq_refcount(struct idxd_wq *wq) +{ + return wq->client_count; +}; + +/* + * Intel IAA does not support batch processing. + * The max batch size of device, max batch size of wq and + * max batch shift of wqcfg should be always 0 on IAA. + */ +static inline void idxd_set_max_batch_size(int idxd_type, struct idxd_device *idxd, + u32 max_batch_size) +{ + if (idxd_type == IDXD_TYPE_IAX) + idxd->max_batch_size = 0; + else + idxd->max_batch_size = max_batch_size; +} + +static inline void idxd_wq_set_max_batch_size(int idxd_type, struct idxd_wq *wq, + u32 max_batch_size) +{ + if (idxd_type == IDXD_TYPE_IAX) + wq->max_batch_size = 0; + else + wq->max_batch_size = max_batch_size; +} + +static inline void idxd_wqcfg_set_max_batch_shift(int idxd_type, union wqcfg *wqcfg, + u32 max_batch_shift) +{ + if (idxd_type == IDXD_TYPE_IAX) + wqcfg->max_batch_shift = 0; + else + wqcfg->max_batch_shift = max_batch_shift; +} + +int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv, + struct module *module, const char *mod_name); +#define idxd_driver_register(driver) \ + __idxd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) + +void idxd_driver_unregister(struct idxd_device_driver *idxd_drv); + +#define module_idxd_driver(__idxd_driver) \ + module_driver(__idxd_driver, idxd_driver_register, idxd_driver_unregister) + +int idxd_register_bus_type(void); +void idxd_unregister_bus_type(void); +int idxd_register_devices(struct idxd_device *idxd); +void idxd_unregister_devices(struct idxd_device *idxd); +void idxd_wqs_quiesce(struct idxd_device *idxd); +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); +void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count); + +/* device interrupt control */ +irqreturn_t idxd_misc_thread(int vec, void *data); +irqreturn_t idxd_wq_thread(int irq, void *data); +void idxd_mask_error_interrupts(struct idxd_device *idxd); +void idxd_unmask_error_interrupts(struct idxd_device *idxd); + +/* device control */ +int idxd_device_drv_probe(struct idxd_dev *idxd_dev); +void idxd_device_drv_remove(struct idxd_dev *idxd_dev); +int drv_enable_wq(struct idxd_wq *wq); +void drv_disable_wq(struct idxd_wq *wq); +int idxd_device_init_reset(struct idxd_device *idxd); +int idxd_device_enable(struct idxd_device *idxd); +int idxd_device_disable(struct idxd_device *idxd); +void idxd_device_reset(struct idxd_device *idxd); +void idxd_device_clear_state(struct idxd_device *idxd); +int idxd_device_config(struct idxd_device *idxd); +void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid); +int idxd_device_load_config(struct idxd_device *idxd); +int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle, + enum idxd_interrupt_type irq_type); +int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, + enum idxd_interrupt_type irq_type); + +/* work queue control */ +void idxd_wqs_unmap_portal(struct idxd_device *idxd); +int idxd_wq_alloc_resources(struct idxd_wq *wq); +void idxd_wq_free_resources(struct idxd_wq *wq); +int idxd_wq_enable(struct idxd_wq *wq); +int idxd_wq_disable(struct idxd_wq *wq, bool reset_config); +void idxd_wq_drain(struct idxd_wq *wq); +void idxd_wq_reset(struct idxd_wq *wq); +int idxd_wq_map_portal(struct idxd_wq *wq); +void idxd_wq_unmap_portal(struct idxd_wq *wq); +int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); +int idxd_wq_disable_pasid(struct idxd_wq *wq); +void __idxd_wq_quiesce(struct idxd_wq *wq); +void idxd_wq_quiesce(struct idxd_wq *wq); +int idxd_wq_init_percpu_ref(struct idxd_wq *wq); +void idxd_wq_free_irq(struct idxd_wq *wq); +int idxd_wq_request_irq(struct idxd_wq *wq); + +/* submission */ +int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); +struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype); +void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc); +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc); + +/* dmaengine */ +int idxd_register_dma_device(struct idxd_device *idxd); +void idxd_unregister_dma_device(struct idxd_device *idxd); +void idxd_dma_complete_txd(struct idxd_desc *desc, + enum idxd_complete_type comp_type, bool free_desc); + +/* cdev */ +int idxd_cdev_register(void); +void idxd_cdev_remove(void); +int idxd_cdev_get_major(struct idxd_device *idxd); +int idxd_wq_add_cdev(struct idxd_wq *wq); +void idxd_wq_del_cdev(struct idxd_wq *wq); +int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, + void *buf, int len); +void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index); + +/* perfmon */ +#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) +int perfmon_pmu_init(struct idxd_device *idxd); +void perfmon_pmu_remove(struct idxd_device *idxd); +void perfmon_counter_overflow(struct idxd_device *idxd); +void perfmon_init(void); +void perfmon_exit(void); +#else +static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; } +static inline void perfmon_pmu_remove(struct idxd_device *idxd) {} +static inline void perfmon_counter_overflow(struct idxd_device *idxd) {} +static inline void perfmon_init(void) {} +static inline void perfmon_exit(void) {} +#endif + +/* debugfs */ +int idxd_device_init_debugfs(struct idxd_device *idxd); +void idxd_device_remove_debugfs(struct idxd_device *idxd); +int idxd_init_debugfs(void); +void idxd_remove_debugfs(void); + +#endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c new file mode 100644 index 0000000000..0eb1c827a2 --- /dev/null +++ b/drivers/dma/idxd/init.c @@ -0,0 +1,913 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../dmaengine.h" +#include "registers.h" +#include "idxd.h" +#include "perfmon.h" + +MODULE_VERSION(IDXD_DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_IMPORT_NS(IDXD); + +static bool sva = true; +module_param(sva, bool, 0644); +MODULE_PARM_DESC(sva, "Toggle SVA support on/off"); + +bool tc_override; +module_param(tc_override, bool, 0644); +MODULE_PARM_DESC(tc_override, "Override traffic class defaults"); + +#define DRV_NAME "idxd" + +bool support_enqcmd; +DEFINE_IDA(idxd_ida); + +static struct idxd_driver_data idxd_driver_data[] = { + [IDXD_TYPE_DSA] = { + .name_prefix = "dsa", + .type = IDXD_TYPE_DSA, + .compl_size = sizeof(struct dsa_completion_record), + .align = 32, + .dev_type = &dsa_device_type, + .evl_cr_off = offsetof(struct dsa_evl_entry, cr), + .cr_status_off = offsetof(struct dsa_completion_record, status), + .cr_result_off = offsetof(struct dsa_completion_record, result), + }, + [IDXD_TYPE_IAX] = { + .name_prefix = "iax", + .type = IDXD_TYPE_IAX, + .compl_size = sizeof(struct iax_completion_record), + .align = 64, + .dev_type = &iax_device_type, + .evl_cr_off = offsetof(struct iax_evl_entry, cr), + .cr_status_off = offsetof(struct iax_completion_record, status), + .cr_result_off = offsetof(struct iax_completion_record, error_code), + }, +}; + +static struct pci_device_id idxd_pci_tbl[] = { + /* DSA ver 1.0 platforms */ + { PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) }, + + /* IAX ver 1.0 platforms */ + { PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, idxd_pci_tbl); + +static int idxd_setup_interrupts(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct idxd_irq_entry *ie; + int i, msixcnt; + int rc = 0; + + msixcnt = pci_msix_vec_count(pdev); + if (msixcnt < 0) { + dev_err(dev, "Not MSI-X interrupt capable.\n"); + return -ENOSPC; + } + idxd->irq_cnt = msixcnt; + + rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX); + if (rc != msixcnt) { + dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc); + return -ENOSPC; + } + dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt); + + + ie = idxd_get_ie(idxd, 0); + ie->vector = pci_irq_vector(pdev, 0); + rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie); + if (rc < 0) { + dev_err(dev, "Failed to allocate misc interrupt.\n"); + goto err_misc_irq; + } + dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector); + + for (i = 0; i < idxd->max_wqs; i++) { + int msix_idx = i + 1; + + ie = idxd_get_ie(idxd, msix_idx); + ie->id = msix_idx; + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = IOMMU_PASID_INVALID; + + spin_lock_init(&ie->list_lock); + init_llist_head(&ie->pending_llist); + INIT_LIST_HEAD(&ie->work_list); + } + + idxd_unmask_error_interrupts(idxd); + return 0; + + err_misc_irq: + idxd_mask_error_interrupts(idxd); + pci_free_irq_vectors(pdev); + dev_err(dev, "No usable interrupts\n"); + return rc; +} + +static void idxd_cleanup_interrupts(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + struct idxd_irq_entry *ie; + int msixcnt; + + msixcnt = pci_msix_vec_count(pdev); + if (msixcnt <= 0) + return; + + ie = idxd_get_ie(idxd, 0); + idxd_mask_error_interrupts(idxd); + free_irq(ie->vector, ie); + pci_free_irq_vectors(pdev); +} + +static int idxd_setup_wqs(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_wq *wq; + struct device *conf_dev; + int i, rc; + + idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->wqs) + return -ENOMEM; + + idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev)); + if (!idxd->wq_enable_map) { + kfree(idxd->wqs); + return -ENOMEM; + } + + for (i = 0; i < idxd->max_wqs; i++) { + wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev)); + if (!wq) { + rc = -ENOMEM; + goto err; + } + + idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ); + conf_dev = wq_confdev(wq); + wq->id = i; + wq->idxd = idxd; + device_initialize(wq_confdev(wq)); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_wq_device_type; + rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id); + if (rc < 0) { + put_device(conf_dev); + goto err; + } + + mutex_init(&wq->wq_lock); + init_waitqueue_head(&wq->err_queue); + init_completion(&wq->wq_dead); + init_completion(&wq->wq_resurrect); + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH); + wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; + wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); + if (!wq->wqcfg) { + put_device(conf_dev); + rc = -ENOMEM; + goto err; + } + + if (idxd->hw.wq_cap.op_config) { + wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); + if (!wq->opcap_bmap) { + put_device(conf_dev); + rc = -ENOMEM; + goto err; + } + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); + } + mutex_init(&wq->uc_lock); + xa_init(&wq->upasid_xa); + idxd->wqs[i] = wq; + } + + return 0; + + err: + while (--i >= 0) { + wq = idxd->wqs[i]; + conf_dev = wq_confdev(wq); + put_device(conf_dev); + } + return rc; +} + +static int idxd_setup_engines(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + struct device *dev = &idxd->pdev->dev; + struct device *conf_dev; + int i, rc; + + idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->engines) + return -ENOMEM; + + for (i = 0; i < idxd->max_engines; i++) { + engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev)); + if (!engine) { + rc = -ENOMEM; + goto err; + } + + idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE); + conf_dev = engine_confdev(engine); + engine->id = i; + engine->idxd = idxd; + device_initialize(conf_dev); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_engine_device_type; + rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id); + if (rc < 0) { + put_device(conf_dev); + goto err; + } + + idxd->engines[i] = engine; + } + + return 0; + + err: + while (--i >= 0) { + engine = idxd->engines[i]; + conf_dev = engine_confdev(engine); + put_device(conf_dev); + } + return rc; +} + +static int idxd_setup_groups(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct device *conf_dev; + struct idxd_group *group; + int i, rc; + + idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->groups) + return -ENOMEM; + + for (i = 0; i < idxd->max_groups; i++) { + group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev)); + if (!group) { + rc = -ENOMEM; + goto err; + } + + idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP); + conf_dev = group_confdev(group); + group->id = i; + group->idxd = idxd; + device_initialize(conf_dev); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_group_device_type; + rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id); + if (rc < 0) { + put_device(conf_dev); + goto err; + } + + idxd->groups[i] = group; + if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) { + group->tc_a = 1; + group->tc_b = 1; + } else { + group->tc_a = -1; + group->tc_b = -1; + } + /* + * The default value is the same as the value of + * total read buffers in GRPCAP. + */ + group->rdbufs_allowed = idxd->max_rdbufs; + } + + return 0; + + err: + while (--i >= 0) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + } + return rc; +} + +static void idxd_cleanup_internals(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_groups; i++) + put_device(group_confdev(idxd->groups[i])); + for (i = 0; i < idxd->max_engines; i++) + put_device(engine_confdev(idxd->engines[i])); + for (i = 0; i < idxd->max_wqs; i++) + put_device(wq_confdev(idxd->wqs[i])); + destroy_workqueue(idxd->wq); +} + +static int idxd_init_evl(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl; + + if (idxd->hw.gen_cap.evl_support == 0) + return 0; + + evl = kzalloc_node(sizeof(*evl), GFP_KERNEL, dev_to_node(dev)); + if (!evl) + return -ENOMEM; + + spin_lock_init(&evl->lock); + evl->size = IDXD_EVL_SIZE_MIN; + + idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)), + sizeof(struct idxd_evl_fault) + evl_ent_size(idxd), + 0, 0, NULL); + if (!idxd->evl_cache) { + kfree(evl); + return -ENOMEM; + } + + idxd->evl = evl; + return 0; +} + +static int idxd_setup_internals(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + int rc, i; + + init_waitqueue_head(&idxd->cmd_waitq); + + rc = idxd_setup_wqs(idxd); + if (rc < 0) + goto err_wqs; + + rc = idxd_setup_engines(idxd); + if (rc < 0) + goto err_engine; + + rc = idxd_setup_groups(idxd); + if (rc < 0) + goto err_group; + + idxd->wq = create_workqueue(dev_name(dev)); + if (!idxd->wq) { + rc = -ENOMEM; + goto err_wkq_create; + } + + rc = idxd_init_evl(idxd); + if (rc < 0) + goto err_evl; + + return 0; + + err_evl: + destroy_workqueue(idxd->wq); + err_wkq_create: + for (i = 0; i < idxd->max_groups; i++) + put_device(group_confdev(idxd->groups[i])); + err_group: + for (i = 0; i < idxd->max_engines; i++) + put_device(engine_confdev(idxd->engines[i])); + err_engine: + for (i = 0; i < idxd->max_wqs; i++) + put_device(wq_confdev(idxd->wqs[i])); + err_wqs: + return rc; +} + +static void idxd_read_table_offsets(struct idxd_device *idxd) +{ + union offsets_reg offsets; + struct device *dev = &idxd->pdev->dev; + + offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET); + offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET + sizeof(u64)); + idxd->grpcfg_offset = offsets.grpcfg * IDXD_TABLE_MULT; + dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset); + idxd->wqcfg_offset = offsets.wqcfg * IDXD_TABLE_MULT; + dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n", idxd->wqcfg_offset); + idxd->msix_perm_offset = offsets.msix_perm * IDXD_TABLE_MULT; + dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n", idxd->msix_perm_offset); + idxd->perfmon_offset = offsets.perfmon * IDXD_TABLE_MULT; + dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset); +} + +void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count) +{ + int i, j, nr; + + for (i = 0, nr = 0; i < count; i++) { + for (j = 0; j < BITS_PER_LONG_LONG; j++) { + if (val[i] & BIT(j)) + set_bit(nr, bmap); + nr++; + } + } +} + +static void idxd_read_caps(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + int i; + + /* reading generic capabilities */ + idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET); + dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits); + + if (idxd->hw.gen_cap.cmd_cap) { + idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET); + dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap); + } + + /* reading command capabilities */ + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) + idxd->request_int_handles = true; + + idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift; + dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes); + idxd_set_max_batch_size(idxd->data->type, idxd, 1U << idxd->hw.gen_cap.max_batch_shift); + dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size); + if (idxd->hw.gen_cap.config_en) + set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags); + + /* reading group capabilities */ + idxd->hw.group_cap.bits = + ioread64(idxd->reg_base + IDXD_GRPCAP_OFFSET); + dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits); + idxd->max_groups = idxd->hw.group_cap.num_groups; + dev_dbg(dev, "max groups: %u\n", idxd->max_groups); + idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs; + dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs); + idxd->nr_rdbufs = idxd->max_rdbufs; + + /* read engine capabilities */ + idxd->hw.engine_cap.bits = + ioread64(idxd->reg_base + IDXD_ENGCAP_OFFSET); + dev_dbg(dev, "engine_cap: %#llx\n", idxd->hw.engine_cap.bits); + idxd->max_engines = idxd->hw.engine_cap.num_engines; + dev_dbg(dev, "max engines: %u\n", idxd->max_engines); + + /* read workqueue capabilities */ + idxd->hw.wq_cap.bits = ioread64(idxd->reg_base + IDXD_WQCAP_OFFSET); + dev_dbg(dev, "wq_cap: %#llx\n", idxd->hw.wq_cap.bits); + idxd->max_wq_size = idxd->hw.wq_cap.total_wq_size; + dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size); + idxd->max_wqs = idxd->hw.wq_cap.num_wqs; + dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs); + idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN); + dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size); + + /* reading operation capabilities */ + for (i = 0; i < 4; i++) { + idxd->hw.opcap.bits[i] = ioread64(idxd->reg_base + + IDXD_OPCAP_OFFSET + i * sizeof(u64)); + dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]); + } + multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4); + + /* read iaa cap */ + if (idxd->data->type == IDXD_TYPE_IAX && idxd->hw.version >= DEVICE_VERSION_2) + idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET); +} + +static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) +{ + struct device *dev = &pdev->dev; + struct device *conf_dev; + struct idxd_device *idxd; + int rc; + + idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev)); + if (!idxd) + return NULL; + + conf_dev = idxd_confdev(idxd); + idxd->pdev = pdev; + idxd->data = data; + idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type); + idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); + if (idxd->id < 0) + return NULL; + + idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev)); + if (!idxd->opcap_bmap) { + ida_free(&idxd_ida, idxd->id); + return NULL; + } + + device_initialize(conf_dev); + conf_dev->parent = dev; + conf_dev->bus = &dsa_bus_type; + conf_dev->type = idxd->data->dev_type; + rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); + if (rc < 0) { + put_device(conf_dev); + return NULL; + } + + spin_lock_init(&idxd->dev_lock); + spin_lock_init(&idxd->cmd_lock); + + return idxd; +} + +static int idxd_enable_system_pasid(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct iommu_domain *domain; + ioasid_t pasid; + int ret; + + /* + * Attach a global PASID to the DMA domain so that we can use ENQCMDS + * to submit work on buffers mapped by DMA API. + */ + domain = iommu_get_domain_for_dev(dev); + if (!domain) + return -EPERM; + + pasid = iommu_alloc_global_pasid(dev); + if (pasid == IOMMU_PASID_INVALID) + return -ENOSPC; + + /* + * DMA domain is owned by the driver, it should support all valid + * types such as DMA-FQ, identity, etc. + */ + ret = iommu_attach_device_pasid(domain, dev, pasid); + if (ret) { + dev_err(dev, "failed to attach device pasid %d, domain type %d", + pasid, domain->type); + iommu_free_global_pasid(pasid); + return ret; + } + + /* Since we set user privilege for kernel DMA, enable completion IRQ */ + idxd_set_user_intr(idxd, 1); + idxd->pasid = pasid; + + return ret; +} + +static void idxd_disable_system_pasid(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(dev); + if (!domain) + return; + + iommu_detach_device_pasid(domain, dev, idxd->pasid); + iommu_free_global_pasid(idxd->pasid); + + idxd_set_user_intr(idxd, 0); + idxd->sva = NULL; + idxd->pasid = IOMMU_PASID_INVALID; +} + +static int idxd_enable_sva(struct pci_dev *pdev) +{ + int ret; + + ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); + if (ret) + return ret; + + ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + if (ret) + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); + + return ret; +} + +static void idxd_disable_sva(struct pci_dev *pdev) +{ + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); +} + +static int idxd_probe(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + int rc; + + dev_dbg(dev, "%s entered and resetting device\n", __func__); + rc = idxd_device_init_reset(idxd); + if (rc < 0) + return rc; + + dev_dbg(dev, "IDXD reset complete\n"); + + if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { + if (idxd_enable_sva(pdev)) { + dev_warn(dev, "Unable to turn on user SVA feature.\n"); + } else { + set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); + + rc = idxd_enable_system_pasid(idxd); + if (rc) + dev_warn(dev, "No in-kernel DMA with PASID. %d\n", rc); + else + set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } + } else if (!sva) { + dev_warn(dev, "User forced SVA off via module param.\n"); + } + + idxd_read_caps(idxd); + idxd_read_table_offsets(idxd); + + rc = idxd_setup_internals(idxd); + if (rc) + goto err; + + /* If the configs are readonly, then load them from device */ + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + dev_dbg(dev, "Loading RO device config\n"); + rc = idxd_device_load_config(idxd); + if (rc < 0) + goto err_config; + } + + rc = idxd_setup_interrupts(idxd); + if (rc) + goto err_config; + + idxd->major = idxd_cdev_get_major(idxd); + + rc = perfmon_pmu_init(idxd); + if (rc < 0) + dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc); + + dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); + return 0; + + err_config: + idxd_cleanup_internals(idxd); + err: + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); + if (device_user_pasid_enabled(idxd)) + idxd_disable_sva(pdev); + return rc; +} + +static void idxd_cleanup(struct idxd_device *idxd) +{ + perfmon_pmu_remove(idxd); + idxd_cleanup_interrupts(idxd); + idxd_cleanup_internals(idxd); + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); + if (device_user_pasid_enabled(idxd)) + idxd_disable_sva(idxd->pdev); +} + +static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct idxd_device *idxd; + struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data; + int rc; + + rc = pci_enable_device(pdev); + if (rc) + return rc; + + dev_dbg(dev, "Alloc IDXD context\n"); + idxd = idxd_alloc(pdev, data); + if (!idxd) { + rc = -ENOMEM; + goto err_idxd_alloc; + } + + dev_dbg(dev, "Mapping BARs\n"); + idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0); + if (!idxd->reg_base) { + rc = -ENOMEM; + goto err_iomap; + } + + dev_dbg(dev, "Set DMA masks\n"); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + goto err; + + dev_dbg(dev, "Set PCI master\n"); + pci_set_master(pdev); + pci_set_drvdata(pdev, idxd); + + idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET); + rc = idxd_probe(idxd); + if (rc) { + dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n"); + goto err; + } + + rc = idxd_register_devices(idxd); + if (rc) { + dev_err(dev, "IDXD sysfs setup failed\n"); + goto err_dev_register; + } + + rc = idxd_device_init_debugfs(idxd); + if (rc) + dev_warn(dev, "IDXD debugfs failed to setup\n"); + + dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", + idxd->hw.version); + + return 0; + + err_dev_register: + idxd_cleanup(idxd); + err: + pci_iounmap(pdev, idxd->reg_base); + err_iomap: + put_device(idxd_confdev(idxd)); + err_idxd_alloc: + pci_disable_device(pdev); + return rc; +} + +void idxd_wqs_quiesce(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL) + idxd_wq_quiesce(wq); + } +} + +static void idxd_shutdown(struct pci_dev *pdev) +{ + struct idxd_device *idxd = pci_get_drvdata(pdev); + struct idxd_irq_entry *irq_entry; + int rc; + + rc = idxd_device_disable(idxd); + if (rc) + dev_err(&pdev->dev, "Disabling device failed\n"); + + irq_entry = &idxd->ie; + synchronize_irq(irq_entry->vector); + idxd_mask_error_interrupts(idxd); + flush_workqueue(idxd->wq); +} + +static void idxd_remove(struct pci_dev *pdev) +{ + struct idxd_device *idxd = pci_get_drvdata(pdev); + struct idxd_irq_entry *irq_entry; + + idxd_unregister_devices(idxd); + /* + * When ->release() is called for the idxd->conf_dev, it frees all the memory related + * to the idxd context. The driver still needs those bits in order to do the rest of + * the cleanup. However, we do need to unbound the idxd sub-driver. So take a ref + * on the device here to hold off the freeing while allowing the idxd sub-driver + * to unbind. + */ + get_device(idxd_confdev(idxd)); + device_unregister(idxd_confdev(idxd)); + idxd_shutdown(pdev); + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); + idxd_device_remove_debugfs(idxd); + + irq_entry = idxd_get_ie(idxd, 0); + free_irq(irq_entry->vector, irq_entry); + pci_free_irq_vectors(pdev); + pci_iounmap(pdev, idxd->reg_base); + if (device_user_pasid_enabled(idxd)) + idxd_disable_sva(pdev); + pci_disable_device(pdev); + destroy_workqueue(idxd->wq); + perfmon_pmu_remove(idxd); + put_device(idxd_confdev(idxd)); +} + +static struct pci_driver idxd_pci_driver = { + .name = DRV_NAME, + .id_table = idxd_pci_tbl, + .probe = idxd_pci_probe, + .remove = idxd_remove, + .shutdown = idxd_shutdown, +}; + +static int __init idxd_init_module(void) +{ + int err; + + /* + * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in + * enumerating the device. We can not utilize it. + */ + if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) { + pr_warn("idxd driver failed to load without MOVDIR64B.\n"); + return -ENODEV; + } + + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + pr_warn("Platform does not have ENQCMD(S) support.\n"); + else + support_enqcmd = true; + + perfmon_init(); + + err = idxd_driver_register(&idxd_drv); + if (err < 0) + goto err_idxd_driver_register; + + err = idxd_driver_register(&idxd_dmaengine_drv); + if (err < 0) + goto err_idxd_dmaengine_driver_register; + + err = idxd_driver_register(&idxd_user_drv); + if (err < 0) + goto err_idxd_user_driver_register; + + err = idxd_cdev_register(); + if (err) + goto err_cdev_register; + + err = idxd_init_debugfs(); + if (err) + goto err_debugfs; + + err = pci_register_driver(&idxd_pci_driver); + if (err) + goto err_pci_register; + + return 0; + +err_pci_register: + idxd_remove_debugfs(); +err_debugfs: + idxd_cdev_remove(); +err_cdev_register: + idxd_driver_unregister(&idxd_user_drv); +err_idxd_user_driver_register: + idxd_driver_unregister(&idxd_dmaengine_drv); +err_idxd_dmaengine_driver_register: + idxd_driver_unregister(&idxd_drv); +err_idxd_driver_register: + return err; +} +module_init(idxd_init_module); + +static void __exit idxd_exit_module(void) +{ + idxd_driver_unregister(&idxd_user_drv); + idxd_driver_unregister(&idxd_dmaengine_drv); + idxd_driver_unregister(&idxd_drv); + pci_unregister_driver(&idxd_pci_driver); + idxd_cdev_remove(); + perfmon_exit(); + idxd_remove_debugfs(); +} +module_exit(idxd_exit_module); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c new file mode 100644 index 0000000000..b501320a9c --- /dev/null +++ b/drivers/dma/idxd/irq.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../dmaengine.h" +#include "idxd.h" +#include "registers.h" + +enum irq_work_type { + IRQ_WORK_NORMAL = 0, + IRQ_WORK_PROCESS_FAULT, +}; + +struct idxd_resubmit { + struct work_struct work; + struct idxd_desc *desc; +}; + +struct idxd_int_handle_revoke { + struct work_struct work; + struct idxd_device *idxd; +}; + +static void idxd_device_reinit(struct work_struct *work) +{ + struct idxd_device *idxd = container_of(work, struct idxd_device, work); + struct device *dev = &idxd->pdev->dev; + int rc, i; + + idxd_device_reset(idxd); + rc = idxd_device_config(idxd); + if (rc < 0) + goto out; + + rc = idxd_device_enable(idxd); + if (rc < 0) + goto out; + + for (i = 0; i < idxd->max_wqs; i++) { + if (test_bit(i, idxd->wq_enable_map)) { + struct idxd_wq *wq = idxd->wqs[i]; + + rc = idxd_wq_enable(wq); + if (rc < 0) { + clear_bit(i, idxd->wq_enable_map); + dev_warn(dev, "Unable to re-enable wq %s\n", + dev_name(wq_confdev(wq))); + } + } + } + + return; + + out: + idxd_device_clear_state(idxd); +} + +/* + * The function sends a drain descriptor for the interrupt handle. The drain ensures + * all descriptors with this interrupt handle is flushed and the interrupt + * will allow the cleanup of the outstanding descriptors. + */ +static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) +{ + struct idxd_wq *wq = ie_to_wq(ie); + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + struct dsa_hw_desc desc = {}; + void __iomem *portal; + int rc; + + /* Issue a simple drain operation with interrupt but no completion record */ + desc.flags = IDXD_OP_FLAG_RCI; + desc.opcode = DSA_OPCODE_DRAIN; + desc.priv = 1; + + if (ie->pasid != IOMMU_PASID_INVALID) + desc.pasid = ie->pasid; + desc.int_handle = ie->int_handle; + portal = idxd_wq_portal_addr(wq); + + /* + * The wmb() makes sure that the descriptor is all there before we + * issue. + */ + wmb(); + if (wq_dedicated(wq)) { + iosubmit_cmds512(portal, &desc, 1); + } else { + rc = idxd_enqcmds(wq, portal, &desc); + /* This should not fail unless hardware failed. */ + if (rc < 0) + dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); + } +} + +static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie) +{ + LIST_HEAD(flist); + struct idxd_desc *d, *t; + struct llist_node *head; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) + list_add_tail(&d->list, &ie->work_list); + } + + list_for_each_entry_safe(d, t, &ie->work_list, list) { + if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL) + list_move_tail(&d->list, &flist); + } + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(d, t, &flist, list) { + list_del(&d->list); + idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true); + } +} + +static void idxd_int_handle_revoke(struct work_struct *work) +{ + struct idxd_int_handle_revoke *revoke = + container_of(work, struct idxd_int_handle_revoke, work); + struct idxd_device *idxd = revoke->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + int i, new_handle, rc; + + if (!idxd->request_int_handles) { + kfree(revoke); + dev_warn(dev, "Unexpected int handle refresh interrupt.\n"); + return; + } + + /* + * The loop attempts to acquire new interrupt handle for all interrupt + * vectors that supports a handle. If a new interrupt handle is acquired and the + * wq is kernel type, the driver will kill the percpu_ref to pause all + * ongoing descriptor submissions. The interrupt handle is then changed. + * After change, the percpu_ref is revived and all the pending submissions + * are woken to try again. A drain is sent to for the interrupt handle + * at the end to make sure all invalid int handle descriptors are processed. + */ + for (i = 1; i < idxd->irq_cnt; i++) { + struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); + struct idxd_wq *wq = ie_to_wq(ie); + + if (ie->int_handle == INVALID_INT_HANDLE) + continue; + + rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); + if (rc < 0) { + dev_warn(dev, "get int handle %d failed: %d\n", i, rc); + /* + * Failed to acquire new interrupt handle. Kill the WQ + * and release all the pending submitters. The submitters will + * get error return code and handle appropriately. + */ + ie->int_handle = INVALID_INT_HANDLE; + idxd_wq_quiesce(wq); + idxd_abort_invalid_int_handle_descs(ie); + continue; + } + + /* No change in interrupt handle, nothing needs to be done */ + if (ie->int_handle == new_handle) + continue; + + if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) { + /* + * All the MSIX interrupts are allocated at once during probe. + * Therefore we need to update all interrupts even if the WQ + * isn't supporting interrupt operations. + */ + ie->int_handle = new_handle; + continue; + } + + mutex_lock(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); + + /* Kill percpu_ref to pause additional descriptor submissions */ + percpu_ref_kill(&wq->wq_active); + + /* Wait for all submitters quiesce before we change interrupt handle */ + wait_for_completion(&wq->wq_dead); + + ie->int_handle = new_handle; + + /* Revive percpu ref and wake up all the waiting submitters */ + percpu_ref_reinit(&wq->wq_active); + complete_all(&wq->wq_resurrect); + mutex_unlock(&wq->wq_lock); + + /* + * The delay here is to wait for all possible MOVDIR64B that + * are issued before percpu_ref_kill() has happened to have + * reached the PCIe domain before the drain is issued. The driver + * needs to ensure that the drain descriptor issued does not pass + * all the other issued descriptors that contain the invalid + * interrupt handle in order to ensure that the drain descriptor + * interrupt will allow the cleanup of all the descriptors with + * invalid interrupt handle. + */ + if (wq_dedicated(wq)) + udelay(100); + idxd_int_handle_revoke_drain(ie); + } + kfree(revoke); +} + +static void idxd_evl_fault_work(struct work_struct *work) +{ + struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work); + struct idxd_wq *wq = fault->wq; + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + struct __evl_entry *entry_head = fault->entry; + void *cr = (void *)entry_head + idxd->data->evl_cr_off; + int cr_size = idxd->data->compl_size; + u8 *status = (u8 *)cr + idxd->data->cr_status_off; + u8 *result = (u8 *)cr + idxd->data->cr_result_off; + int copied, copy_size; + bool *bf; + + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + if (entry_head->batch && entry_head->first_err_in_batch) + evl->batch_fail[entry_head->batch_id] = false; + + copy_size = cr_size; + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); + break; + case DSA_COMP_BATCH_EVL_ERR: + bf = &evl->batch_fail[entry_head->batch_id]; + + copy_size = entry_head->rcr || *bf ? cr_size : 0; + if (*bf) { + if (*status == DSA_COMP_SUCCESS) + *status = DSA_COMP_BATCH_FAIL; + *result = 1; + *bf = false; + } + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); + break; + case DSA_COMP_DRAIN_EVL: + copy_size = cr_size; + break; + default: + copy_size = 0; + dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status); + break; + } + + if (copy_size == 0) + return; + + /* + * Copy completion record to fault_addr in user address space + * that is found by wq and PASID. + */ + copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr, + cr, copy_size); + /* + * The task that triggered the page fault is unknown currently + * because multiple threads may share the user address + * space or the task exits already before this fault. + * So if the copy fails, SIGSEGV can not be sent to the task. + * Just print an error for the failure. The user application + * waiting for the completion record will time out on this + * failure. + */ + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + if (copied != copy_size) { + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); + dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n", + copy_size, copied); + if (entry_head->batch) + evl->batch_fail[entry_head->batch_id] = true; + } + break; + case DSA_COMP_BATCH_EVL_ERR: + if (copied != copy_size) { + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); + dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n", + copy_size, copied); + } + break; + case DSA_COMP_DRAIN_EVL: + if (copied != copy_size) + dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n", + copy_size, copied); + break; + } + + kmem_cache_free(idxd->evl_cache, fault); +} + +static void process_evl_entry(struct idxd_device *idxd, + struct __evl_entry *entry_head, unsigned int index) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + u8 status; + + if (test_bit(index, evl->bmap)) { + clear_bit(index, evl->bmap); + } else { + status = DSA_COMP_STATUS(entry_head->error); + + if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL || + status == DSA_COMP_BATCH_EVL_ERR) { + struct idxd_evl_fault *fault; + int ent_size = evl_ent_size(idxd); + + if (entry_head->rci) + dev_dbg(dev, "Completion Int Req set, ignoring!\n"); + + if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL) + return; + + fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC); + if (fault) { + struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx]; + + fault->wq = wq; + fault->status = status; + memcpy(&fault->entry, entry_head, ent_size); + INIT_WORK(&fault->work, idxd_evl_fault_work); + queue_work(wq->wq, &fault->work); + } else { + dev_warn(dev, "Failed to service fault work.\n"); + } + } else { + dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", + status, entry_head->operation, + entry_head->fault_addr); + } + } +} + +static void process_evl_entries(struct idxd_device *idxd) +{ + union evl_status_reg evl_status; + unsigned int h, t; + struct idxd_evl *evl = idxd->evl; + struct __evl_entry *entry_head; + unsigned int ent_size = evl_ent_size(idxd); + u32 size; + + evl_status.bits = 0; + evl_status.int_pending = 1; + + spin_lock(&evl->lock); + /* Clear interrupt pending bit */ + iowrite32(evl_status.bits_upper32, + idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + size = idxd->evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + process_evl_entry(idxd, entry_head, h); + h = (h + 1) % size; + } + + evl->head = h; + evl_status.head = h; + iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + spin_unlock(&evl->lock); +} + +irqreturn_t idxd_misc_thread(int vec, void *data) +{ + struct idxd_irq_entry *irq_entry = data; + struct idxd_device *idxd = ie_to_idxd(irq_entry); + struct device *dev = &idxd->pdev->dev; + union gensts_reg gensts; + u32 val = 0; + int i; + bool err = false; + u32 cause; + + cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); + if (!cause) + return IRQ_NONE; + + iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); + + if (cause & IDXD_INTC_HALT_STATE) + goto halt; + + if (cause & IDXD_INTC_ERR) { + spin_lock(&idxd->dev_lock); + for (i = 0; i < 4; i++) + idxd->sw_err.bits[i] = ioread64(idxd->reg_base + + IDXD_SWERR_OFFSET + i * sizeof(u64)); + + iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK, + idxd->reg_base + IDXD_SWERR_OFFSET); + + if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) { + int id = idxd->sw_err.wq_idx; + struct idxd_wq *wq = idxd->wqs[id]; + + if (wq->type == IDXD_WQT_USER) + wake_up_interruptible(&wq->err_queue); + } else { + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + if (wq->type == IDXD_WQT_USER) + wake_up_interruptible(&wq->err_queue); + } + } + + spin_unlock(&idxd->dev_lock); + val |= IDXD_INTC_ERR; + + for (i = 0; i < 4; i++) + dev_warn(dev, "err[%d]: %#16.16llx\n", + i, idxd->sw_err.bits[i]); + err = true; + } + + if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { + struct idxd_int_handle_revoke *revoke; + + val |= IDXD_INTC_INT_HANDLE_REVOKED; + + revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC); + if (revoke) { + revoke->idxd = idxd; + INIT_WORK(&revoke->work, idxd_int_handle_revoke); + queue_work(idxd->wq, &revoke->work); + + } else { + dev_err(dev, "Failed to allocate work for int handle revoke\n"); + idxd_wqs_quiesce(idxd); + } + } + + if (cause & IDXD_INTC_CMD) { + val |= IDXD_INTC_CMD; + complete(idxd->cmd_done); + } + + if (cause & IDXD_INTC_OCCUPY) { + /* Driver does not utilize occupancy interrupt */ + val |= IDXD_INTC_OCCUPY; + } + + if (cause & IDXD_INTC_PERFMON_OVFL) { + val |= IDXD_INTC_PERFMON_OVFL; + perfmon_counter_overflow(idxd); + } + + if (cause & IDXD_INTC_EVL) { + val |= IDXD_INTC_EVL; + process_evl_entries(idxd); + } + + val ^= cause; + if (val) + dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", + val); + + if (!err) + goto out; + +halt: + gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); + if (gensts.state == IDXD_DEVICE_STATE_HALT) { + idxd->state = IDXD_DEV_HALTED; + if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) { + /* + * If we need a software reset, we will throw the work + * on a system workqueue in order to allow interrupts + * for the device command completions. + */ + INIT_WORK(&idxd->work, idxd_device_reinit); + queue_work(idxd->wq, &idxd->work); + } else { + idxd->state = IDXD_DEV_HALTED; + idxd_wqs_quiesce(idxd); + idxd_wqs_unmap_portal(idxd); + idxd_device_clear_state(idxd); + dev_err(&idxd->pdev->dev, + "idxd halted, need %s.\n", + gensts.reset_type == IDXD_DEVICE_RESET_FLR ? + "FLR" : "system reset"); + } + } + +out: + return IRQ_HANDLED; +} + +static void idxd_int_handle_resubmit_work(struct work_struct *work) +{ + struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work); + struct idxd_desc *desc = irw->desc; + struct idxd_wq *wq = desc->wq; + int rc; + + desc->completion->status = 0; + rc = idxd_submit_desc(wq, desc); + if (rc < 0) { + dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n", + desc->id, wq->id); + /* + * If the error is not -EAGAIN, it means the submission failed due to wq + * has been killed instead of ENQCMDS failure. Here the driver needs to + * notify the submitter of the failure by reporting abort status. + * + * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the + * abort. + */ + if (rc != -EAGAIN) { + desc->completion->status = IDXD_COMP_DESC_ABORT; + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false); + } + idxd_free_desc(wq, desc); + } + kfree(irw); +} + +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc) +{ + struct idxd_wq *wq = desc->wq; + struct idxd_device *idxd = wq->idxd; + struct idxd_resubmit *irw; + + irw = kzalloc(sizeof(*irw), GFP_KERNEL); + if (!irw) + return false; + + irw->desc = desc; + INIT_WORK(&irw->work, idxd_int_handle_resubmit_work); + queue_work(idxd->wq, &irw->work); + return true; +} + +static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) +{ + struct idxd_desc *desc, *t; + struct llist_node *head; + + head = llist_del_all(&irq_entry->pending_llist); + if (!head) + return; + + llist_for_each_entry_safe(desc, t, head, llnode) { + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (status) { + /* + * Check against the original status as ABORT is software defined + * and 0xff, which DSA_COMP_STATUS_MASK can mask out. + */ + if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); + continue; + } + + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); + } else { + spin_lock(&irq_entry->list_lock); + list_add_tail(&desc->list, + &irq_entry->work_list); + spin_unlock(&irq_entry->list_lock); + } + } +} + +static void irq_process_work_list(struct idxd_irq_entry *irq_entry) +{ + LIST_HEAD(flist); + struct idxd_desc *desc, *n; + + /* + * This lock protects list corruption from access of list outside of the irq handler + * thread. + */ + spin_lock(&irq_entry->list_lock); + if (list_empty(&irq_entry->work_list)) { + spin_unlock(&irq_entry->list_lock); + return; + } + + list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { + if (desc->completion->status) { + list_move_tail(&desc->list, &flist); + } + } + + spin_unlock(&irq_entry->list_lock); + + list_for_each_entry(desc, &flist, list) { + /* + * Check against the original status as ABORT is software defined + * and 0xff, which DSA_COMP_STATUS_MASK can mask out. + */ + if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); + continue; + } + + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); + } +} + +irqreturn_t idxd_wq_thread(int irq, void *data) +{ + struct idxd_irq_entry *irq_entry = data; + + /* + * There are two lists we are processing. The pending_llist is where + * submmiter adds all the submitted descriptor after sending it to + * the workqueue. It's a lockless singly linked list. The work_list + * is the common linux double linked list. We are in a scenario of + * multiple producers and a single consumer. The producers are all + * the kernel submitters of descriptors, and the consumer is the + * kernel irq handler thread for the msix vector when using threaded + * irq. To work with the restrictions of llist to remain lockless, + * we are doing the following steps: + * 1. Iterate through the work_list and process any completed + * descriptor. Delete the completed entries during iteration. + * 2. llist_del_all() from the pending list. + * 3. Iterate through the llist that was deleted from the pending list + * and process the completed entries. + * 4. If the entry is still waiting on hardware, list_add_tail() to + * the work_list. + */ + irq_process_work_list(irq_entry); + irq_process_pending_llist(irq_entry); + + return IRQ_HANDLED; +} diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c new file mode 100644 index 0000000000..fdda6d6042 --- /dev/null +++ b/drivers/dma/idxd/perfmon.c @@ -0,0 +1,661 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */ + +#include +#include +#include "idxd.h" +#include "perfmon.h" + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf); + +static cpumask_t perfmon_dsa_cpu_mask; +static bool cpuhp_set_up; +static enum cpuhp_state cpuhp_slot; + +/* + * perf userspace reads this attribute to determine which cpus to open + * counters on. It's connected to perfmon_dsa_cpu_mask, which is + * maintained by the cpu hotplug handlers. + */ +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *perfmon_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = perfmon_cpumask_attrs, +}; + +/* + * These attributes specify the bits in the config word that the perf + * syscall uses to pass the event ids and categories to perfmon. + */ +DEFINE_PERFMON_FORMAT_ATTR(event_category, "config:0-3"); +DEFINE_PERFMON_FORMAT_ATTR(event, "config:4-31"); + +/* + * These attributes specify the bits in the config1 word that the perf + * syscall uses to pass filter data to perfmon. + */ +DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31"); +DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39"); +DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43"); +DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51"); +DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59"); + +#define PERFMON_FILTERS_START 2 +#define PERFMON_FILTERS_MAX 5 + +static struct attribute *perfmon_format_attrs[] = { + &format_attr_idxd_event_category.attr, + &format_attr_idxd_event.attr, + &format_attr_idxd_filter_wq.attr, + &format_attr_idxd_filter_tc.attr, + &format_attr_idxd_filter_pgsz.attr, + &format_attr_idxd_filter_sz.attr, + &format_attr_idxd_filter_eng.attr, + NULL, +}; + +static struct attribute_group perfmon_format_attr_group = { + .name = "format", + .attrs = perfmon_format_attrs, +}; + +static const struct attribute_group *perfmon_attr_groups[] = { + &perfmon_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask); +} + +static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event) +{ + return &idxd_pmu->pmu == event->pmu; +} + +static int perfmon_collect_events(struct idxd_pmu *idxd_pmu, + struct perf_event *leader, + bool do_grp) +{ + struct perf_event *event; + int n, max_count; + + max_count = idxd_pmu->n_counters; + n = idxd_pmu->n_events; + + if (n >= max_count) + return -EINVAL; + + if (is_idxd_event(idxd_pmu, leader)) { + idxd_pmu->event_list[n] = leader; + idxd_pmu->event_list[n]->hw.idx = n; + n++; + } + + if (!do_grp) + return n; + + for_each_sibling_event(event, leader) { + if (!is_idxd_event(idxd_pmu, event) || + event->state <= PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -EINVAL; + + idxd_pmu->event_list[n] = event; + idxd_pmu->event_list[n]->hw.idx = n; + n++; + } + + return n; +} + +static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu, + struct perf_event *event, int idx) +{ + struct idxd_device *idxd = idxd_pmu->idxd; + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = idx; + hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx)); + hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx)); +} + +static int perfmon_assign_event(struct idxd_pmu *idxd_pmu, + struct perf_event *event) +{ + int i; + + for (i = 0; i < IDXD_PMU_EVENT_MAX; i++) + if (!test_and_set_bit(i, idxd_pmu->used_mask)) + return i; + + return -EINVAL; +} + +/* + * Check whether there are enough counters to satisfy that all the + * events in the group can actually be scheduled at the same time. + * + * To do this, create a fake idxd_pmu object so the event collection + * and assignment functions can be used without affecting the internal + * state of the real idxd_pmu object. + */ +static int perfmon_validate_group(struct idxd_pmu *pmu, + struct perf_event *event) +{ + struct perf_event *leader = event->group_leader; + struct idxd_pmu *fake_pmu; + int i, ret = 0, n, idx; + + fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL); + if (!fake_pmu) + return -ENOMEM; + + fake_pmu->pmu.name = pmu->pmu.name; + fake_pmu->n_counters = pmu->n_counters; + + n = perfmon_collect_events(fake_pmu, leader, true); + if (n < 0) { + ret = n; + goto out; + } + + fake_pmu->n_events = n; + n = perfmon_collect_events(fake_pmu, event, false); + if (n < 0) { + ret = n; + goto out; + } + + fake_pmu->n_events = n; + + for (i = 0; i < n; i++) { + event = fake_pmu->event_list[i]; + + idx = perfmon_assign_event(fake_pmu, event); + if (idx < 0) { + ret = idx; + goto out; + } + } +out: + kfree(fake_pmu); + + return ret; +} + +static int perfmon_pmu_event_init(struct perf_event *event) +{ + struct idxd_device *idxd; + int ret = 0; + + idxd = event_to_idxd(event); + event->hw.idx = -1; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* sampling not supported */ + if (event->attr.sample_period) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (event->pmu != &idxd->idxd_pmu->pmu) + return -EINVAL; + + event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd)); + event->cpu = idxd->idxd_pmu->cpu; + event->hw.config = event->attr.config; + + if (event->group_leader != event) + /* non-group events have themselves as leader */ + ret = perfmon_validate_group(idxd->idxd_pmu, event); + + return ret; +} + +static inline u64 perfmon_pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct idxd_device *idxd; + int cntr = hwc->idx; + + idxd = event_to_idxd(event); + + return ioread64(CNTRDATA_REG(idxd, cntr)); +} + +static void perfmon_pmu_event_update(struct perf_event *event) +{ + struct idxd_device *idxd = event_to_idxd(event); + u64 prev_raw_count, new_raw_count, delta, p, n; + int shift = 64 - idxd->idxd_pmu->counter_width; + struct hw_perf_event *hwc = &event->hw; + + prev_raw_count = local64_read(&hwc->prev_count); + do { + new_raw_count = perfmon_pmu_read_counter(event); + } while (!local64_try_cmpxchg(&hwc->prev_count, + &prev_raw_count, new_raw_count)); + n = (new_raw_count << shift); + p = (prev_raw_count << shift); + + delta = ((n - p) >> shift); + + local64_add(delta, &event->count); +} + +void perfmon_counter_overflow(struct idxd_device *idxd) +{ + int i, n_counters, max_loop = OVERFLOW_SIZE; + struct perf_event *event; + unsigned long ovfstatus; + + n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE); + + ovfstatus = ioread32(OVFSTATUS_REG(idxd)); + + /* + * While updating overflowed counters, other counters behind + * them could overflow and be missed in a given pass. + * Normally this could happen at most n_counters times, but in + * theory a tiny counter width could result in continual + * overflows and endless looping. max_loop provides a + * failsafe in that highly unlikely case. + */ + while (ovfstatus && max_loop--) { + /* Figure out which counter(s) overflowed */ + for_each_set_bit(i, &ovfstatus, n_counters) { + unsigned long ovfstatus_clear = 0; + + /* Update event->count for overflowed counter */ + event = idxd->idxd_pmu->event_list[i]; + perfmon_pmu_event_update(event); + /* Writing 1 to OVFSTATUS bit clears it */ + set_bit(i, &ovfstatus_clear); + iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd)); + } + + ovfstatus = ioread32(OVFSTATUS_REG(idxd)); + } + + /* + * Should never happen. If so, it means a counter(s) looped + * around twice while this handler was running. + */ + WARN_ON_ONCE(ovfstatus); +} + +static inline void perfmon_reset_config(struct idxd_device *idxd) +{ + iowrite32(CONFIG_RESET, PERFRST_REG(idxd)); + iowrite32(0, OVFSTATUS_REG(idxd)); + iowrite32(0, PERFFRZ_REG(idxd)); +} + +static inline void perfmon_reset_counters(struct idxd_device *idxd) +{ + iowrite32(CNTR_RESET, PERFRST_REG(idxd)); +} + +static inline void perfmon_reset(struct idxd_device *idxd) +{ + perfmon_reset_config(idxd); + perfmon_reset_counters(idxd); +} + +static void perfmon_pmu_event_start(struct perf_event *event, int mode) +{ + u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0; + u64 cntr_cfg, cntrdata, event_enc, event_cat = 0; + struct hw_perf_event *hwc = &event->hw; + union filter_cfg flt_cfg; + union event_cfg event_cfg; + struct idxd_device *idxd; + int cntr; + + idxd = event_to_idxd(event); + + event->hw.idx = hwc->idx; + cntr = hwc->idx; + + /* Obtain event category and event value from user space */ + event_cfg.val = event->attr.config; + flt_cfg.val = event->attr.config1; + event_cat = event_cfg.event_cat; + event_enc = event_cfg.event_enc; + + /* Obtain filter configuration from user space */ + flt_wq = flt_cfg.wq; + flt_tc = flt_cfg.tc; + flt_pg_sz = flt_cfg.pg_sz; + flt_xfer_sz = flt_cfg.xfer_sz; + flt_eng = flt_cfg.eng; + + if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ)); + if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC)); + if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ)); + if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ)); + if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG)); + + /* Read the start value */ + cntrdata = ioread64(CNTRDATA_REG(idxd, cntr)); + local64_set(&event->hw.prev_count, cntrdata); + + /* Set counter to event/category */ + cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT; + cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT; + /* Set interrupt on overflow and counter enable bits */ + cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE); + + iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr)); +} + +static void perfmon_pmu_event_stop(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + struct idxd_device *idxd; + int i, cntr = hwc->idx; + u64 cntr_cfg; + + idxd = event_to_idxd(event); + + /* remove this event from event list */ + for (i = 0; i < idxd->idxd_pmu->n_events; i++) { + if (event != idxd->idxd_pmu->event_list[i]) + continue; + + for (++i; i < idxd->idxd_pmu->n_events; i++) + idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i]; + --idxd->idxd_pmu->n_events; + break; + } + + cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr)); + cntr_cfg &= ~CNTRCFG_ENABLE; + iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr)); + + if (mode == PERF_EF_UPDATE) + perfmon_pmu_event_update(event); + + event->hw.idx = -1; + clear_bit(cntr, idxd->idxd_pmu->used_mask); +} + +static void perfmon_pmu_event_del(struct perf_event *event, int mode) +{ + perfmon_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int perfmon_pmu_event_add(struct perf_event *event, int flags) +{ + struct idxd_device *idxd = event_to_idxd(event); + struct idxd_pmu *idxd_pmu = idxd->idxd_pmu; + struct hw_perf_event *hwc = &event->hw; + int idx, n; + + n = perfmon_collect_events(idxd_pmu, event, false); + if (n < 0) + return n; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + idx = perfmon_assign_event(idxd_pmu, event); + if (idx < 0) + return idx; + + perfmon_assign_hw_event(idxd_pmu, event, idx); + + if (flags & PERF_EF_START) + perfmon_pmu_event_start(event, 0); + + idxd_pmu->n_events = n; + + return 0; +} + +static void enable_perfmon_pmu(struct idxd_device *idxd) +{ + iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd)); +} + +static void disable_perfmon_pmu(struct idxd_device *idxd) +{ + iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd)); +} + +static void perfmon_pmu_enable(struct pmu *pmu) +{ + struct idxd_device *idxd = pmu_to_idxd(pmu); + + enable_perfmon_pmu(idxd); +} + +static void perfmon_pmu_disable(struct pmu *pmu) +{ + struct idxd_device *idxd = pmu_to_idxd(pmu); + + disable_perfmon_pmu(idxd); +} + +static void skip_filter(int i) +{ + int j; + + for (j = i; j < PERFMON_FILTERS_MAX; j++) + perfmon_format_attrs[PERFMON_FILTERS_START + j] = + perfmon_format_attrs[PERFMON_FILTERS_START + j + 1]; +} + +static void idxd_pmu_init(struct idxd_pmu *idxd_pmu) +{ + int i; + + for (i = 0 ; i < PERFMON_FILTERS_MAX; i++) { + if (!test_bit(i, &idxd_pmu->supported_filters)) + skip_filter(i); + } + + idxd_pmu->pmu.name = idxd_pmu->name; + idxd_pmu->pmu.attr_groups = perfmon_attr_groups; + idxd_pmu->pmu.task_ctx_nr = perf_invalid_context; + idxd_pmu->pmu.event_init = perfmon_pmu_event_init; + idxd_pmu->pmu.pmu_enable = perfmon_pmu_enable, + idxd_pmu->pmu.pmu_disable = perfmon_pmu_disable, + idxd_pmu->pmu.add = perfmon_pmu_event_add; + idxd_pmu->pmu.del = perfmon_pmu_event_del; + idxd_pmu->pmu.start = perfmon_pmu_event_start; + idxd_pmu->pmu.stop = perfmon_pmu_event_stop; + idxd_pmu->pmu.read = perfmon_pmu_event_update; + idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + idxd_pmu->pmu.module = THIS_MODULE; +} + +void perfmon_pmu_remove(struct idxd_device *idxd) +{ + if (!idxd->idxd_pmu) + return; + + cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node); + perf_pmu_unregister(&idxd->idxd_pmu->pmu); + kfree(idxd->idxd_pmu); + idxd->idxd_pmu = NULL; +} + +static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct idxd_pmu *idxd_pmu; + + idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); + + /* select the first online CPU as the designated reader */ + if (cpumask_empty(&perfmon_dsa_cpu_mask)) { + cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask); + idxd_pmu->cpu = cpu; + } + + return 0; +} + +static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct idxd_pmu *idxd_pmu; + unsigned int target; + + idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); + + if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + + /* migrate events if there is a valid target */ + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); + else + target = -1; + + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + + return 0; +} + +int perfmon_pmu_init(struct idxd_device *idxd) +{ + union idxd_perfcap perfcap; + struct idxd_pmu *idxd_pmu; + int rc = -ENODEV; + + /* + * perfmon module initialization failed, nothing to do + */ + if (!cpuhp_set_up) + return -ENODEV; + + /* + * If perfmon_offset or num_counters is 0, it means perfmon is + * not supported on this hardware. + */ + if (idxd->perfmon_offset == 0) + return -ENODEV; + + idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL); + if (!idxd_pmu) + return -ENOMEM; + + idxd_pmu->idxd = idxd; + idxd->idxd_pmu = idxd_pmu; + + if (idxd->data->type == IDXD_TYPE_DSA) { + rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id); + if (rc < 0) + goto free; + } else if (idxd->data->type == IDXD_TYPE_IAX) { + rc = sprintf(idxd_pmu->name, "iax%d", idxd->id); + if (rc < 0) + goto free; + } else { + goto free; + } + + perfmon_reset(idxd); + + perfcap.bits = ioread64(PERFCAP_REG(idxd)); + + /* + * If total perf counter is 0, stop further registration. + * This is necessary in order to support driver running on + * guest which does not have pmon support. + */ + if (perfcap.num_perf_counter == 0) + goto free; + + /* A counter width of 0 means it can't count */ + if (perfcap.counter_width == 0) + goto free; + + /* Overflow interrupt and counter freeze support must be available */ + if (!perfcap.overflow_interrupt || !perfcap.counter_freeze) + goto free; + + /* Number of event categories cannot be 0 */ + if (perfcap.num_event_category == 0) + goto free; + + /* + * We don't support per-counter capabilities for now. + */ + if (perfcap.cap_per_counter) + goto free; + + idxd_pmu->n_event_categories = perfcap.num_event_category; + idxd_pmu->supported_event_categories = perfcap.global_event_category; + idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter; + + /* check filter capability. If 0, then filters are not supported */ + idxd_pmu->supported_filters = perfcap.filter; + if (perfcap.filter) + idxd_pmu->n_filters = hweight8(perfcap.filter); + + /* Store the total number of counters categories, and counter width */ + idxd_pmu->n_counters = perfcap.num_perf_counter; + idxd_pmu->counter_width = perfcap.counter_width; + + idxd_pmu_init(idxd_pmu); + + rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1); + if (rc) + goto free; + + rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node); + if (rc) { + perf_pmu_unregister(&idxd->idxd_pmu->pmu); + goto free; + } +out: + return rc; +free: + kfree(idxd_pmu); + idxd->idxd_pmu = NULL; + + goto out; +} + +void __init perfmon_init(void) +{ + int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "driver/dma/idxd/perf:online", + perf_event_cpu_online, + perf_event_cpu_offline); + if (WARN_ON(rc < 0)) + return; + + cpuhp_slot = rc; + cpuhp_set_up = true; +} + +void __exit perfmon_exit(void) +{ + if (cpuhp_set_up) + cpuhp_remove_multi_state(cpuhp_slot); +} diff --git a/drivers/dma/idxd/perfmon.h b/drivers/dma/idxd/perfmon.h new file mode 100644 index 0000000000..9a081a1bc6 --- /dev/null +++ b/drivers/dma/idxd/perfmon.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */ + +#ifndef _PERFMON_H_ +#define _PERFMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registers.h" + +static inline struct idxd_pmu *event_to_pmu(struct perf_event *event) +{ + struct idxd_pmu *idxd_pmu; + struct pmu *pmu; + + pmu = event->pmu; + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu; +} + +static inline struct idxd_device *event_to_idxd(struct perf_event *event) +{ + struct idxd_pmu *idxd_pmu; + struct pmu *pmu; + + pmu = event->pmu; + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu->idxd; +} + +static inline struct idxd_device *pmu_to_idxd(struct pmu *pmu) +{ + struct idxd_pmu *idxd_pmu; + + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu->idxd; +} + +enum dsa_perf_events { + DSA_PERF_EVENT_WQ = 0, + DSA_PERF_EVENT_ENGINE, + DSA_PERF_EVENT_ADDR_TRANS, + DSA_PERF_EVENT_OP, + DSA_PERF_EVENT_COMPL, + DSA_PERF_EVENT_MAX, +}; + +enum filter_enc { + FLT_WQ = 0, + FLT_TC, + FLT_PG_SZ, + FLT_XFER_SZ, + FLT_ENG, + FLT_MAX, +}; + +#define CONFIG_RESET 0x0000000000000001 +#define CNTR_RESET 0x0000000000000002 +#define CNTR_ENABLE 0x0000000000000001 +#define INTR_OVFL 0x0000000000000002 + +#define COUNTER_FREEZE 0x00000000FFFFFFFF +#define COUNTER_UNFREEZE 0x0000000000000000 +#define OVERFLOW_SIZE 32 + +#define CNTRCFG_ENABLE BIT(0) +#define CNTRCFG_IRQ_OVERFLOW BIT(1) +#define CNTRCFG_CATEGORY_SHIFT 8 +#define CNTRCFG_EVENT_SHIFT 32 + +#define PERFMON_TABLE_OFFSET(_idxd) \ +({ \ + typeof(_idxd) __idxd = (_idxd); \ + ((__idxd)->reg_base + (__idxd)->perfmon_offset); \ +}) +#define PERFMON_REG_OFFSET(idxd, offset) \ + (PERFMON_TABLE_OFFSET(idxd) + (offset)) + +#define PERFCAP_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFCAP_OFFSET)) +#define PERFRST_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFRST_OFFSET)) +#define OVFSTATUS_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_OVFSTATUS_OFFSET)) +#define PERFFRZ_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFFRZ_OFFSET)) + +#define FLTCFG_REG(idxd, cntr, flt) \ + (PERFMON_REG_OFFSET(idxd, IDXD_FLTCFG_OFFSET) + ((cntr) * 32) + ((flt) * 4)) + +#define CNTRCFG_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCFG_OFFSET) + ((cntr) * 8)) +#define CNTRDATA_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRDATA_OFFSET) + ((cntr) * 8)) +#define CNTRCAP_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCAP_OFFSET) + ((cntr) * 8)) + +#define EVNTCAP_REG(idxd, category) \ + (PERFMON_REG_OFFSET(idxd, IDXD_EVNTCAP_OFFSET) + ((category) * 8)) + +#define DEFINE_PERFMON_FORMAT_ATTR(_name, _format) \ +static ssize_t __perfmon_idxd_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_idxd_##_name = \ + __ATTR(_name, 0444, __perfmon_idxd_##_name##_show, NULL) + +#endif diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h new file mode 100644 index 0000000000..7b54a3939e --- /dev/null +++ b/drivers/dma/idxd/registers.h @@ -0,0 +1,632 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#ifndef _IDXD_REGISTERS_H_ +#define _IDXD_REGISTERS_H_ + +#include + +/* PCI Config */ +#define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 +#define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe + +#define DEVICE_VERSION_1 0x100 +#define DEVICE_VERSION_2 0x200 + +#define IDXD_MMIO_BAR 0 +#define IDXD_WQ_BAR 2 +#define IDXD_PORTAL_SIZE PAGE_SIZE + +/* MMIO Device BAR0 Registers */ +#define IDXD_VER_OFFSET 0x00 +#define IDXD_VER_MAJOR_MASK 0xf0 +#define IDXD_VER_MINOR_MASK 0x0f +#define GET_IDXD_VER_MAJOR(x) (((x) & IDXD_VER_MAJOR_MASK) >> 4) +#define GET_IDXD_VER_MINOR(x) ((x) & IDXD_VER_MINOR_MASK) + +union gen_cap_reg { + struct { + u64 block_on_fault:1; + u64 overlap_copy:1; + u64 cache_control_mem:1; + u64 cache_control_cache:1; + u64 cmd_cap:1; + u64 rsvd:3; + u64 dest_readback:1; + u64 drain_readback:1; + u64 rsvd2:3; + u64 evl_support:2; + u64 batch_continuation:1; + u64 max_xfer_shift:5; + u64 max_batch_shift:4; + u64 max_ims_mult:6; + u64 config_en:1; + u64 rsvd3:32; + }; + u64 bits; +} __packed; +#define IDXD_GENCAP_OFFSET 0x10 + +union wq_cap_reg { + struct { + u64 total_wq_size:16; + u64 num_wqs:8; + u64 wqcfg_size:4; + u64 rsvd:20; + u64 shared_mode:1; + u64 dedicated_mode:1; + u64 wq_ats_support:1; + u64 priority:1; + u64 occupancy:1; + u64 occupancy_int:1; + u64 op_config:1; + u64 wq_prs_support:1; + u64 rsvd4:8; + }; + u64 bits; +} __packed; +#define IDXD_WQCAP_OFFSET 0x20 +#define IDXD_WQCFG_MIN 5 + +union group_cap_reg { + struct { + u64 num_groups:8; + u64 total_rdbufs:8; /* formerly total_tokens */ + u64 rdbuf_ctrl:1; /* formerly token_en */ + u64 rdbuf_limit:1; /* formerly token_limit */ + u64 progress_limit:1; /* descriptor and batch descriptor */ + u64 rsvd:45; + }; + u64 bits; +} __packed; +#define IDXD_GRPCAP_OFFSET 0x30 + +union engine_cap_reg { + struct { + u64 num_engines:8; + u64 rsvd:56; + }; + u64 bits; +} __packed; + +#define IDXD_ENGCAP_OFFSET 0x38 + +#define IDXD_OPCAP_NOOP 0x0001 +#define IDXD_OPCAP_BATCH 0x0002 +#define IDXD_OPCAP_MEMMOVE 0x0008 +struct opcap { + u64 bits[4]; +}; + +#define IDXD_MAX_OPCAP_BITS 256U + +#define IDXD_OPCAP_OFFSET 0x40 + +#define IDXD_TABLE_OFFSET 0x60 +union offsets_reg { + struct { + u64 grpcfg:16; + u64 wqcfg:16; + u64 msix_perm:16; + u64 ims:16; + u64 perfmon:16; + u64 rsvd:48; + }; + u64 bits[2]; +} __packed; + +#define IDXD_TABLE_MULT 0x100 + +#define IDXD_GENCFG_OFFSET 0x80 +union gencfg_reg { + struct { + u32 rdbuf_limit:8; + u32 rsvd:4; + u32 user_int_en:1; + u32 evl_en:1; + u32 rsvd2:18; + }; + u32 bits; +} __packed; + +#define IDXD_GENCTRL_OFFSET 0x88 +union genctrl_reg { + struct { + u32 softerr_int_en:1; + u32 halt_int_en:1; + u32 evl_int_en:1; + u32 rsvd:29; + }; + u32 bits; +} __packed; + +#define IDXD_GENSTATS_OFFSET 0x90 +union gensts_reg { + struct { + u32 state:2; + u32 reset_type:2; + u32 rsvd:28; + }; + u32 bits; +} __packed; + +enum idxd_device_status_state { + IDXD_DEVICE_STATE_DISABLED = 0, + IDXD_DEVICE_STATE_ENABLED, + IDXD_DEVICE_STATE_DRAIN, + IDXD_DEVICE_STATE_HALT, +}; + +enum idxd_device_reset_type { + IDXD_DEVICE_RESET_SOFTWARE = 0, + IDXD_DEVICE_RESET_FLR, + IDXD_DEVICE_RESET_WARM, + IDXD_DEVICE_RESET_COLD, +}; + +#define IDXD_INTCAUSE_OFFSET 0x98 +#define IDXD_INTC_ERR 0x01 +#define IDXD_INTC_CMD 0x02 +#define IDXD_INTC_OCCUPY 0x04 +#define IDXD_INTC_PERFMON_OVFL 0x08 +#define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_EVL 0x20 +#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 + +#define IDXD_CMD_OFFSET 0xa0 +union idxd_command_reg { + struct { + u32 operand:20; + u32 cmd:5; + u32 rsvd:6; + u32 int_req:1; + }; + u32 bits; +} __packed; + +enum idxd_cmd { + IDXD_CMD_ENABLE_DEVICE = 1, + IDXD_CMD_DISABLE_DEVICE, + IDXD_CMD_DRAIN_ALL, + IDXD_CMD_ABORT_ALL, + IDXD_CMD_RESET_DEVICE, + IDXD_CMD_ENABLE_WQ, + IDXD_CMD_DISABLE_WQ, + IDXD_CMD_DRAIN_WQ, + IDXD_CMD_ABORT_WQ, + IDXD_CMD_RESET_WQ, + IDXD_CMD_DRAIN_PASID, + IDXD_CMD_ABORT_PASID, + IDXD_CMD_REQUEST_INT_HANDLE, + IDXD_CMD_RELEASE_INT_HANDLE, +}; + +#define CMD_INT_HANDLE_IMS 0x10000 + +#define IDXD_CMDSTS_OFFSET 0xa8 +union cmdsts_reg { + struct { + u8 err; + u16 result; + u8 rsvd:7; + u8 active:1; + }; + u32 bits; +} __packed; +#define IDXD_CMDSTS_ACTIVE 0x80000000 +#define IDXD_CMDSTS_ERR_MASK 0xff +#define IDXD_CMDSTS_RES_SHIFT 8 + +enum idxd_cmdsts_err { + IDXD_CMDSTS_SUCCESS = 0, + IDXD_CMDSTS_INVAL_CMD, + IDXD_CMDSTS_INVAL_WQIDX, + IDXD_CMDSTS_HW_ERR, + /* enable device errors */ + IDXD_CMDSTS_ERR_DEV_ENABLED = 0x10, + IDXD_CMDSTS_ERR_CONFIG, + IDXD_CMDSTS_ERR_BUSMASTER_EN, + IDXD_CMDSTS_ERR_PASID_INVAL, + IDXD_CMDSTS_ERR_WQ_SIZE_ERANGE, + IDXD_CMDSTS_ERR_GRP_CONFIG, + IDXD_CMDSTS_ERR_GRP_CONFIG2, + IDXD_CMDSTS_ERR_GRP_CONFIG3, + IDXD_CMDSTS_ERR_GRP_CONFIG4, + /* enable wq errors */ + IDXD_CMDSTS_ERR_DEV_NOTEN = 0x20, + IDXD_CMDSTS_ERR_WQ_ENABLED, + IDXD_CMDSTS_ERR_WQ_SIZE, + IDXD_CMDSTS_ERR_WQ_PRIOR, + IDXD_CMDSTS_ERR_WQ_MODE, + IDXD_CMDSTS_ERR_BOF_EN, + IDXD_CMDSTS_ERR_PASID_EN, + IDXD_CMDSTS_ERR_MAX_BATCH_SIZE, + IDXD_CMDSTS_ERR_MAX_XFER_SIZE, + /* disable device errors */ + IDXD_CMDSTS_ERR_DIS_DEV_EN = 0x31, + /* disable WQ, drain WQ, abort WQ, reset WQ */ + IDXD_CMDSTS_ERR_DEV_NOT_EN, + /* request interrupt handle */ + IDXD_CMDSTS_ERR_INVAL_INT_IDX = 0x41, + IDXD_CMDSTS_ERR_NO_HANDLE, +}; + +#define IDXD_CMDCAP_OFFSET 0xb0 + +#define IDXD_SWERR_OFFSET 0xc0 +#define IDXD_SWERR_VALID 0x00000001 +#define IDXD_SWERR_OVERFLOW 0x00000002 +#define IDXD_SWERR_ACK (IDXD_SWERR_VALID | IDXD_SWERR_OVERFLOW) +union sw_err_reg { + struct { + u64 valid:1; + u64 overflow:1; + u64 desc_valid:1; + u64 wq_idx_valid:1; + u64 batch:1; + u64 fault_rw:1; + u64 priv:1; + u64 rsvd:1; + u64 error:8; + u64 wq_idx:8; + u64 rsvd2:8; + u64 operation:8; + u64 pasid:20; + u64 rsvd3:4; + + u64 batch_idx:16; + u64 rsvd4:16; + u64 invalid_flags:32; + + u64 fault_addr; + + u64 rsvd5; + }; + u64 bits[4]; +} __packed; + +union iaa_cap_reg { + struct { + u64 dec_aecs_format_ver:1; + u64 drop_init_bits:1; + u64 chaining:1; + u64 force_array_output_mod:1; + u64 load_part_aecs:1; + u64 comp_early_abort:1; + u64 nested_comp:1; + u64 diction_comp:1; + u64 header_gen:1; + u64 crypto_gcm:1; + u64 crypto_cfb:1; + u64 crypto_xts:1; + u64 rsvd:52; + }; + u64 bits; +} __packed; + +#define IDXD_IAACAP_OFFSET 0x180 + +#define IDXD_EVLCFG_OFFSET 0xe0 +union evlcfg_reg { + struct { + u64 pasid_en:1; + u64 priv:1; + u64 rsvd:10; + u64 base_addr:52; + + u64 size:16; + u64 pasid:20; + u64 rsvd2:28; + }; + u64 bits[2]; +} __packed; + +#define IDXD_EVL_SIZE_MIN 0x0040 +#define IDXD_EVL_SIZE_MAX 0xffff + +union msix_perm { + struct { + u32 rsvd:2; + u32 ignore:1; + u32 pasid_en:1; + u32 rsvd2:8; + u32 pasid:20; + }; + u32 bits; +} __packed; + +union group_flags { + struct { + u64 tc_a:3; + u64 tc_b:3; + u64 rsvd:1; + u64 use_rdbuf_limit:1; + u64 rdbufs_reserved:8; + u64 rsvd2:4; + u64 rdbufs_allowed:8; + u64 rsvd3:4; + u64 desc_progress_limit:2; + u64 rsvd4:2; + u64 batch_progress_limit:2; + u64 rsvd5:26; + }; + u64 bits; +} __packed; + +struct grpcfg { + u64 wqs[4]; + u64 engines; + union group_flags flags; +} __packed; + +union wqcfg { + struct { + /* bytes 0-3 */ + u16 wq_size; + u16 rsvd; + + /* bytes 4-7 */ + u16 wq_thresh; + u16 rsvd1; + + /* bytes 8-11 */ + u32 mode:1; /* shared or dedicated */ + u32 bof:1; /* block on fault */ + u32 wq_ats_disable:1; + u32 wq_prs_disable:1; + u32 priority:4; + u32 pasid:20; + u32 pasid_en:1; + u32 priv:1; + u32 rsvd3:2; + + /* bytes 12-15 */ + u32 max_xfer_shift:5; + u32 max_batch_shift:4; + u32 rsvd4:23; + + /* bytes 16-19 */ + u16 occupancy_inth; + u16 occupancy_table_sel:1; + u16 rsvd5:15; + + /* bytes 20-23 */ + u16 occupancy_limit; + u16 occupancy_int_en:1; + u16 rsvd6:15; + + /* bytes 24-27 */ + u16 occupancy; + u16 occupancy_int:1; + u16 rsvd7:12; + u16 mode_support:1; + u16 wq_state:2; + + /* bytes 28-31 */ + u32 rsvd8; + + /* bytes 32-63 */ + u64 op_config[4]; + }; + u32 bits[16]; +} __packed; + +#define WQCFG_PASID_IDX 2 +#define WQCFG_PRIVL_IDX 2 +#define WQCFG_OCCUP_IDX 6 + +#define WQCFG_OCCUP_MASK 0xffff + +/* + * This macro calculates the offset into the WQCFG register + * idxd - struct idxd * + * n - wq id + * ofs - the index of the 32b dword for the config register + * + * The WQCFG register block is divided into groups per each wq. The n index + * allows us to move to the register group that's for that particular wq. + * Each register is 32bits. The ofs gives us the number of register to access. + */ +#define WQCFG_OFFSET(_idxd_dev, n, ofs) \ +({\ + typeof(_idxd_dev) __idxd_dev = (_idxd_dev); \ + (__idxd_dev)->wqcfg_offset + (n) * (__idxd_dev)->wqcfg_size + sizeof(u32) * (ofs); \ +}) + +#define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32)) + +#define GRPCFG_SIZE 64 +#define GRPWQCFG_STRIDES 4 + +/* + * This macro calculates the offset into the GRPCFG register + * idxd - struct idxd * + * n - wq id + * ofs - the index of the 32b dword for the config register + * + * The WQCFG register block is divided into groups per each wq. The n index + * allows us to move to the register group that's for that particular wq. + * Each register is 32bits. The ofs gives us the number of register to access. + */ +#define GRPWQCFG_OFFSET(idxd_dev, n, ofs) ((idxd_dev)->grpcfg_offset +\ + (n) * GRPCFG_SIZE + sizeof(u64) * (ofs)) +#define GRPENGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 32) +#define GRPFLGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 40) + +/* Following is performance monitor registers */ +#define IDXD_PERFCAP_OFFSET 0x0 +union idxd_perfcap { + struct { + u64 num_perf_counter:6; + u64 rsvd1:2; + u64 counter_width:8; + u64 num_event_category:4; + u64 global_event_category:16; + u64 filter:8; + u64 rsvd2:8; + u64 cap_per_counter:1; + u64 writeable_counter:1; + u64 counter_freeze:1; + u64 overflow_interrupt:1; + u64 rsvd3:8; + }; + u64 bits; +} __packed; + +#define IDXD_EVNTCAP_OFFSET 0x80 +union idxd_evntcap { + struct { + u64 events:28; + u64 rsvd:36; + }; + u64 bits; +} __packed; + +struct idxd_event { + union { + struct { + u32 event_category:4; + u32 events:28; + }; + u32 val; + }; +} __packed; + +#define IDXD_CNTRCAP_OFFSET 0x800 +struct idxd_cntrcap { + union { + struct { + u32 counter_width:8; + u32 rsvd:20; + u32 num_events:4; + }; + u32 val; + }; + struct idxd_event events[]; +} __packed; + +#define IDXD_PERFRST_OFFSET 0x10 +union idxd_perfrst { + struct { + u32 perfrst_config:1; + u32 perfrst_counter:1; + u32 rsvd:30; + }; + u32 val; +} __packed; + +#define IDXD_OVFSTATUS_OFFSET 0x30 +#define IDXD_PERFFRZ_OFFSET 0x20 +#define IDXD_CNTRCFG_OFFSET 0x100 +union idxd_cntrcfg { + struct { + u64 enable:1; + u64 interrupt_ovf:1; + u64 global_freeze_ovf:1; + u64 rsvd1:5; + u64 event_category:4; + u64 rsvd2:20; + u64 events:28; + u64 rsvd3:4; + }; + u64 val; +} __packed; + +#define IDXD_FLTCFG_OFFSET 0x300 + +#define IDXD_CNTRDATA_OFFSET 0x200 +union idxd_cntrdata { + struct { + u64 event_count_value; + }; + u64 val; +} __packed; + +union event_cfg { + struct { + u64 event_cat:4; + u64 event_enc:28; + }; + u64 val; +} __packed; + +union filter_cfg { + struct { + u64 wq:32; + u64 tc:8; + u64 pg_sz:4; + u64 xfer_sz:8; + u64 eng:8; + }; + u64 val; +} __packed; + +#define IDXD_EVLSTATUS_OFFSET 0xf0 + +union evl_status_reg { + struct { + u32 head:16; + u32 rsvd:16; + u32 tail:16; + u32 rsvd2:14; + u32 int_pending:1; + u32 rsvd3:1; + }; + struct { + u32 bits_lower32; + u32 bits_upper32; + }; + u64 bits; +} __packed; + +#define IDXD_MAX_BATCH_IDENT 256 + +struct __evl_entry { + u64 rsvd:2; + u64 desc_valid:1; + u64 wq_idx_valid:1; + u64 batch:1; + u64 fault_rw:1; + u64 priv:1; + u64 err_info_valid:1; + u64 error:8; + u64 wq_idx:8; + u64 batch_id:8; + u64 operation:8; + u64 pasid:20; + u64 rsvd2:4; + + u16 batch_idx; + u16 rsvd3; + union { + /* Invalid Flags 0x11 */ + u32 invalid_flags; + /* Invalid Int Handle 0x19 */ + /* Page fault 0x1a */ + /* Page fault 0x06, 0x1f, only operand_id */ + /* Page fault before drain or in batch, 0x26, 0x27 */ + struct { + u16 int_handle; + u16 rci:1; + u16 ims:1; + u16 rcr:1; + u16 first_err_in_batch:1; + u16 rsvd4_2:9; + u16 operand_id:3; + }; + }; + u64 fault_addr; + u64 rsvd5; +} __packed; + +struct dsa_evl_entry { + struct __evl_entry e; + struct dsa_completion_record cr; +} __packed; + +struct iax_evl_entry { + struct __evl_entry e; + u64 rsvd[4]; + struct iax_completion_record cr; +} __packed; + +#endif diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c new file mode 100644 index 0000000000..3f922518e3 --- /dev/null +++ b/drivers/dma/idxd/submit.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include "idxd.h" +#include "registers.h" + +static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) +{ + struct idxd_desc *desc; + struct idxd_device *idxd = wq->idxd; + + desc = wq->descs[idx]; + memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); + memset(desc->completion, 0, idxd->data->compl_size); + desc->cpu = cpu; + + if (device_pasid_enabled(idxd)) + desc->hw->pasid = idxd->pasid; + + return desc; +} + +struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) +{ + int cpu, idx; + struct idxd_device *idxd = wq->idxd; + DEFINE_SBQ_WAIT(wait); + struct sbq_wait_state *ws; + struct sbitmap_queue *sbq; + + if (idxd->state != IDXD_DEV_ENABLED) + return ERR_PTR(-EIO); + + sbq = &wq->sbq; + idx = sbitmap_queue_get(sbq, &cpu); + if (idx < 0) { + if (optype == IDXD_OP_NONBLOCK) + return ERR_PTR(-EAGAIN); + } else { + return __get_desc(wq, idx, cpu); + } + + ws = &sbq->ws[0]; + for (;;) { + sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE); + if (signal_pending_state(TASK_INTERRUPTIBLE, current)) + break; + idx = sbitmap_queue_get(sbq, &cpu); + if (idx >= 0) + break; + schedule(); + } + + sbitmap_finish_wait(sbq, ws, &wait); + if (idx < 0) + return ERR_PTR(-EAGAIN); + + return __get_desc(wq, idx, cpu); +} + +void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) +{ + int cpu = desc->cpu; + + desc->cpu = -1; + sbitmap_queue_clear(&wq->sbq, desc->id, cpu); +} + +static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *n; + + lockdep_assert_held(&ie->list_lock); + list_for_each_entry_safe(d, n, &ie->work_list, list) { + if (d == desc) { + list_del(&d->list); + return d; + } + } + + /* + * At this point, the desc needs to be aborted is held by the completion + * handler where it has taken it off the pending list but has not added to the + * work list. It will be cleaned up by the interrupt handler when it sees the + * IDXD_COMP_DESC_ABORT for completion status. + */ + return NULL; +} + +static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *t, *found = NULL; + struct llist_node *head; + LIST_HEAD(flist); + + desc->completion->status = IDXD_COMP_DESC_ABORT; + /* + * Grab the list lock so it will block the irq thread handler. This allows the + * abort code to locate the descriptor need to be aborted. + */ + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) { + if (d == desc) { + found = desc; + continue; + } + + if (d->completion->status) + list_add_tail(&d->list, &flist); + else + list_add_tail(&d->list, &ie->work_list); + } + } + + if (!found) + found = list_abort_desc(wq, ie, desc); + spin_unlock(&ie->list_lock); + + if (found) + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); + + /* + * completing the descriptor will return desc to allocator and + * the desc can be acquired by a different process and the + * desc->list can be modified. Delete desc from list so the + * list trasversing does not get corrupted by the other process. + */ + list_for_each_entry_safe(d, t, &flist, list) { + list_del_init(&d->list); + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true); + } +} + +/* + * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver + * has better control of number of descriptors being submitted to a shared wq by limiting + * the number of driver allocated descriptors to the wq size. However, when the swq is + * exported to a guest kernel, it may be shared with multiple guest kernels. This means + * the likelihood of getting busy returned on the swq when submitting goes significantly up. + * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving + * up. The sysfs knob can be tuned by the system administrator. + */ +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) +{ + unsigned int retries = wq->enqcmds_retries; + int rc; + + do { + rc = enqcmds(portal, desc); + if (rc == 0) + break; + cpu_relax(); + } while (retries--); + + return rc; +} + +int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = NULL; + u32 desc_flags = desc->hw->flags; + void __iomem *portal; + int rc; + + if (idxd->state != IDXD_DEV_ENABLED) + return -EIO; + + if (!percpu_ref_tryget_live(&wq->wq_active)) { + wait_for_completion(&wq->wq_resurrect); + if (!percpu_ref_tryget_live(&wq->wq_active)) + return -ENXIO; + } + + portal = idxd_wq_portal_addr(wq); + + /* + * Pending the descriptor to the lockless list for the irq_entry + * that we designated the descriptor to. + */ + if (desc_flags & IDXD_OP_FLAG_RCI) { + ie = &wq->ie; + desc->hw->int_handle = ie->int_handle; + llist_add(&desc->llnode, &ie->pending_llist); + } + + /* + * The wmb() flushes writes to coherent DMA data before + * possibly triggering a DMA read. The wmb() is necessary + * even on UP because the recipient is a device. + */ + wmb(); + + if (wq_dedicated(wq)) { + iosubmit_cmds512(portal, desc->hw, 1); + } else { + rc = idxd_enqcmds(wq, portal, desc->hw); + if (rc < 0) { + percpu_ref_put(&wq->wq_active); + /* abort operation frees the descriptor */ + if (ie) + llist_abort_desc(wq, ie, desc); + return rc; + } + } + + percpu_ref_put(&wq->wq_active); + return 0; +} diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c new file mode 100644 index 0000000000..7caba90d85 --- /dev/null +++ b/drivers/dma/idxd/sysfs.c @@ -0,0 +1,1934 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include "registers.h" +#include "idxd.h" + +static char *idxd_wq_type_names[] = { + [IDXD_WQT_NONE] = "none", + [IDXD_WQT_KERNEL] = "kernel", + [IDXD_WQT_USER] = "user", +}; + +/* IDXD engine attributes */ +static ssize_t engine_group_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_engine *engine = confdev_to_engine(dev); + + if (engine->group) + return sysfs_emit(buf, "%d\n", engine->group->id); + else + return sysfs_emit(buf, "%d\n", -1); +} + +static ssize_t engine_group_id_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_engine *engine = confdev_to_engine(dev); + struct idxd_device *idxd = engine->idxd; + long id; + int rc; + struct idxd_group *prevg; + + rc = kstrtol(buf, 10, &id); + if (rc < 0) + return -EINVAL; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (id > idxd->max_groups - 1 || id < -1) + return -EINVAL; + + if (id == -1) { + if (engine->group) { + engine->group->num_engines--; + engine->group = NULL; + } + return count; + } + + prevg = engine->group; + + if (prevg) + prevg->num_engines--; + engine->group = idxd->groups[id]; + engine->group->num_engines++; + + return count; +} + +static struct device_attribute dev_attr_engine_group = + __ATTR(group_id, 0644, engine_group_id_show, + engine_group_id_store); + +static struct attribute *idxd_engine_attributes[] = { + &dev_attr_engine_group.attr, + NULL, +}; + +static const struct attribute_group idxd_engine_attribute_group = { + .attrs = idxd_engine_attributes, +}; + +static const struct attribute_group *idxd_engine_attribute_groups[] = { + &idxd_engine_attribute_group, + NULL, +}; + +static void idxd_conf_engine_release(struct device *dev) +{ + struct idxd_engine *engine = confdev_to_engine(dev); + + kfree(engine); +} + +struct device_type idxd_engine_device_type = { + .name = "engine", + .release = idxd_conf_engine_release, + .groups = idxd_engine_attribute_groups, +}; + +/* Group attributes */ + +static void idxd_set_free_rdbufs(struct idxd_device *idxd) +{ + int i, rdbufs; + + for (i = 0, rdbufs = 0; i < idxd->max_groups; i++) { + struct idxd_group *g = idxd->groups[i]; + + rdbufs += g->rdbufs_reserved; + } + + idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs; +} + +static ssize_t group_read_buffers_reserved_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%u\n", group->rdbufs_reserved); +} + +static ssize_t group_tokens_reserved_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n"); + return group_read_buffers_reserved_show(dev, attr, buf); +} + +static ssize_t group_read_buffers_reserved_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + unsigned long val; + int rc; + + rc = kstrtoul(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (idxd->data->type == IDXD_TYPE_IAX) + return -EOPNOTSUPP; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (val > idxd->max_rdbufs) + return -EINVAL; + + if (val > idxd->nr_rdbufs + group->rdbufs_reserved) + return -EINVAL; + + group->rdbufs_reserved = val; + idxd_set_free_rdbufs(idxd); + return count; +} + +static ssize_t group_tokens_reserved_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n"); + return group_read_buffers_reserved_store(dev, attr, buf, count); +} + +static struct device_attribute dev_attr_group_tokens_reserved = + __ATTR(tokens_reserved, 0644, group_tokens_reserved_show, + group_tokens_reserved_store); + +static struct device_attribute dev_attr_group_read_buffers_reserved = + __ATTR(read_buffers_reserved, 0644, group_read_buffers_reserved_show, + group_read_buffers_reserved_store); + +static ssize_t group_read_buffers_allowed_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%u\n", group->rdbufs_allowed); +} + +static ssize_t group_tokens_allowed_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n"); + return group_read_buffers_allowed_show(dev, attr, buf); +} + +static ssize_t group_read_buffers_allowed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + unsigned long val; + int rc; + + rc = kstrtoul(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (idxd->data->type == IDXD_TYPE_IAX) + return -EOPNOTSUPP; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (val < 4 * group->num_engines || + val > group->rdbufs_reserved + idxd->nr_rdbufs) + return -EINVAL; + + group->rdbufs_allowed = val; + return count; +} + +static ssize_t group_tokens_allowed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n"); + return group_read_buffers_allowed_store(dev, attr, buf, count); +} + +static struct device_attribute dev_attr_group_tokens_allowed = + __ATTR(tokens_allowed, 0644, group_tokens_allowed_show, + group_tokens_allowed_store); + +static struct device_attribute dev_attr_group_read_buffers_allowed = + __ATTR(read_buffers_allowed, 0644, group_read_buffers_allowed_show, + group_read_buffers_allowed_store); + +static ssize_t group_use_read_buffer_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit); +} + +static ssize_t group_use_token_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n"); + return group_use_read_buffer_limit_show(dev, attr, buf); +} + +static ssize_t group_use_read_buffer_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + unsigned long val; + int rc; + + rc = kstrtoul(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (idxd->data->type == IDXD_TYPE_IAX) + return -EOPNOTSUPP; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (idxd->rdbuf_limit == 0) + return -EPERM; + + group->use_rdbuf_limit = !!val; + return count; +} + +static ssize_t group_use_token_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n"); + return group_use_read_buffer_limit_store(dev, attr, buf, count); +} + +static struct device_attribute dev_attr_group_use_token_limit = + __ATTR(use_token_limit, 0644, group_use_token_limit_show, + group_use_token_limit_store); + +static struct device_attribute dev_attr_group_use_read_buffer_limit = + __ATTR(use_read_buffer_limit, 0644, group_use_read_buffer_limit_show, + group_use_read_buffer_limit_store); + +static ssize_t group_engines_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + int i, rc = 0; + struct idxd_device *idxd = group->idxd; + + for (i = 0; i < idxd->max_engines; i++) { + struct idxd_engine *engine = idxd->engines[i]; + + if (!engine->group) + continue; + + if (engine->group->id == group->id) + rc += sysfs_emit_at(buf, rc, "engine%d.%d ", idxd->id, engine->id); + } + + if (!rc) + return 0; + rc--; + rc += sysfs_emit_at(buf, rc, "\n"); + + return rc; +} + +static struct device_attribute dev_attr_group_engines = + __ATTR(engines, 0444, group_engines_show, NULL); + +static ssize_t group_work_queues_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + int i, rc = 0; + struct idxd_device *idxd = group->idxd; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + if (!wq->group) + continue; + + if (wq->group->id == group->id) + rc += sysfs_emit_at(buf, rc, "wq%d.%d ", idxd->id, wq->id); + } + + if (!rc) + return 0; + rc--; + rc += sysfs_emit_at(buf, rc, "\n"); + + return rc; +} + +static struct device_attribute dev_attr_group_work_queues = + __ATTR(work_queues, 0444, group_work_queues_show, NULL); + +static ssize_t group_traffic_class_a_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->tc_a); +} + +static ssize_t group_traffic_class_a_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + long val; + int rc; + + rc = kstrtol(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) + return -EPERM; + + if (val < 0 || val > 7) + return -EINVAL; + + group->tc_a = val; + return count; +} + +static struct device_attribute dev_attr_group_traffic_class_a = + __ATTR(traffic_class_a, 0644, group_traffic_class_a_show, + group_traffic_class_a_store); + +static ssize_t group_traffic_class_b_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->tc_b); +} + +static ssize_t group_traffic_class_b_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + long val; + int rc; + + rc = kstrtol(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) + return -EPERM; + + if (val < 0 || val > 7) + return -EINVAL; + + group->tc_b = val; + return count; +} + +static struct device_attribute dev_attr_group_traffic_class_b = + __ATTR(traffic_class_b, 0644, group_traffic_class_b_show, + group_traffic_class_b_store); + +static ssize_t group_desc_progress_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->desc_progress_limit); +} + +static ssize_t group_desc_progress_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + int val, rc; + + rc = kstrtoint(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (val & ~GENMASK(1, 0)) + return -EINVAL; + + group->desc_progress_limit = val; + return count; +} + +static struct device_attribute dev_attr_group_desc_progress_limit = + __ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show, + group_desc_progress_limit_store); + +static ssize_t group_batch_progress_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->batch_progress_limit); +} + +static ssize_t group_batch_progress_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + int val, rc; + + rc = kstrtoint(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (val & ~GENMASK(1, 0)) + return -EINVAL; + + group->batch_progress_limit = val; + return count; +} + +static struct device_attribute dev_attr_group_batch_progress_limit = + __ATTR(batch_progress_limit, 0644, group_batch_progress_limit_show, + group_batch_progress_limit_store); +static struct attribute *idxd_group_attributes[] = { + &dev_attr_group_work_queues.attr, + &dev_attr_group_engines.attr, + &dev_attr_group_use_token_limit.attr, + &dev_attr_group_use_read_buffer_limit.attr, + &dev_attr_group_tokens_allowed.attr, + &dev_attr_group_read_buffers_allowed.attr, + &dev_attr_group_tokens_reserved.attr, + &dev_attr_group_read_buffers_reserved.attr, + &dev_attr_group_traffic_class_a.attr, + &dev_attr_group_traffic_class_b.attr, + &dev_attr_group_desc_progress_limit.attr, + &dev_attr_group_batch_progress_limit.attr, + NULL, +}; + +static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return (attr == &dev_attr_group_desc_progress_limit.attr || + attr == &dev_attr_group_batch_progress_limit.attr) && + !idxd->hw.group_cap.progress_limit; +} + +static bool idxd_group_attr_read_buffers_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* + * Intel IAA does not support Read Buffer allocation control, + * make these attributes invisible. + */ + return (attr == &dev_attr_group_use_token_limit.attr || + attr == &dev_attr_group_use_read_buffer_limit.attr || + attr == &dev_attr_group_tokens_allowed.attr || + attr == &dev_attr_group_read_buffers_allowed.attr || + attr == &dev_attr_group_tokens_reserved.attr || + attr == &dev_attr_group_read_buffers_reserved.attr) && + idxd->data->type == IDXD_TYPE_IAX; +} + +static umode_t idxd_group_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + + if (idxd_group_attr_progress_limit_invisible(attr, idxd)) + return 0; + + if (idxd_group_attr_read_buffers_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + +static const struct attribute_group idxd_group_attribute_group = { + .attrs = idxd_group_attributes, + .is_visible = idxd_group_attr_visible, +}; + +static const struct attribute_group *idxd_group_attribute_groups[] = { + &idxd_group_attribute_group, + NULL, +}; + +static void idxd_conf_group_release(struct device *dev) +{ + struct idxd_group *group = confdev_to_group(dev); + + kfree(group); +} + +struct device_type idxd_group_device_type = { + .name = "group", + .release = idxd_conf_group_release, + .groups = idxd_group_attribute_groups, +}; + +/* IDXD work queue attribs */ +static ssize_t wq_clients_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%d\n", wq->client_count); +} + +static struct device_attribute dev_attr_wq_clients = + __ATTR(clients, 0444, wq_clients_show, NULL); + +static ssize_t wq_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + switch (wq->state) { + case IDXD_WQ_DISABLED: + return sysfs_emit(buf, "disabled\n"); + case IDXD_WQ_ENABLED: + return sysfs_emit(buf, "enabled\n"); + } + + return sysfs_emit(buf, "unknown\n"); +} + +static struct device_attribute dev_attr_wq_state = + __ATTR(state, 0444, wq_state_show, NULL); + +static ssize_t wq_group_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + if (wq->group) + return sysfs_emit(buf, "%u\n", wq->group->id); + else + return sysfs_emit(buf, "-1\n"); +} + +static ssize_t wq_group_id_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + long id; + int rc; + struct idxd_group *prevg, *group; + + rc = kstrtol(buf, 10, &id); + if (rc < 0) + return -EINVAL; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (id > idxd->max_groups - 1 || id < -1) + return -EINVAL; + + if (id == -1) { + if (wq->group) { + wq->group->num_wqs--; + wq->group = NULL; + } + return count; + } + + group = idxd->groups[id]; + prevg = wq->group; + + if (prevg) + prevg->num_wqs--; + wq->group = group; + group->num_wqs++; + return count; +} + +static struct device_attribute dev_attr_wq_group_id = + __ATTR(group_id, 0644, wq_group_id_show, wq_group_id_store); + +static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared"); +} + +static ssize_t wq_mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (sysfs_streq(buf, "dedicated")) { + set_bit(WQ_FLAG_DEDICATED, &wq->flags); + wq->threshold = 0; + } else if (sysfs_streq(buf, "shared")) { + clear_bit(WQ_FLAG_DEDICATED, &wq->flags); + } else { + return -EINVAL; + } + + return count; +} + +static struct device_attribute dev_attr_wq_mode = + __ATTR(mode, 0644, wq_mode_show, wq_mode_store); + +static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", wq->size); +} + +static int total_claimed_wq_size(struct idxd_device *idxd) +{ + int i; + int wq_size = 0; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + wq_size += wq->size; + } + + return wq_size; +} + +static ssize_t wq_size_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + unsigned long size; + struct idxd_device *idxd = wq->idxd; + int rc; + + rc = kstrtoul(buf, 10, &size); + if (rc < 0) + return -EINVAL; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size) + return -EINVAL; + + wq->size = size; + return count; +} + +static struct device_attribute dev_attr_wq_size = + __ATTR(size, 0644, wq_size_show, wq_size_store); + +static ssize_t wq_priority_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", wq->priority); +} + +static ssize_t wq_priority_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + unsigned long prio; + struct idxd_device *idxd = wq->idxd; + int rc; + + rc = kstrtoul(buf, 10, &prio); + if (rc < 0) + return -EINVAL; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (prio > IDXD_MAX_PRIORITY) + return -EINVAL; + + wq->priority = prio; + return count; +} + +static struct device_attribute dev_attr_wq_priority = + __ATTR(priority, 0644, wq_priority_show, wq_priority_store); + +static ssize_t wq_block_on_fault_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); +} + +static ssize_t wq_block_on_fault_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + bool bof; + int rc; + + if (!idxd->hw.gen_cap.block_on_fault) + return -EOPNOTSUPP; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -ENXIO; + + rc = kstrtobool(buf, &bof); + if (rc < 0) + return rc; + + if (bof) { + if (test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)) + return -EOPNOTSUPP; + + set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } else { + clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } + + return count; +} + +static struct device_attribute dev_attr_wq_block_on_fault = + __ATTR(block_on_fault, 0644, wq_block_on_fault_show, + wq_block_on_fault_store); + +static ssize_t wq_threshold_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", wq->threshold); +} + +static ssize_t wq_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc < 0) + return -EINVAL; + + if (val > wq->size || val <= 0) + return -EINVAL; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -ENXIO; + + if (test_bit(WQ_FLAG_DEDICATED, &wq->flags)) + return -EINVAL; + + wq->threshold = val; + + return count; +} + +static struct device_attribute dev_attr_wq_threshold = + __ATTR(threshold, 0644, wq_threshold_show, wq_threshold_store); + +static ssize_t wq_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + switch (wq->type) { + case IDXD_WQT_KERNEL: + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_KERNEL]); + case IDXD_WQT_USER: + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_USER]); + case IDXD_WQT_NONE: + default: + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_NONE]); + } + + return -EINVAL; +} + +static ssize_t wq_type_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + enum idxd_wq_type old_type; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + old_type = wq->type; + if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_NONE])) + wq->type = IDXD_WQT_NONE; + else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL])) + wq->type = IDXD_WQT_KERNEL; + else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_USER])) + wq->type = IDXD_WQT_USER; + else + return -EINVAL; + + /* If we are changing queue type, clear the name */ + if (wq->type != old_type) + memset(wq->name, 0, WQ_NAME_SIZE + 1); + + return count; +} + +static struct device_attribute dev_attr_wq_type = + __ATTR(type, 0644, wq_type_show, wq_type_store); + +static ssize_t wq_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%s\n", wq->name); +} + +static ssize_t wq_name_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + char *input, *pos; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0) + return -EINVAL; + + input = kstrndup(buf, count, GFP_KERNEL); + if (!input) + return -ENOMEM; + + pos = strim(input); + memset(wq->name, 0, WQ_NAME_SIZE + 1); + sprintf(wq->name, "%s", pos); + kfree(input); + return count; +} + +static struct device_attribute dev_attr_wq_name = + __ATTR(name, 0644, wq_name_show, wq_name_store); + +static ssize_t wq_cdev_minor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + int minor = -1; + + mutex_lock(&wq->wq_lock); + if (wq->idxd_cdev) + minor = wq->idxd_cdev->minor; + mutex_unlock(&wq->wq_lock); + + if (minor == -1) + return -ENXIO; + return sysfs_emit(buf, "%d\n", minor); +} + +static struct device_attribute dev_attr_wq_cdev_minor = + __ATTR(cdev_minor, 0444, wq_cdev_minor_show, NULL); + +static int __get_sysfs_u64(const char *buf, u64 *val) +{ + int rc; + + rc = kstrtou64(buf, 0, val); + if (rc < 0) + return -EINVAL; + + if (*val == 0) + return -EINVAL; + + *val = roundup_pow_of_two(*val); + return 0; +} + +static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes); +} + +static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + u64 xfer_size; + int rc; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + rc = __get_sysfs_u64(buf, &xfer_size); + if (rc < 0) + return rc; + + if (xfer_size > idxd->max_xfer_bytes) + return -EINVAL; + + wq->max_xfer_bytes = xfer_size; + + return count; +} + +static struct device_attribute dev_attr_wq_max_transfer_size = + __ATTR(max_transfer_size, 0644, + wq_max_transfer_size_show, wq_max_transfer_size_store); + +static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", wq->max_batch_size); +} + +static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + u64 batch_size; + int rc; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + rc = __get_sysfs_u64(buf, &batch_size); + if (rc < 0) + return rc; + + if (batch_size > idxd->max_batch_size) + return -EINVAL; + + idxd_wq_set_max_batch_size(idxd->data->type, wq, (u32)batch_size); + + return count; +} + +static struct device_attribute dev_attr_wq_max_batch_size = + __ATTR(max_batch_size, 0644, wq_max_batch_size_show, wq_max_batch_size_store); + +static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags)); +} + +static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + bool ats_dis; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + rc = kstrtobool(buf, &ats_dis); + if (rc < 0) + return rc; + + if (ats_dis) + set_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); + else + clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); + + return count; +} + +static struct device_attribute dev_attr_wq_ats_disable = + __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store); + +static ssize_t wq_prs_disable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)); +} + +static ssize_t wq_prs_disable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + bool prs_dis; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + rc = kstrtobool(buf, &prs_dis); + if (rc < 0) + return rc; + + if (prs_dis) { + set_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + /* when PRS is disabled, BOF needs to be off as well */ + clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } else { + clear_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + } + return count; +} + +static struct device_attribute dev_attr_wq_prs_disable = + __ATTR(prs_disable, 0644, wq_prs_disable_show, wq_prs_disable_store); + +static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + u32 occup, offset; + + if (!idxd->hw.wq_cap.occupancy) + return -EOPNOTSUPP; + + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_OCCUP_IDX); + occup = ioread32(idxd->reg_base + offset) & WQCFG_OCCUP_MASK; + + return sysfs_emit(buf, "%u\n", occup); +} + +static struct device_attribute dev_attr_wq_occupancy = + __ATTR(occupancy, 0444, wq_occupancy_show, NULL); + +static ssize_t wq_enqcmds_retries_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", wq->enqcmds_retries); +} + +static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + int rc; + unsigned int retries; + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + rc = kstrtouint(buf, 10, &retries); + if (rc < 0) + return rc; + + if (retries > IDXD_ENQCMDS_MAX_RETRIES) + retries = IDXD_ENQCMDS_MAX_RETRIES; + + wq->enqcmds_retries = retries; + return count; +} + +static struct device_attribute dev_attr_wq_enqcmds_retries = + __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); + +static ssize_t wq_op_config_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, wq->opcap_bmap); +} + +static int idxd_verify_supported_opcap(struct idxd_device *idxd, unsigned long *opmask) +{ + int bit; + + /* + * The OPCAP is defined as 256 bits that represents each operation the device + * supports per bit. Iterate through all the bits and check if the input mask + * is set for bits that are not set in the OPCAP for the device. If no OPCAP + * bit is set and input mask has the bit set, then return error. + */ + for_each_set_bit(bit, opmask, IDXD_MAX_OPCAP_BITS) { + if (!test_bit(bit, idxd->opcap_bmap)) + return -EINVAL; + } + + return 0; +} + +static ssize_t wq_op_config_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + unsigned long *opmask; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + opmask = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); + if (!opmask) + return -ENOMEM; + + rc = bitmap_parse(buf, count, opmask, IDXD_MAX_OPCAP_BITS); + if (rc < 0) + goto err; + + rc = idxd_verify_supported_opcap(idxd, opmask); + if (rc < 0) + goto err; + + bitmap_copy(wq->opcap_bmap, opmask, IDXD_MAX_OPCAP_BITS); + + bitmap_free(opmask); + return count; + +err: + bitmap_free(opmask); + return rc; +} + +static struct device_attribute dev_attr_wq_op_config = + __ATTR(op_config, 0644, wq_op_config_show, wq_op_config_store); + +static struct attribute *idxd_wq_attributes[] = { + &dev_attr_wq_clients.attr, + &dev_attr_wq_state.attr, + &dev_attr_wq_group_id.attr, + &dev_attr_wq_mode.attr, + &dev_attr_wq_size.attr, + &dev_attr_wq_priority.attr, + &dev_attr_wq_block_on_fault.attr, + &dev_attr_wq_threshold.attr, + &dev_attr_wq_type.attr, + &dev_attr_wq_name.attr, + &dev_attr_wq_cdev_minor.attr, + &dev_attr_wq_max_transfer_size.attr, + &dev_attr_wq_max_batch_size.attr, + &dev_attr_wq_ats_disable.attr, + &dev_attr_wq_prs_disable.attr, + &dev_attr_wq_occupancy.attr, + &dev_attr_wq_enqcmds_retries.attr, + &dev_attr_wq_op_config.attr, + NULL, +}; + +/* A WQ attr is invisible if the feature is not supported in WQCAP. */ +#define idxd_wq_attr_invisible(name, cap_field, a, idxd) \ + ((a) == &dev_attr_wq_##name.attr && !(idxd)->hw.wq_cap.cap_field) + +static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* Intel IAA does not support batch processing, make it invisible */ + return attr == &dev_attr_wq_max_batch_size.attr && + idxd->data->type == IDXD_TYPE_IAX; +} + +static umode_t idxd_wq_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + + if (idxd_wq_attr_invisible(op_config, op_config, attr, idxd)) + return 0; + + if (idxd_wq_attr_max_batch_size_invisible(attr, idxd)) + return 0; + + if (idxd_wq_attr_invisible(prs_disable, wq_prs_support, attr, idxd)) + return 0; + + if (idxd_wq_attr_invisible(ats_disable, wq_ats_support, attr, idxd)) + return 0; + + return attr->mode; +} + +static const struct attribute_group idxd_wq_attribute_group = { + .attrs = idxd_wq_attributes, + .is_visible = idxd_wq_attr_visible, +}; + +static const struct attribute_group *idxd_wq_attribute_groups[] = { + &idxd_wq_attribute_group, + NULL, +}; + +static void idxd_conf_wq_release(struct device *dev) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); + xa_destroy(&wq->upasid_xa); + kfree(wq); +} + +struct device_type idxd_wq_device_type = { + .name = "wq", + .release = idxd_conf_wq_release, + .groups = idxd_wq_attribute_groups, +}; + +/* IDXD device attribs */ +static ssize_t version_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%#x\n", idxd->hw.version); +} +static DEVICE_ATTR_RO(version); + +static ssize_t max_work_queues_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->max_wq_size); +} +static DEVICE_ATTR_RO(max_work_queues_size); + +static ssize_t max_groups_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->max_groups); +} +static DEVICE_ATTR_RO(max_groups); + +static ssize_t max_work_queues_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->max_wqs); +} +static DEVICE_ATTR_RO(max_work_queues); + +static ssize_t max_engines_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->max_engines); +} +static DEVICE_ATTR_RO(max_engines); + +static ssize_t numa_node_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev)); +} +static DEVICE_ATTR_RO(numa_node); + +static ssize_t max_batch_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->max_batch_size); +} +static DEVICE_ATTR_RO(max_batch_size); + +static ssize_t max_transfer_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes); +} +static DEVICE_ATTR_RO(max_transfer_size); + +static ssize_t op_cap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, idxd->opcap_bmap); +} +static DEVICE_ATTR_RO(op_cap); + +static ssize_t gen_cap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits); +} +static DEVICE_ATTR_RO(gen_cap); + +static ssize_t configurable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)); +} +static DEVICE_ATTR_RO(configurable); + +static ssize_t clients_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + int count = 0, i; + + spin_lock(&idxd->dev_lock); + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + count += wq->client_count; + } + spin_unlock(&idxd->dev_lock); + + return sysfs_emit(buf, "%d\n", count); +} +static DEVICE_ATTR_RO(clients); + +static ssize_t pasid_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", device_user_pasid_enabled(idxd)); +} +static DEVICE_ATTR_RO(pasid_enabled); + +static ssize_t state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + switch (idxd->state) { + case IDXD_DEV_DISABLED: + return sysfs_emit(buf, "disabled\n"); + case IDXD_DEV_ENABLED: + return sysfs_emit(buf, "enabled\n"); + case IDXD_DEV_HALTED: + return sysfs_emit(buf, "halted\n"); + } + + return sysfs_emit(buf, "unknown\n"); +} +static DEVICE_ATTR_RO(state); + +static ssize_t errors_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + DECLARE_BITMAP(swerr_bmap, 256); + + bitmap_zero(swerr_bmap, 256); + spin_lock(&idxd->dev_lock); + multi_u64_to_bmap(swerr_bmap, &idxd->sw_err.bits[0], 4); + spin_unlock(&idxd->dev_lock); + return sysfs_emit(buf, "%*pb\n", 256, swerr_bmap); +} +static DEVICE_ATTR_RO(errors); + +static ssize_t max_read_buffers_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->max_rdbufs); +} + +static ssize_t max_tokens_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see max_read_buffers.\n"); + return max_read_buffers_show(dev, attr, buf); +} + +static DEVICE_ATTR_RO(max_tokens); /* deprecated */ +static DEVICE_ATTR_RO(max_read_buffers); + +static ssize_t read_buffer_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit); +} + +static ssize_t token_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffer_limit.\n"); + return read_buffer_limit_show(dev, attr, buf); +} + +static ssize_t read_buffer_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + unsigned long val; + int rc; + + rc = kstrtoul(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (!idxd->hw.group_cap.rdbuf_limit) + return -EPERM; + + if (val > idxd->hw.group_cap.total_rdbufs) + return -EINVAL; + + idxd->rdbuf_limit = val; + return count; +} + +static ssize_t token_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffer_limit\n"); + return read_buffer_limit_store(dev, attr, buf, count); +} + +static DEVICE_ATTR_RW(token_limit); /* deprecated */ +static DEVICE_ATTR_RW(read_buffer_limit); + +static ssize_t cdev_major_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->major); +} +static DEVICE_ATTR_RO(cdev_major); + +static ssize_t cmd_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%#x\n", idxd->cmd_status); +} + +static ssize_t cmd_status_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + idxd->cmd_status = 0; + return count; +} +static DEVICE_ATTR_RW(cmd_status); + +static ssize_t iaa_cap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (idxd->hw.version < DEVICE_VERSION_2) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%#llx\n", idxd->hw.iaa_cap.bits); +} +static DEVICE_ATTR_RO(iaa_cap); + +static ssize_t event_log_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (!idxd->evl) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", idxd->evl->size); +} + +static ssize_t event_log_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + unsigned long val; + int rc; + + if (!idxd->evl) + return -EOPNOTSUPP; + + rc = kstrtoul(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX || + (val * evl_ent_size(idxd) > ULONG_MAX - idxd->evl->dma)) + return -EINVAL; + + idxd->evl->size = val; + return count; +} +static DEVICE_ATTR_RW(event_log_size); + +static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* Intel IAA does not support batch processing, make it invisible */ + return attr == &dev_attr_max_batch_size.attr && + idxd->data->type == IDXD_TYPE_IAX; +} + +static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* + * Intel IAA does not support Read Buffer allocation control, + * make these attributes invisible. + */ + return (attr == &dev_attr_max_tokens.attr || + attr == &dev_attr_max_read_buffers.attr || + attr == &dev_attr_token_limit.attr || + attr == &dev_attr_read_buffer_limit.attr) && + idxd->data->type == IDXD_TYPE_IAX; +} + +static bool idxd_device_attr_iaa_cap_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_iaa_cap.attr && + (idxd->data->type != IDXD_TYPE_IAX || + idxd->hw.version < DEVICE_VERSION_2); +} + +static bool idxd_device_attr_event_log_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return (attr == &dev_attr_event_log_size.attr && + !idxd->hw.gen_cap.evl_support); +} + +static umode_t idxd_device_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (idxd_device_attr_max_batch_size_invisible(attr, idxd)) + return 0; + + if (idxd_device_attr_read_buffers_invisible(attr, idxd)) + return 0; + + if (idxd_device_attr_iaa_cap_invisible(attr, idxd)) + return 0; + + if (idxd_device_attr_event_log_size_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + +static struct attribute *idxd_device_attributes[] = { + &dev_attr_version.attr, + &dev_attr_max_groups.attr, + &dev_attr_max_work_queues.attr, + &dev_attr_max_work_queues_size.attr, + &dev_attr_max_engines.attr, + &dev_attr_numa_node.attr, + &dev_attr_max_batch_size.attr, + &dev_attr_max_transfer_size.attr, + &dev_attr_op_cap.attr, + &dev_attr_gen_cap.attr, + &dev_attr_configurable.attr, + &dev_attr_clients.attr, + &dev_attr_pasid_enabled.attr, + &dev_attr_state.attr, + &dev_attr_errors.attr, + &dev_attr_max_tokens.attr, + &dev_attr_max_read_buffers.attr, + &dev_attr_token_limit.attr, + &dev_attr_read_buffer_limit.attr, + &dev_attr_cdev_major.attr, + &dev_attr_cmd_status.attr, + &dev_attr_iaa_cap.attr, + &dev_attr_event_log_size.attr, + NULL, +}; + +static const struct attribute_group idxd_device_attribute_group = { + .attrs = idxd_device_attributes, + .is_visible = idxd_device_attr_visible, +}; + +static const struct attribute_group *idxd_attribute_groups[] = { + &idxd_device_attribute_group, + NULL, +}; + +static void idxd_conf_device_release(struct device *dev) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + kfree(idxd->groups); + bitmap_free(idxd->wq_enable_map); + kfree(idxd->wqs); + kfree(idxd->engines); + kfree(idxd->evl); + kmem_cache_destroy(idxd->evl_cache); + ida_free(&idxd_ida, idxd->id); + bitmap_free(idxd->opcap_bmap); + kfree(idxd); +} + +struct device_type dsa_device_type = { + .name = "dsa", + .release = idxd_conf_device_release, + .groups = idxd_attribute_groups, +}; + +struct device_type iax_device_type = { + .name = "iax", + .release = idxd_conf_device_release, + .groups = idxd_attribute_groups, +}; + +static int idxd_register_engine_devices(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + int i, j, rc; + + for (i = 0; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + rc = device_add(engine_confdev(engine)); + if (rc < 0) + goto cleanup; + } + + return 0; + +cleanup: + j = i - 1; + for (; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + put_device(engine_confdev(engine)); + } + + while (j--) { + engine = idxd->engines[j]; + device_unregister(engine_confdev(engine)); + } + return rc; +} + +static int idxd_register_group_devices(struct idxd_device *idxd) +{ + struct idxd_group *group; + int i, j, rc; + + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + rc = device_add(group_confdev(group)); + if (rc < 0) + goto cleanup; + } + + return 0; + +cleanup: + j = i - 1; + for (; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + } + + while (j--) { + group = idxd->groups[j]; + device_unregister(group_confdev(group)); + } + return rc; +} + +static int idxd_register_wq_devices(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + int i, rc, j; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + rc = device_add(wq_confdev(wq)); + if (rc < 0) + goto cleanup; + } + + return 0; + +cleanup: + j = i - 1; + for (; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + put_device(wq_confdev(wq)); + } + + while (j--) { + wq = idxd->wqs[j]; + device_unregister(wq_confdev(wq)); + } + return rc; +} + +int idxd_register_devices(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + int rc, i; + + rc = device_add(idxd_confdev(idxd)); + if (rc < 0) + return rc; + + rc = idxd_register_wq_devices(idxd); + if (rc < 0) { + dev_dbg(dev, "WQ devices registering failed: %d\n", rc); + goto err_wq; + } + + rc = idxd_register_engine_devices(idxd); + if (rc < 0) { + dev_dbg(dev, "Engine devices registering failed: %d\n", rc); + goto err_engine; + } + + rc = idxd_register_group_devices(idxd); + if (rc < 0) { + dev_dbg(dev, "Group device registering failed: %d\n", rc); + goto err_group; + } + + return 0; + + err_group: + for (i = 0; i < idxd->max_engines; i++) + device_unregister(engine_confdev(idxd->engines[i])); + err_engine: + for (i = 0; i < idxd->max_wqs; i++) + device_unregister(wq_confdev(idxd->wqs[i])); + err_wq: + device_del(idxd_confdev(idxd)); + return rc; +} + +void idxd_unregister_devices(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + device_unregister(wq_confdev(wq)); + } + + for (i = 0; i < idxd->max_engines; i++) { + struct idxd_engine *engine = idxd->engines[i]; + + device_unregister(engine_confdev(engine)); + } + + for (i = 0; i < idxd->max_groups; i++) { + struct idxd_group *group = idxd->groups[i]; + + device_unregister(group_confdev(group)); + } +} + +int idxd_register_bus_type(void) +{ + return bus_register(&dsa_bus_type); +} + +void idxd_unregister_bus_type(void) +{ + bus_unregister(&dsa_bus_type); +} diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c new file mode 100644 index 0000000000..9be0d3226e --- /dev/null +++ b/drivers/dma/img-mdc-dma.c @@ -0,0 +1,1087 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IMG Multi-threaded DMA Controller (MDC) + * + * Copyright (C) 2009,2012,2013 Imagination Technologies Ltd. + * Copyright (C) 2014 Google, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +#define MDC_MAX_DMA_CHANNELS 32 + +#define MDC_GENERAL_CONFIG 0x000 +#define MDC_GENERAL_CONFIG_LIST_IEN BIT(31) +#define MDC_GENERAL_CONFIG_IEN BIT(29) +#define MDC_GENERAL_CONFIG_LEVEL_INT BIT(28) +#define MDC_GENERAL_CONFIG_INC_W BIT(12) +#define MDC_GENERAL_CONFIG_INC_R BIT(8) +#define MDC_GENERAL_CONFIG_PHYSICAL_W BIT(7) +#define MDC_GENERAL_CONFIG_WIDTH_W_SHIFT 4 +#define MDC_GENERAL_CONFIG_WIDTH_W_MASK 0x7 +#define MDC_GENERAL_CONFIG_PHYSICAL_R BIT(3) +#define MDC_GENERAL_CONFIG_WIDTH_R_SHIFT 0 +#define MDC_GENERAL_CONFIG_WIDTH_R_MASK 0x7 + +#define MDC_READ_PORT_CONFIG 0x004 +#define MDC_READ_PORT_CONFIG_STHREAD_SHIFT 28 +#define MDC_READ_PORT_CONFIG_STHREAD_MASK 0xf +#define MDC_READ_PORT_CONFIG_RTHREAD_SHIFT 24 +#define MDC_READ_PORT_CONFIG_RTHREAD_MASK 0xf +#define MDC_READ_PORT_CONFIG_WTHREAD_SHIFT 16 +#define MDC_READ_PORT_CONFIG_WTHREAD_MASK 0xf +#define MDC_READ_PORT_CONFIG_BURST_SIZE_SHIFT 4 +#define MDC_READ_PORT_CONFIG_BURST_SIZE_MASK 0xff +#define MDC_READ_PORT_CONFIG_DREQ_ENABLE BIT(1) + +#define MDC_READ_ADDRESS 0x008 + +#define MDC_WRITE_ADDRESS 0x00c + +#define MDC_TRANSFER_SIZE 0x010 +#define MDC_TRANSFER_SIZE_MASK 0xffffff + +#define MDC_LIST_NODE_ADDRESS 0x014 + +#define MDC_CMDS_PROCESSED 0x018 +#define MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT 16 +#define MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK 0x3f +#define MDC_CMDS_PROCESSED_INT_ACTIVE BIT(8) +#define MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT 0 +#define MDC_CMDS_PROCESSED_CMDS_DONE_MASK 0x3f + +#define MDC_CONTROL_AND_STATUS 0x01c +#define MDC_CONTROL_AND_STATUS_CANCEL BIT(20) +#define MDC_CONTROL_AND_STATUS_LIST_EN BIT(4) +#define MDC_CONTROL_AND_STATUS_EN BIT(0) + +#define MDC_ACTIVE_TRANSFER_SIZE 0x030 + +#define MDC_GLOBAL_CONFIG_A 0x900 +#define MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_SHIFT 16 +#define MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_MASK 0xff +#define MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_SHIFT 8 +#define MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_MASK 0xff +#define MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_SHIFT 0 +#define MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_MASK 0xff + +struct mdc_hw_list_desc { + u32 gen_conf; + u32 readport_conf; + u32 read_addr; + u32 write_addr; + u32 xfer_size; + u32 node_addr; + u32 cmds_done; + u32 ctrl_status; + /* + * Not part of the list descriptor, but instead used by the CPU to + * traverse the list. + */ + struct mdc_hw_list_desc *next_desc; +}; + +struct mdc_tx_desc { + struct mdc_chan *chan; + struct virt_dma_desc vd; + dma_addr_t list_phys; + struct mdc_hw_list_desc *list; + bool cyclic; + bool cmd_loaded; + unsigned int list_len; + unsigned int list_period_len; + size_t list_xfer_size; + unsigned int list_cmds_done; +}; + +struct mdc_chan { + struct mdc_dma *mdma; + struct virt_dma_chan vc; + struct dma_slave_config config; + struct mdc_tx_desc *desc; + int irq; + unsigned int periph; + unsigned int thread; + unsigned int chan_nr; +}; + +struct mdc_dma_soc_data { + void (*enable_chan)(struct mdc_chan *mchan); + void (*disable_chan)(struct mdc_chan *mchan); +}; + +struct mdc_dma { + struct dma_device dma_dev; + void __iomem *regs; + struct clk *clk; + struct dma_pool *desc_pool; + struct regmap *periph_regs; + spinlock_t lock; + unsigned int nr_threads; + unsigned int nr_channels; + unsigned int bus_width; + unsigned int max_burst_mult; + unsigned int max_xfer_size; + const struct mdc_dma_soc_data *soc; + struct mdc_chan channels[MDC_MAX_DMA_CHANNELS]; +}; + +static inline u32 mdc_readl(struct mdc_dma *mdma, u32 reg) +{ + return readl(mdma->regs + reg); +} + +static inline void mdc_writel(struct mdc_dma *mdma, u32 val, u32 reg) +{ + writel(val, mdma->regs + reg); +} + +static inline u32 mdc_chan_readl(struct mdc_chan *mchan, u32 reg) +{ + return mdc_readl(mchan->mdma, mchan->chan_nr * 0x040 + reg); +} + +static inline void mdc_chan_writel(struct mdc_chan *mchan, u32 val, u32 reg) +{ + mdc_writel(mchan->mdma, val, mchan->chan_nr * 0x040 + reg); +} + +static inline struct mdc_chan *to_mdc_chan(struct dma_chan *c) +{ + return container_of(to_virt_chan(c), struct mdc_chan, vc); +} + +static inline struct mdc_tx_desc *to_mdc_desc(struct dma_async_tx_descriptor *t) +{ + struct virt_dma_desc *vdesc = container_of(t, struct virt_dma_desc, tx); + + return container_of(vdesc, struct mdc_tx_desc, vd); +} + +static inline struct device *mdma2dev(struct mdc_dma *mdma) +{ + return mdma->dma_dev.dev; +} + +static inline unsigned int to_mdc_width(unsigned int bytes) +{ + return ffs(bytes) - 1; +} + +static inline void mdc_set_read_width(struct mdc_hw_list_desc *ldesc, + unsigned int bytes) +{ + ldesc->gen_conf |= to_mdc_width(bytes) << + MDC_GENERAL_CONFIG_WIDTH_R_SHIFT; +} + +static inline void mdc_set_write_width(struct mdc_hw_list_desc *ldesc, + unsigned int bytes) +{ + ldesc->gen_conf |= to_mdc_width(bytes) << + MDC_GENERAL_CONFIG_WIDTH_W_SHIFT; +} + +static void mdc_list_desc_config(struct mdc_chan *mchan, + struct mdc_hw_list_desc *ldesc, + enum dma_transfer_direction dir, + dma_addr_t src, dma_addr_t dst, size_t len) +{ + struct mdc_dma *mdma = mchan->mdma; + unsigned int max_burst, burst_size; + + ldesc->gen_conf = MDC_GENERAL_CONFIG_IEN | MDC_GENERAL_CONFIG_LIST_IEN | + MDC_GENERAL_CONFIG_LEVEL_INT | MDC_GENERAL_CONFIG_PHYSICAL_W | + MDC_GENERAL_CONFIG_PHYSICAL_R; + ldesc->readport_conf = + (mchan->thread << MDC_READ_PORT_CONFIG_STHREAD_SHIFT) | + (mchan->thread << MDC_READ_PORT_CONFIG_RTHREAD_SHIFT) | + (mchan->thread << MDC_READ_PORT_CONFIG_WTHREAD_SHIFT); + ldesc->read_addr = src; + ldesc->write_addr = dst; + ldesc->xfer_size = len - 1; + ldesc->node_addr = 0; + ldesc->cmds_done = 0; + ldesc->ctrl_status = MDC_CONTROL_AND_STATUS_LIST_EN | + MDC_CONTROL_AND_STATUS_EN; + ldesc->next_desc = NULL; + + if (IS_ALIGNED(dst, mdma->bus_width) && + IS_ALIGNED(src, mdma->bus_width)) + max_burst = mdma->bus_width * mdma->max_burst_mult; + else + max_burst = mdma->bus_width * (mdma->max_burst_mult - 1); + + if (dir == DMA_MEM_TO_DEV) { + ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_R; + ldesc->readport_conf |= MDC_READ_PORT_CONFIG_DREQ_ENABLE; + mdc_set_read_width(ldesc, mdma->bus_width); + mdc_set_write_width(ldesc, mchan->config.dst_addr_width); + burst_size = min(max_burst, mchan->config.dst_maxburst * + mchan->config.dst_addr_width); + } else if (dir == DMA_DEV_TO_MEM) { + ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_W; + ldesc->readport_conf |= MDC_READ_PORT_CONFIG_DREQ_ENABLE; + mdc_set_read_width(ldesc, mchan->config.src_addr_width); + mdc_set_write_width(ldesc, mdma->bus_width); + burst_size = min(max_burst, mchan->config.src_maxburst * + mchan->config.src_addr_width); + } else { + ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_R | + MDC_GENERAL_CONFIG_INC_W; + mdc_set_read_width(ldesc, mdma->bus_width); + mdc_set_write_width(ldesc, mdma->bus_width); + burst_size = max_burst; + } + ldesc->readport_conf |= (burst_size - 1) << + MDC_READ_PORT_CONFIG_BURST_SIZE_SHIFT; +} + +static void mdc_list_desc_free(struct mdc_tx_desc *mdesc) +{ + struct mdc_dma *mdma = mdesc->chan->mdma; + struct mdc_hw_list_desc *curr, *next; + dma_addr_t curr_phys, next_phys; + + curr = mdesc->list; + curr_phys = mdesc->list_phys; + while (curr) { + next = curr->next_desc; + next_phys = curr->node_addr; + dma_pool_free(mdma->desc_pool, curr, curr_phys); + curr = next; + curr_phys = next_phys; + } +} + +static void mdc_desc_free(struct virt_dma_desc *vd) +{ + struct mdc_tx_desc *mdesc = to_mdc_desc(&vd->tx); + + mdc_list_desc_free(mdesc); + kfree(mdesc); +} + +static struct dma_async_tx_descriptor *mdc_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, + unsigned long flags) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + struct mdc_dma *mdma = mchan->mdma; + struct mdc_tx_desc *mdesc; + struct mdc_hw_list_desc *curr, *prev = NULL; + dma_addr_t curr_phys; + + if (!len) + return NULL; + + mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT); + if (!mdesc) + return NULL; + mdesc->chan = mchan; + mdesc->list_xfer_size = len; + + while (len > 0) { + size_t xfer_size; + + curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT, &curr_phys); + if (!curr) + goto free_desc; + + if (prev) { + prev->node_addr = curr_phys; + prev->next_desc = curr; + } else { + mdesc->list_phys = curr_phys; + mdesc->list = curr; + } + + xfer_size = min_t(size_t, mdma->max_xfer_size, len); + + mdc_list_desc_config(mchan, curr, DMA_MEM_TO_MEM, src, dest, + xfer_size); + + prev = curr; + + mdesc->list_len++; + src += xfer_size; + dest += xfer_size; + len -= xfer_size; + } + + return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags); + +free_desc: + mdc_desc_free(&mdesc->vd); + + return NULL; +} + +static int mdc_check_slave_width(struct mdc_chan *mchan, + enum dma_transfer_direction dir) +{ + enum dma_slave_buswidth width; + + if (dir == DMA_MEM_TO_DEV) + width = mchan->config.dst_addr_width; + else + width = mchan->config.src_addr_width; + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + case DMA_SLAVE_BUSWIDTH_2_BYTES: + case DMA_SLAVE_BUSWIDTH_4_BYTES: + case DMA_SLAVE_BUSWIDTH_8_BYTES: + break; + default: + return -EINVAL; + } + + if (width > mchan->mdma->bus_width) + return -EINVAL; + + return 0; +} + +static struct dma_async_tx_descriptor *mdc_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + struct mdc_dma *mdma = mchan->mdma; + struct mdc_tx_desc *mdesc; + struct mdc_hw_list_desc *curr, *prev = NULL; + dma_addr_t curr_phys; + + if (!buf_len && !period_len) + return NULL; + + if (!is_slave_direction(dir)) + return NULL; + + if (mdc_check_slave_width(mchan, dir) < 0) + return NULL; + + mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT); + if (!mdesc) + return NULL; + mdesc->chan = mchan; + mdesc->cyclic = true; + mdesc->list_xfer_size = buf_len; + mdesc->list_period_len = DIV_ROUND_UP(period_len, + mdma->max_xfer_size); + + while (buf_len > 0) { + size_t remainder = min(period_len, buf_len); + + while (remainder > 0) { + size_t xfer_size; + + curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT, + &curr_phys); + if (!curr) + goto free_desc; + + if (!prev) { + mdesc->list_phys = curr_phys; + mdesc->list = curr; + } else { + prev->node_addr = curr_phys; + prev->next_desc = curr; + } + + xfer_size = min_t(size_t, mdma->max_xfer_size, + remainder); + + if (dir == DMA_MEM_TO_DEV) { + mdc_list_desc_config(mchan, curr, dir, + buf_addr, + mchan->config.dst_addr, + xfer_size); + } else { + mdc_list_desc_config(mchan, curr, dir, + mchan->config.src_addr, + buf_addr, + xfer_size); + } + + prev = curr; + + mdesc->list_len++; + buf_addr += xfer_size; + buf_len -= xfer_size; + remainder -= xfer_size; + } + } + prev->node_addr = mdesc->list_phys; + + return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags); + +free_desc: + mdc_desc_free(&mdesc->vd); + + return NULL; +} + +static struct dma_async_tx_descriptor *mdc_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + struct mdc_dma *mdma = mchan->mdma; + struct mdc_tx_desc *mdesc; + struct scatterlist *sg; + struct mdc_hw_list_desc *curr, *prev = NULL; + dma_addr_t curr_phys; + unsigned int i; + + if (!sgl) + return NULL; + + if (!is_slave_direction(dir)) + return NULL; + + if (mdc_check_slave_width(mchan, dir) < 0) + return NULL; + + mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT); + if (!mdesc) + return NULL; + mdesc->chan = mchan; + + for_each_sg(sgl, sg, sg_len, i) { + dma_addr_t buf = sg_dma_address(sg); + size_t buf_len = sg_dma_len(sg); + + while (buf_len > 0) { + size_t xfer_size; + + curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT, + &curr_phys); + if (!curr) + goto free_desc; + + if (!prev) { + mdesc->list_phys = curr_phys; + mdesc->list = curr; + } else { + prev->node_addr = curr_phys; + prev->next_desc = curr; + } + + xfer_size = min_t(size_t, mdma->max_xfer_size, + buf_len); + + if (dir == DMA_MEM_TO_DEV) { + mdc_list_desc_config(mchan, curr, dir, buf, + mchan->config.dst_addr, + xfer_size); + } else { + mdc_list_desc_config(mchan, curr, dir, + mchan->config.src_addr, + buf, xfer_size); + } + + prev = curr; + + mdesc->list_len++; + mdesc->list_xfer_size += xfer_size; + buf += xfer_size; + buf_len -= xfer_size; + } + } + + return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags); + +free_desc: + mdc_desc_free(&mdesc->vd); + + return NULL; +} + +static void mdc_issue_desc(struct mdc_chan *mchan) +{ + struct mdc_dma *mdma = mchan->mdma; + struct virt_dma_desc *vd; + struct mdc_tx_desc *mdesc; + u32 val; + + vd = vchan_next_desc(&mchan->vc); + if (!vd) + return; + + list_del(&vd->node); + + mdesc = to_mdc_desc(&vd->tx); + mchan->desc = mdesc; + + dev_dbg(mdma2dev(mdma), "Issuing descriptor on channel %d\n", + mchan->chan_nr); + + mdma->soc->enable_chan(mchan); + + val = mdc_chan_readl(mchan, MDC_GENERAL_CONFIG); + val |= MDC_GENERAL_CONFIG_LIST_IEN | MDC_GENERAL_CONFIG_IEN | + MDC_GENERAL_CONFIG_LEVEL_INT | MDC_GENERAL_CONFIG_PHYSICAL_W | + MDC_GENERAL_CONFIG_PHYSICAL_R; + mdc_chan_writel(mchan, val, MDC_GENERAL_CONFIG); + val = (mchan->thread << MDC_READ_PORT_CONFIG_STHREAD_SHIFT) | + (mchan->thread << MDC_READ_PORT_CONFIG_RTHREAD_SHIFT) | + (mchan->thread << MDC_READ_PORT_CONFIG_WTHREAD_SHIFT); + mdc_chan_writel(mchan, val, MDC_READ_PORT_CONFIG); + mdc_chan_writel(mchan, mdesc->list_phys, MDC_LIST_NODE_ADDRESS); + val = mdc_chan_readl(mchan, MDC_CONTROL_AND_STATUS); + val |= MDC_CONTROL_AND_STATUS_LIST_EN; + mdc_chan_writel(mchan, val, MDC_CONTROL_AND_STATUS); +} + +static void mdc_issue_pending(struct dma_chan *chan) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&mchan->vc.lock, flags); + if (vchan_issue_pending(&mchan->vc) && !mchan->desc) + mdc_issue_desc(mchan); + spin_unlock_irqrestore(&mchan->vc.lock, flags); +} + +static enum dma_status mdc_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + struct mdc_tx_desc *mdesc; + struct virt_dma_desc *vd; + unsigned long flags; + size_t bytes = 0; + int ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + if (!txstate) + return ret; + + spin_lock_irqsave(&mchan->vc.lock, flags); + vd = vchan_find_desc(&mchan->vc, cookie); + if (vd) { + mdesc = to_mdc_desc(&vd->tx); + bytes = mdesc->list_xfer_size; + } else if (mchan->desc && mchan->desc->vd.tx.cookie == cookie) { + struct mdc_hw_list_desc *ldesc; + u32 val1, val2, done, processed, residue; + int i, cmds; + + mdesc = mchan->desc; + + /* + * Determine the number of commands that haven't been + * processed (handled by the IRQ handler) yet. + */ + do { + val1 = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED) & + ~MDC_CMDS_PROCESSED_INT_ACTIVE; + residue = mdc_chan_readl(mchan, + MDC_ACTIVE_TRANSFER_SIZE); + val2 = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED) & + ~MDC_CMDS_PROCESSED_INT_ACTIVE; + } while (val1 != val2); + + done = (val1 >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) & + MDC_CMDS_PROCESSED_CMDS_DONE_MASK; + processed = (val1 >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) & + MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK; + cmds = (done - processed) % + (MDC_CMDS_PROCESSED_CMDS_DONE_MASK + 1); + + /* + * If the command loaded event hasn't been processed yet, then + * the difference above includes an extra command. + */ + if (!mdesc->cmd_loaded) + cmds--; + else + cmds += mdesc->list_cmds_done; + + bytes = mdesc->list_xfer_size; + ldesc = mdesc->list; + for (i = 0; i < cmds; i++) { + bytes -= ldesc->xfer_size + 1; + ldesc = ldesc->next_desc; + } + if (ldesc) { + if (residue != MDC_TRANSFER_SIZE_MASK) + bytes -= ldesc->xfer_size - residue; + else + bytes -= ldesc->xfer_size + 1; + } + } + spin_unlock_irqrestore(&mchan->vc.lock, flags); + + dma_set_residue(txstate, bytes); + + return ret; +} + +static unsigned int mdc_get_new_events(struct mdc_chan *mchan) +{ + u32 val, processed, done1, done2; + unsigned int ret; + + val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED); + processed = (val >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) & + MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK; + /* + * CMDS_DONE may have incremented between reading CMDS_PROCESSED + * and clearing INT_ACTIVE. Re-read CMDS_PROCESSED to ensure we + * didn't miss a command completion. + */ + do { + val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED); + + done1 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) & + MDC_CMDS_PROCESSED_CMDS_DONE_MASK; + + val &= ~((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK << + MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) | + MDC_CMDS_PROCESSED_INT_ACTIVE); + + val |= done1 << MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT; + + mdc_chan_writel(mchan, val, MDC_CMDS_PROCESSED); + + val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED); + + done2 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) & + MDC_CMDS_PROCESSED_CMDS_DONE_MASK; + } while (done1 != done2); + + if (done1 >= processed) + ret = done1 - processed; + else + ret = ((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK + 1) - + processed) + done1; + + return ret; +} + +static int mdc_terminate_all(struct dma_chan *chan) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&mchan->vc.lock, flags); + + mdc_chan_writel(mchan, MDC_CONTROL_AND_STATUS_CANCEL, + MDC_CONTROL_AND_STATUS); + + if (mchan->desc) { + vchan_terminate_vdesc(&mchan->desc->vd); + mchan->desc = NULL; + } + vchan_get_all_descriptors(&mchan->vc, &head); + + mdc_get_new_events(mchan); + + spin_unlock_irqrestore(&mchan->vc.lock, flags); + + vchan_dma_desc_free_list(&mchan->vc, &head); + + return 0; +} + +static void mdc_synchronize(struct dma_chan *chan) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + + vchan_synchronize(&mchan->vc); +} + +static int mdc_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&mchan->vc.lock, flags); + mchan->config = *config; + spin_unlock_irqrestore(&mchan->vc.lock, flags); + + return 0; +} + +static int mdc_alloc_chan_resources(struct dma_chan *chan) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + struct device *dev = mdma2dev(mchan->mdma); + + return pm_runtime_get_sync(dev); +} + +static void mdc_free_chan_resources(struct dma_chan *chan) +{ + struct mdc_chan *mchan = to_mdc_chan(chan); + struct mdc_dma *mdma = mchan->mdma; + struct device *dev = mdma2dev(mdma); + + mdc_terminate_all(chan); + mdma->soc->disable_chan(mchan); + pm_runtime_put(dev); +} + +static irqreturn_t mdc_chan_irq(int irq, void *dev_id) +{ + struct mdc_chan *mchan = (struct mdc_chan *)dev_id; + struct mdc_tx_desc *mdesc; + unsigned int i, new_events; + + spin_lock(&mchan->vc.lock); + + dev_dbg(mdma2dev(mchan->mdma), "IRQ on channel %d\n", mchan->chan_nr); + + new_events = mdc_get_new_events(mchan); + + if (!new_events) + goto out; + + mdesc = mchan->desc; + if (!mdesc) { + dev_warn(mdma2dev(mchan->mdma), + "IRQ with no active descriptor on channel %d\n", + mchan->chan_nr); + goto out; + } + + for (i = 0; i < new_events; i++) { + /* + * The first interrupt in a transfer indicates that the + * command list has been loaded, not that a command has + * been completed. + */ + if (!mdesc->cmd_loaded) { + mdesc->cmd_loaded = true; + continue; + } + + mdesc->list_cmds_done++; + if (mdesc->cyclic) { + mdesc->list_cmds_done %= mdesc->list_len; + if (mdesc->list_cmds_done % mdesc->list_period_len == 0) + vchan_cyclic_callback(&mdesc->vd); + } else if (mdesc->list_cmds_done == mdesc->list_len) { + mchan->desc = NULL; + vchan_cookie_complete(&mdesc->vd); + mdc_issue_desc(mchan); + break; + } + } +out: + spin_unlock(&mchan->vc.lock); + + return IRQ_HANDLED; +} + +static struct dma_chan *mdc_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct mdc_dma *mdma = ofdma->of_dma_data; + struct dma_chan *chan; + + if (dma_spec->args_count != 3) + return NULL; + + list_for_each_entry(chan, &mdma->dma_dev.channels, device_node) { + struct mdc_chan *mchan = to_mdc_chan(chan); + + if (!(dma_spec->args[1] & BIT(mchan->chan_nr))) + continue; + if (dma_get_slave_channel(chan)) { + mchan->periph = dma_spec->args[0]; + mchan->thread = dma_spec->args[2]; + return chan; + } + } + + return NULL; +} + +#define PISTACHIO_CR_PERIPH_DMA_ROUTE(ch) (0x120 + 0x4 * ((ch) / 4)) +#define PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(ch) (8 * ((ch) % 4)) +#define PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK 0x3f + +static void pistachio_mdc_enable_chan(struct mdc_chan *mchan) +{ + struct mdc_dma *mdma = mchan->mdma; + + regmap_update_bits(mdma->periph_regs, + PISTACHIO_CR_PERIPH_DMA_ROUTE(mchan->chan_nr), + PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK << + PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr), + mchan->periph << + PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr)); +} + +static void pistachio_mdc_disable_chan(struct mdc_chan *mchan) +{ + struct mdc_dma *mdma = mchan->mdma; + + regmap_update_bits(mdma->periph_regs, + PISTACHIO_CR_PERIPH_DMA_ROUTE(mchan->chan_nr), + PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK << + PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr), + 0); +} + +static const struct mdc_dma_soc_data pistachio_mdc_data = { + .enable_chan = pistachio_mdc_enable_chan, + .disable_chan = pistachio_mdc_disable_chan, +}; + +static const struct of_device_id mdc_dma_of_match[] = { + { .compatible = "img,pistachio-mdc-dma", .data = &pistachio_mdc_data, }, + { }, +}; +MODULE_DEVICE_TABLE(of, mdc_dma_of_match); + +static int img_mdc_runtime_suspend(struct device *dev) +{ + struct mdc_dma *mdma = dev_get_drvdata(dev); + + clk_disable_unprepare(mdma->clk); + + return 0; +} + +static int img_mdc_runtime_resume(struct device *dev) +{ + struct mdc_dma *mdma = dev_get_drvdata(dev); + + return clk_prepare_enable(mdma->clk); +} + +static int mdc_dma_probe(struct platform_device *pdev) +{ + struct mdc_dma *mdma; + unsigned int i; + u32 val; + int ret; + + mdma = devm_kzalloc(&pdev->dev, sizeof(*mdma), GFP_KERNEL); + if (!mdma) + return -ENOMEM; + platform_set_drvdata(pdev, mdma); + + mdma->soc = of_device_get_match_data(&pdev->dev); + + mdma->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mdma->regs)) + return PTR_ERR(mdma->regs); + + mdma->periph_regs = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "img,cr-periph"); + if (IS_ERR(mdma->periph_regs)) + return PTR_ERR(mdma->periph_regs); + + mdma->clk = devm_clk_get(&pdev->dev, "sys"); + if (IS_ERR(mdma->clk)) + return PTR_ERR(mdma->clk); + + dma_cap_zero(mdma->dma_dev.cap_mask); + dma_cap_set(DMA_SLAVE, mdma->dma_dev.cap_mask); + dma_cap_set(DMA_PRIVATE, mdma->dma_dev.cap_mask); + dma_cap_set(DMA_CYCLIC, mdma->dma_dev.cap_mask); + dma_cap_set(DMA_MEMCPY, mdma->dma_dev.cap_mask); + + val = mdc_readl(mdma, MDC_GLOBAL_CONFIG_A); + mdma->nr_channels = (val >> MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_SHIFT) & + MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_MASK; + mdma->nr_threads = + 1 << ((val >> MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_SHIFT) & + MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_MASK); + mdma->bus_width = + (1 << ((val >> MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_SHIFT) & + MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_MASK)) / 8; + /* + * Although transfer sizes of up to MDC_TRANSFER_SIZE_MASK + 1 bytes + * are supported, this makes it possible for the value reported in + * MDC_ACTIVE_TRANSFER_SIZE to be ambiguous - an active transfer size + * of MDC_TRANSFER_SIZE_MASK may indicate either that 0 bytes or + * MDC_TRANSFER_SIZE_MASK + 1 bytes are remaining. To eliminate this + * ambiguity, restrict transfer sizes to one bus-width less than the + * actual maximum. + */ + mdma->max_xfer_size = MDC_TRANSFER_SIZE_MASK + 1 - mdma->bus_width; + + of_property_read_u32(pdev->dev.of_node, "dma-channels", + &mdma->nr_channels); + ret = of_property_read_u32(pdev->dev.of_node, + "img,max-burst-multiplier", + &mdma->max_burst_mult); + if (ret) + return ret; + + mdma->dma_dev.dev = &pdev->dev; + mdma->dma_dev.device_prep_slave_sg = mdc_prep_slave_sg; + mdma->dma_dev.device_prep_dma_cyclic = mdc_prep_dma_cyclic; + mdma->dma_dev.device_prep_dma_memcpy = mdc_prep_dma_memcpy; + mdma->dma_dev.device_alloc_chan_resources = mdc_alloc_chan_resources; + mdma->dma_dev.device_free_chan_resources = mdc_free_chan_resources; + mdma->dma_dev.device_tx_status = mdc_tx_status; + mdma->dma_dev.device_issue_pending = mdc_issue_pending; + mdma->dma_dev.device_terminate_all = mdc_terminate_all; + mdma->dma_dev.device_synchronize = mdc_synchronize; + mdma->dma_dev.device_config = mdc_slave_config; + + mdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + mdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + for (i = 1; i <= mdma->bus_width; i <<= 1) { + mdma->dma_dev.src_addr_widths |= BIT(i); + mdma->dma_dev.dst_addr_widths |= BIT(i); + } + + INIT_LIST_HEAD(&mdma->dma_dev.channels); + for (i = 0; i < mdma->nr_channels; i++) { + struct mdc_chan *mchan = &mdma->channels[i]; + + mchan->mdma = mdma; + mchan->chan_nr = i; + mchan->irq = platform_get_irq(pdev, i); + if (mchan->irq < 0) + return mchan->irq; + + ret = devm_request_irq(&pdev->dev, mchan->irq, mdc_chan_irq, + IRQ_TYPE_LEVEL_HIGH, + dev_name(&pdev->dev), mchan); + if (ret < 0) + return ret; + + mchan->vc.desc_free = mdc_desc_free; + vchan_init(&mchan->vc, &mdma->dma_dev); + } + + mdma->desc_pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev, + sizeof(struct mdc_hw_list_desc), + 4, 0); + if (!mdma->desc_pool) + return -ENOMEM; + + pm_runtime_enable(&pdev->dev); + if (!pm_runtime_enabled(&pdev->dev)) { + ret = img_mdc_runtime_resume(&pdev->dev); + if (ret) + return ret; + } + + ret = dma_async_device_register(&mdma->dma_dev); + if (ret) + goto suspend; + + ret = of_dma_controller_register(pdev->dev.of_node, mdc_of_xlate, mdma); + if (ret) + goto unregister; + + dev_info(&pdev->dev, "MDC with %u channels and %u threads\n", + mdma->nr_channels, mdma->nr_threads); + + return 0; + +unregister: + dma_async_device_unregister(&mdma->dma_dev); +suspend: + if (!pm_runtime_enabled(&pdev->dev)) + img_mdc_runtime_suspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return ret; +} + +static int mdc_dma_remove(struct platform_device *pdev) +{ + struct mdc_dma *mdma = platform_get_drvdata(pdev); + struct mdc_chan *mchan, *next; + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&mdma->dma_dev); + + list_for_each_entry_safe(mchan, next, &mdma->dma_dev.channels, + vc.chan.device_node) { + list_del(&mchan->vc.chan.device_node); + + devm_free_irq(&pdev->dev, mchan->irq, mchan); + + tasklet_kill(&mchan->vc.task); + } + + pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) + img_mdc_runtime_suspend(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int img_mdc_suspend_late(struct device *dev) +{ + struct mdc_dma *mdma = dev_get_drvdata(dev); + int i; + + /* Check that all channels are idle */ + for (i = 0; i < mdma->nr_channels; i++) { + struct mdc_chan *mchan = &mdma->channels[i]; + + if (unlikely(mchan->desc)) + return -EBUSY; + } + + return pm_runtime_force_suspend(dev); +} + +static int img_mdc_resume_early(struct device *dev) +{ + return pm_runtime_force_resume(dev); +} +#endif /* CONFIG_PM_SLEEP */ + +static const struct dev_pm_ops img_mdc_pm_ops = { + SET_RUNTIME_PM_OPS(img_mdc_runtime_suspend, + img_mdc_runtime_resume, NULL) + SET_LATE_SYSTEM_SLEEP_PM_OPS(img_mdc_suspend_late, + img_mdc_resume_early) +}; + +static struct platform_driver mdc_dma_driver = { + .driver = { + .name = "img-mdc-dma", + .pm = &img_mdc_pm_ops, + .of_match_table = of_match_ptr(mdc_dma_of_match), + }, + .probe = mdc_dma_probe, + .remove = mdc_dma_remove, +}; +module_platform_driver(mdc_dma_driver); + +MODULE_DESCRIPTION("IMG Multi-threaded DMA Controller (MDC) driver"); +MODULE_AUTHOR("Andrew Bresticker "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c new file mode 100644 index 0000000000..114f254b9f --- /dev/null +++ b/drivers/dma/imx-dma.c @@ -0,0 +1,1252 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// drivers/dma/imx-dma.c +// +// This file contains a driver for the Freescale i.MX DMA engine +// found on i.MX1/21/27 +// +// Copyright 2010 Sascha Hauer, Pengutronix +// Copyright 2012 Javier Martin, Vista Silicon + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "dmaengine.h" +#define IMXDMA_MAX_CHAN_DESCRIPTORS 16 +#define IMX_DMA_CHANNELS 16 + +#define IMX_DMA_2D_SLOTS 2 +#define IMX_DMA_2D_SLOT_A 0 +#define IMX_DMA_2D_SLOT_B 1 + +#define IMX_DMA_LENGTH_LOOP ((unsigned int)-1) +#define IMX_DMA_MEMSIZE_32 (0 << 4) +#define IMX_DMA_MEMSIZE_8 (1 << 4) +#define IMX_DMA_MEMSIZE_16 (2 << 4) +#define IMX_DMA_TYPE_LINEAR (0 << 10) +#define IMX_DMA_TYPE_2D (1 << 10) +#define IMX_DMA_TYPE_FIFO (2 << 10) + +#define IMX_DMA_ERR_BURST (1 << 0) +#define IMX_DMA_ERR_REQUEST (1 << 1) +#define IMX_DMA_ERR_TRANSFER (1 << 2) +#define IMX_DMA_ERR_BUFFER (1 << 3) +#define IMX_DMA_ERR_TIMEOUT (1 << 4) + +#define DMA_DCR 0x00 /* Control Register */ +#define DMA_DISR 0x04 /* Interrupt status Register */ +#define DMA_DIMR 0x08 /* Interrupt mask Register */ +#define DMA_DBTOSR 0x0c /* Burst timeout status Register */ +#define DMA_DRTOSR 0x10 /* Request timeout Register */ +#define DMA_DSESR 0x14 /* Transfer Error Status Register */ +#define DMA_DBOSR 0x18 /* Buffer overflow status Register */ +#define DMA_DBTOCR 0x1c /* Burst timeout control Register */ +#define DMA_WSRA 0x40 /* W-Size Register A */ +#define DMA_XSRA 0x44 /* X-Size Register A */ +#define DMA_YSRA 0x48 /* Y-Size Register A */ +#define DMA_WSRB 0x4c /* W-Size Register B */ +#define DMA_XSRB 0x50 /* X-Size Register B */ +#define DMA_YSRB 0x54 /* Y-Size Register B */ +#define DMA_SAR(x) (0x80 + ((x) << 6)) /* Source Address Registers */ +#define DMA_DAR(x) (0x84 + ((x) << 6)) /* Destination Address Registers */ +#define DMA_CNTR(x) (0x88 + ((x) << 6)) /* Count Registers */ +#define DMA_CCR(x) (0x8c + ((x) << 6)) /* Control Registers */ +#define DMA_RSSR(x) (0x90 + ((x) << 6)) /* Request source select Registers */ +#define DMA_BLR(x) (0x94 + ((x) << 6)) /* Burst length Registers */ +#define DMA_RTOR(x) (0x98 + ((x) << 6)) /* Request timeout Registers */ +#define DMA_BUCR(x) (0x98 + ((x) << 6)) /* Bus Utilization Registers */ +#define DMA_CCNR(x) (0x9C + ((x) << 6)) /* Channel counter Registers */ + +#define DCR_DRST (1<<1) +#define DCR_DEN (1<<0) +#define DBTOCR_EN (1<<15) +#define DBTOCR_CNT(x) ((x) & 0x7fff) +#define CNTR_CNT(x) ((x) & 0xffffff) +#define CCR_ACRPT (1<<14) +#define CCR_DMOD_LINEAR (0x0 << 12) +#define CCR_DMOD_2D (0x1 << 12) +#define CCR_DMOD_FIFO (0x2 << 12) +#define CCR_DMOD_EOBFIFO (0x3 << 12) +#define CCR_SMOD_LINEAR (0x0 << 10) +#define CCR_SMOD_2D (0x1 << 10) +#define CCR_SMOD_FIFO (0x2 << 10) +#define CCR_SMOD_EOBFIFO (0x3 << 10) +#define CCR_MDIR_DEC (1<<9) +#define CCR_MSEL_B (1<<8) +#define CCR_DSIZ_32 (0x0 << 6) +#define CCR_DSIZ_8 (0x1 << 6) +#define CCR_DSIZ_16 (0x2 << 6) +#define CCR_SSIZ_32 (0x0 << 4) +#define CCR_SSIZ_8 (0x1 << 4) +#define CCR_SSIZ_16 (0x2 << 4) +#define CCR_REN (1<<3) +#define CCR_RPT (1<<2) +#define CCR_FRC (1<<1) +#define CCR_CEN (1<<0) +#define RTOR_EN (1<<15) +#define RTOR_CLK (1<<14) +#define RTOR_PSC (1<<13) + +enum imxdma_prep_type { + IMXDMA_DESC_MEMCPY, + IMXDMA_DESC_INTERLEAVED, + IMXDMA_DESC_SLAVE_SG, + IMXDMA_DESC_CYCLIC, +}; + +struct imx_dma_2d_config { + u16 xsr; + u16 ysr; + u16 wsr; + int count; +}; + +struct imxdma_desc { + struct list_head node; + struct dma_async_tx_descriptor desc; + enum dma_status status; + dma_addr_t src; + dma_addr_t dest; + size_t len; + enum dma_transfer_direction direction; + enum imxdma_prep_type type; + /* For memcpy and interleaved */ + unsigned int config_port; + unsigned int config_mem; + /* For interleaved transfers */ + unsigned int x; + unsigned int y; + unsigned int w; + /* For slave sg and cyclic */ + struct scatterlist *sg; + unsigned int sgcount; +}; + +struct imxdma_channel { + int hw_chaining; + struct timer_list watchdog; + struct imxdma_engine *imxdma; + unsigned int channel; + + struct tasklet_struct dma_tasklet; + struct list_head ld_free; + struct list_head ld_queue; + struct list_head ld_active; + int descs_allocated; + enum dma_slave_buswidth word_size; + dma_addr_t per_address; + u32 watermark_level; + struct dma_chan chan; + struct dma_async_tx_descriptor desc; + enum dma_status status; + int dma_request; + struct scatterlist *sg_list; + u32 ccr_from_device; + u32 ccr_to_device; + bool enabled_2d; + int slot_2d; + unsigned int irq; + struct dma_slave_config config; +}; + +enum imx_dma_type { + IMX1_DMA, + IMX21_DMA, + IMX27_DMA, +}; + +struct imxdma_engine { + struct device *dev; + struct dma_device dma_device; + void __iomem *base; + struct clk *dma_ahb; + struct clk *dma_ipg; + spinlock_t lock; + struct imx_dma_2d_config slots_2d[IMX_DMA_2D_SLOTS]; + struct imxdma_channel channel[IMX_DMA_CHANNELS]; + enum imx_dma_type devtype; + unsigned int irq; + unsigned int irq_err; + +}; + +struct imxdma_filter_data { + struct imxdma_engine *imxdma; + int request; +}; + +static const struct of_device_id imx_dma_of_dev_id[] = { + { + .compatible = "fsl,imx1-dma", .data = (const void *)IMX1_DMA, + }, { + .compatible = "fsl,imx21-dma", .data = (const void *)IMX21_DMA, + }, { + .compatible = "fsl,imx27-dma", .data = (const void *)IMX27_DMA, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, imx_dma_of_dev_id); + +static inline int is_imx1_dma(struct imxdma_engine *imxdma) +{ + return imxdma->devtype == IMX1_DMA; +} + +static inline int is_imx27_dma(struct imxdma_engine *imxdma) +{ + return imxdma->devtype == IMX27_DMA; +} + +static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct imxdma_channel, chan); +} + +static inline bool imxdma_chan_is_doing_cyclic(struct imxdma_channel *imxdmac) +{ + struct imxdma_desc *desc; + + if (!list_empty(&imxdmac->ld_active)) { + desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, + node); + if (desc->type == IMXDMA_DESC_CYCLIC) + return true; + } + return false; +} + + + +static void imx_dmav1_writel(struct imxdma_engine *imxdma, unsigned val, + unsigned offset) +{ + __raw_writel(val, imxdma->base + offset); +} + +static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset) +{ + return __raw_readl(imxdma->base + offset); +} + +static int imxdma_hw_chain(struct imxdma_channel *imxdmac) +{ + struct imxdma_engine *imxdma = imxdmac->imxdma; + + if (is_imx27_dma(imxdma)) + return imxdmac->hw_chaining; + else + return 0; +} + +/* + * imxdma_sg_next - prepare next chunk for scatter-gather DMA emulation + */ +static inline void imxdma_sg_next(struct imxdma_desc *d) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct scatterlist *sg = d->sg; + size_t now; + + now = min_t(size_t, d->len, sg_dma_len(sg)); + if (d->len != IMX_DMA_LENGTH_LOOP) + d->len -= now; + + if (d->direction == DMA_DEV_TO_MEM) + imx_dmav1_writel(imxdma, sg->dma_address, + DMA_DAR(imxdmac->channel)); + else + imx_dmav1_writel(imxdma, sg->dma_address, + DMA_SAR(imxdmac->channel)); + + imx_dmav1_writel(imxdma, now, DMA_CNTR(imxdmac->channel)); + + dev_dbg(imxdma->dev, " %s channel: %d dst 0x%08x, src 0x%08x, " + "size 0x%08x\n", __func__, imxdmac->channel, + imx_dmav1_readl(imxdma, DMA_DAR(imxdmac->channel)), + imx_dmav1_readl(imxdma, DMA_SAR(imxdmac->channel)), + imx_dmav1_readl(imxdma, DMA_CNTR(imxdmac->channel))); +} + +static void imxdma_enable_hw(struct imxdma_desc *d) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + int channel = imxdmac->channel; + unsigned long flags; + + dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel); + + local_irq_save(flags); + + imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR); + imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) & + ~(1 << channel), DMA_DIMR); + imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) | + CCR_CEN | CCR_ACRPT, DMA_CCR(channel)); + + if (!is_imx1_dma(imxdma) && + d->sg && imxdma_hw_chain(imxdmac)) { + d->sg = sg_next(d->sg); + if (d->sg) { + u32 tmp; + imxdma_sg_next(d); + tmp = imx_dmav1_readl(imxdma, DMA_CCR(channel)); + imx_dmav1_writel(imxdma, tmp | CCR_RPT | CCR_ACRPT, + DMA_CCR(channel)); + } + } + + local_irq_restore(flags); +} + +static void imxdma_disable_hw(struct imxdma_channel *imxdmac) +{ + struct imxdma_engine *imxdma = imxdmac->imxdma; + int channel = imxdmac->channel; + unsigned long flags; + + dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel); + + if (imxdma_hw_chain(imxdmac)) + del_timer(&imxdmac->watchdog); + + local_irq_save(flags); + imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) | + (1 << channel), DMA_DIMR); + imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) & + ~CCR_CEN, DMA_CCR(channel)); + imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR); + local_irq_restore(flags); +} + +static void imxdma_watchdog(struct timer_list *t) +{ + struct imxdma_channel *imxdmac = from_timer(imxdmac, t, watchdog); + struct imxdma_engine *imxdma = imxdmac->imxdma; + int channel = imxdmac->channel; + + imx_dmav1_writel(imxdma, 0, DMA_CCR(channel)); + + /* Tasklet watchdog error handler */ + tasklet_schedule(&imxdmac->dma_tasklet); + dev_dbg(imxdma->dev, "channel %d: watchdog timeout!\n", + imxdmac->channel); +} + +static irqreturn_t imxdma_err_handler(int irq, void *dev_id) +{ + struct imxdma_engine *imxdma = dev_id; + unsigned int err_mask; + int i, disr; + int errcode; + + disr = imx_dmav1_readl(imxdma, DMA_DISR); + + err_mask = imx_dmav1_readl(imxdma, DMA_DBTOSR) | + imx_dmav1_readl(imxdma, DMA_DRTOSR) | + imx_dmav1_readl(imxdma, DMA_DSESR) | + imx_dmav1_readl(imxdma, DMA_DBOSR); + + if (!err_mask) + return IRQ_HANDLED; + + imx_dmav1_writel(imxdma, disr & err_mask, DMA_DISR); + + for (i = 0; i < IMX_DMA_CHANNELS; i++) { + if (!(err_mask & (1 << i))) + continue; + errcode = 0; + + if (imx_dmav1_readl(imxdma, DMA_DBTOSR) & (1 << i)) { + imx_dmav1_writel(imxdma, 1 << i, DMA_DBTOSR); + errcode |= IMX_DMA_ERR_BURST; + } + if (imx_dmav1_readl(imxdma, DMA_DRTOSR) & (1 << i)) { + imx_dmav1_writel(imxdma, 1 << i, DMA_DRTOSR); + errcode |= IMX_DMA_ERR_REQUEST; + } + if (imx_dmav1_readl(imxdma, DMA_DSESR) & (1 << i)) { + imx_dmav1_writel(imxdma, 1 << i, DMA_DSESR); + errcode |= IMX_DMA_ERR_TRANSFER; + } + if (imx_dmav1_readl(imxdma, DMA_DBOSR) & (1 << i)) { + imx_dmav1_writel(imxdma, 1 << i, DMA_DBOSR); + errcode |= IMX_DMA_ERR_BUFFER; + } + /* Tasklet error handler */ + tasklet_schedule(&imxdma->channel[i].dma_tasklet); + + dev_warn(imxdma->dev, + "DMA timeout on channel %d -%s%s%s%s\n", i, + errcode & IMX_DMA_ERR_BURST ? " burst" : "", + errcode & IMX_DMA_ERR_REQUEST ? " request" : "", + errcode & IMX_DMA_ERR_TRANSFER ? " transfer" : "", + errcode & IMX_DMA_ERR_BUFFER ? " buffer" : ""); + } + return IRQ_HANDLED; +} + +static void dma_irq_handle_channel(struct imxdma_channel *imxdmac) +{ + struct imxdma_engine *imxdma = imxdmac->imxdma; + int chno = imxdmac->channel; + struct imxdma_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&imxdma->lock, flags); + if (list_empty(&imxdmac->ld_active)) { + spin_unlock_irqrestore(&imxdma->lock, flags); + goto out; + } + + desc = list_first_entry(&imxdmac->ld_active, + struct imxdma_desc, + node); + spin_unlock_irqrestore(&imxdma->lock, flags); + + if (desc->sg) { + u32 tmp; + desc->sg = sg_next(desc->sg); + + if (desc->sg) { + imxdma_sg_next(desc); + + tmp = imx_dmav1_readl(imxdma, DMA_CCR(chno)); + + if (imxdma_hw_chain(imxdmac)) { + /* FIXME: The timeout should probably be + * configurable + */ + mod_timer(&imxdmac->watchdog, + jiffies + msecs_to_jiffies(500)); + + tmp |= CCR_CEN | CCR_RPT | CCR_ACRPT; + imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno)); + } else { + imx_dmav1_writel(imxdma, tmp & ~CCR_CEN, + DMA_CCR(chno)); + tmp |= CCR_CEN; + } + + imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno)); + + if (imxdma_chan_is_doing_cyclic(imxdmac)) + /* Tasklet progression */ + tasklet_schedule(&imxdmac->dma_tasklet); + + return; + } + + if (imxdma_hw_chain(imxdmac)) { + del_timer(&imxdmac->watchdog); + return; + } + } + +out: + imx_dmav1_writel(imxdma, 0, DMA_CCR(chno)); + /* Tasklet irq */ + tasklet_schedule(&imxdmac->dma_tasklet); +} + +static irqreturn_t dma_irq_handler(int irq, void *dev_id) +{ + struct imxdma_engine *imxdma = dev_id; + int i, disr; + + if (!is_imx1_dma(imxdma)) + imxdma_err_handler(irq, dev_id); + + disr = imx_dmav1_readl(imxdma, DMA_DISR); + + dev_dbg(imxdma->dev, "%s called, disr=0x%08x\n", __func__, disr); + + imx_dmav1_writel(imxdma, disr, DMA_DISR); + for (i = 0; i < IMX_DMA_CHANNELS; i++) { + if (disr & (1 << i)) + dma_irq_handle_channel(&imxdma->channel[i]); + } + + return IRQ_HANDLED; +} + +static int imxdma_xfer_desc(struct imxdma_desc *d) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + int slot = -1; + int i; + + /* Configure and enable */ + switch (d->type) { + case IMXDMA_DESC_INTERLEAVED: + /* Try to get a free 2D slot */ + for (i = 0; i < IMX_DMA_2D_SLOTS; i++) { + if ((imxdma->slots_2d[i].count > 0) && + ((imxdma->slots_2d[i].xsr != d->x) || + (imxdma->slots_2d[i].ysr != d->y) || + (imxdma->slots_2d[i].wsr != d->w))) + continue; + slot = i; + break; + } + if (slot < 0) + return -EBUSY; + + imxdma->slots_2d[slot].xsr = d->x; + imxdma->slots_2d[slot].ysr = d->y; + imxdma->slots_2d[slot].wsr = d->w; + imxdma->slots_2d[slot].count++; + + imxdmac->slot_2d = slot; + imxdmac->enabled_2d = true; + + if (slot == IMX_DMA_2D_SLOT_A) { + d->config_mem &= ~CCR_MSEL_B; + d->config_port &= ~CCR_MSEL_B; + imx_dmav1_writel(imxdma, d->x, DMA_XSRA); + imx_dmav1_writel(imxdma, d->y, DMA_YSRA); + imx_dmav1_writel(imxdma, d->w, DMA_WSRA); + } else { + d->config_mem |= CCR_MSEL_B; + d->config_port |= CCR_MSEL_B; + imx_dmav1_writel(imxdma, d->x, DMA_XSRB); + imx_dmav1_writel(imxdma, d->y, DMA_YSRB); + imx_dmav1_writel(imxdma, d->w, DMA_WSRB); + } + /* + * We fall-through here intentionally, since a 2D transfer is + * similar to MEMCPY just adding the 2D slot configuration. + */ + fallthrough; + case IMXDMA_DESC_MEMCPY: + imx_dmav1_writel(imxdma, d->src, DMA_SAR(imxdmac->channel)); + imx_dmav1_writel(imxdma, d->dest, DMA_DAR(imxdmac->channel)); + imx_dmav1_writel(imxdma, d->config_mem | (d->config_port << 2), + DMA_CCR(imxdmac->channel)); + + imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel)); + + dev_dbg(imxdma->dev, + "%s channel: %d dest=0x%08llx src=0x%08llx dma_length=%zu\n", + __func__, imxdmac->channel, + (unsigned long long)d->dest, + (unsigned long long)d->src, d->len); + + break; + /* Cyclic transfer is the same as slave_sg with special sg configuration. */ + case IMXDMA_DESC_CYCLIC: + case IMXDMA_DESC_SLAVE_SG: + if (d->direction == DMA_DEV_TO_MEM) { + imx_dmav1_writel(imxdma, imxdmac->per_address, + DMA_SAR(imxdmac->channel)); + imx_dmav1_writel(imxdma, imxdmac->ccr_from_device, + DMA_CCR(imxdmac->channel)); + + dev_dbg(imxdma->dev, + "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (dev2mem)\n", + __func__, imxdmac->channel, + d->sg, d->sgcount, d->len, + (unsigned long long)imxdmac->per_address); + } else if (d->direction == DMA_MEM_TO_DEV) { + imx_dmav1_writel(imxdma, imxdmac->per_address, + DMA_DAR(imxdmac->channel)); + imx_dmav1_writel(imxdma, imxdmac->ccr_to_device, + DMA_CCR(imxdmac->channel)); + + dev_dbg(imxdma->dev, + "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (mem2dev)\n", + __func__, imxdmac->channel, + d->sg, d->sgcount, d->len, + (unsigned long long)imxdmac->per_address); + } else { + dev_err(imxdma->dev, "%s channel: %d bad dma mode\n", + __func__, imxdmac->channel); + return -EINVAL; + } + + imxdma_sg_next(d); + + break; + default: + return -EINVAL; + } + imxdma_enable_hw(d); + return 0; +} + +static void imxdma_tasklet(struct tasklet_struct *t) +{ + struct imxdma_channel *imxdmac = from_tasklet(imxdmac, t, dma_tasklet); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc, *next_desc; + unsigned long flags; + + spin_lock_irqsave(&imxdma->lock, flags); + + if (list_empty(&imxdmac->ld_active)) { + /* Someone might have called terminate all */ + spin_unlock_irqrestore(&imxdma->lock, flags); + return; + } + desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node); + + /* If we are dealing with a cyclic descriptor, keep it on ld_active + * and dont mark the descriptor as complete. + * Only in non-cyclic cases it would be marked as complete + */ + if (imxdma_chan_is_doing_cyclic(imxdmac)) + goto out; + else + dma_cookie_complete(&desc->desc); + + /* Free 2D slot if it was an interleaved transfer */ + if (imxdmac->enabled_2d) { + imxdma->slots_2d[imxdmac->slot_2d].count--; + imxdmac->enabled_2d = false; + } + + list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free); + + if (!list_empty(&imxdmac->ld_queue)) { + next_desc = list_first_entry(&imxdmac->ld_queue, + struct imxdma_desc, node); + list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active); + if (imxdma_xfer_desc(next_desc) < 0) + dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n", + __func__, imxdmac->channel); + } +out: + spin_unlock_irqrestore(&imxdma->lock, flags); + + dmaengine_desc_get_callback_invoke(&desc->desc, NULL); +} + +static int imxdma_terminate_all(struct dma_chan *chan) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + unsigned long flags; + + imxdma_disable_hw(imxdmac); + + spin_lock_irqsave(&imxdma->lock, flags); + list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free); + list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free); + spin_unlock_irqrestore(&imxdma->lock, flags); + return 0; +} + +static int imxdma_config_write(struct dma_chan *chan, + struct dma_slave_config *dmaengine_cfg, + enum dma_transfer_direction direction) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + unsigned int mode = 0; + + if (direction == DMA_DEV_TO_MEM) { + imxdmac->per_address = dmaengine_cfg->src_addr; + imxdmac->watermark_level = dmaengine_cfg->src_maxburst; + imxdmac->word_size = dmaengine_cfg->src_addr_width; + } else { + imxdmac->per_address = dmaengine_cfg->dst_addr; + imxdmac->watermark_level = dmaengine_cfg->dst_maxburst; + imxdmac->word_size = dmaengine_cfg->dst_addr_width; + } + + switch (imxdmac->word_size) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + mode = IMX_DMA_MEMSIZE_8; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + mode = IMX_DMA_MEMSIZE_16; + break; + default: + case DMA_SLAVE_BUSWIDTH_4_BYTES: + mode = IMX_DMA_MEMSIZE_32; + break; + } + + imxdmac->hw_chaining = 0; + + imxdmac->ccr_from_device = (mode | IMX_DMA_TYPE_FIFO) | + ((IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) << 2) | + CCR_REN; + imxdmac->ccr_to_device = + (IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) | + ((mode | IMX_DMA_TYPE_FIFO) << 2) | CCR_REN; + imx_dmav1_writel(imxdma, imxdmac->dma_request, + DMA_RSSR(imxdmac->channel)); + + /* Set burst length */ + imx_dmav1_writel(imxdma, imxdmac->watermark_level * + imxdmac->word_size, DMA_BLR(imxdmac->channel)); + + return 0; +} + +static int imxdma_config(struct dma_chan *chan, + struct dma_slave_config *dmaengine_cfg) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + + memcpy(&imxdmac->config, dmaengine_cfg, sizeof(*dmaengine_cfg)); + + return 0; +} + +static enum dma_status imxdma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(chan, cookie, txstate); +} + +static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&imxdma->lock, flags); + list_move_tail(imxdmac->ld_free.next, &imxdmac->ld_queue); + cookie = dma_cookie_assign(tx); + spin_unlock_irqrestore(&imxdma->lock, flags); + + return cookie; +} + +static int imxdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imx_dma_data *data = chan->private; + + if (data != NULL) + imxdmac->dma_request = data->dma_request; + + while (imxdmac->descs_allocated < IMXDMA_MAX_CHAN_DESCRIPTORS) { + struct imxdma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + break; + dma_async_tx_descriptor_init(&desc->desc, chan); + desc->desc.tx_submit = imxdma_tx_submit; + /* txd.flags will be overwritten in prep funcs */ + desc->desc.flags = DMA_CTRL_ACK; + desc->status = DMA_COMPLETE; + + list_add_tail(&desc->node, &imxdmac->ld_free); + imxdmac->descs_allocated++; + } + + if (!imxdmac->descs_allocated) + return -ENOMEM; + + return imxdmac->descs_allocated; +} + +static void imxdma_free_chan_resources(struct dma_chan *chan) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc, *_desc; + unsigned long flags; + + spin_lock_irqsave(&imxdma->lock, flags); + + imxdma_disable_hw(imxdmac); + list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free); + list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free); + + spin_unlock_irqrestore(&imxdma->lock, flags); + + list_for_each_entry_safe(desc, _desc, &imxdmac->ld_free, node) { + kfree(desc); + imxdmac->descs_allocated--; + } + INIT_LIST_HEAD(&imxdmac->ld_free); + + kfree(imxdmac->sg_list); + imxdmac->sg_list = NULL; +} + +static struct dma_async_tx_descriptor *imxdma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct scatterlist *sg; + int i, dma_length = 0; + struct imxdma_desc *desc; + + if (list_empty(&imxdmac->ld_free) || + imxdma_chan_is_doing_cyclic(imxdmac)) + return NULL; + + desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node); + + for_each_sg(sgl, sg, sg_len, i) { + dma_length += sg_dma_len(sg); + } + + imxdma_config_write(chan, &imxdmac->config, direction); + + switch (imxdmac->word_size) { + case DMA_SLAVE_BUSWIDTH_4_BYTES: + if (sg_dma_len(sgl) & 3 || sgl->dma_address & 3) + return NULL; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + if (sg_dma_len(sgl) & 1 || sgl->dma_address & 1) + return NULL; + break; + case DMA_SLAVE_BUSWIDTH_1_BYTE: + break; + default: + return NULL; + } + + desc->type = IMXDMA_DESC_SLAVE_SG; + desc->sg = sgl; + desc->sgcount = sg_len; + desc->len = dma_length; + desc->direction = direction; + if (direction == DMA_DEV_TO_MEM) { + desc->src = imxdmac->per_address; + } else { + desc->dest = imxdmac->per_address; + } + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + + return &desc->desc; +} + +static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + int i; + unsigned int periods = buf_len / period_len; + + dev_dbg(imxdma->dev, "%s channel: %d buf_len=%zu period_len=%zu\n", + __func__, imxdmac->channel, buf_len, period_len); + + if (list_empty(&imxdmac->ld_free) || + imxdma_chan_is_doing_cyclic(imxdmac)) + return NULL; + + desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node); + + kfree(imxdmac->sg_list); + + imxdmac->sg_list = kcalloc(periods + 1, + sizeof(struct scatterlist), GFP_ATOMIC); + if (!imxdmac->sg_list) + return NULL; + + sg_init_table(imxdmac->sg_list, periods); + + for (i = 0; i < periods; i++) { + sg_assign_page(&imxdmac->sg_list[i], NULL); + imxdmac->sg_list[i].offset = 0; + imxdmac->sg_list[i].dma_address = dma_addr; + sg_dma_len(&imxdmac->sg_list[i]) = period_len; + dma_addr += period_len; + } + + /* close the loop */ + sg_chain(imxdmac->sg_list, periods + 1, imxdmac->sg_list); + + desc->type = IMXDMA_DESC_CYCLIC; + desc->sg = imxdmac->sg_list; + desc->sgcount = periods; + desc->len = IMX_DMA_LENGTH_LOOP; + desc->direction = direction; + if (direction == DMA_DEV_TO_MEM) { + desc->src = imxdmac->per_address; + } else { + desc->dest = imxdmac->per_address; + } + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + + imxdma_config_write(chan, &imxdmac->config, direction); + + return &desc->desc; +} + +static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + + dev_dbg(imxdma->dev, "%s channel: %d src=0x%llx dst=0x%llx len=%zu\n", + __func__, imxdmac->channel, (unsigned long long)src, + (unsigned long long)dest, len); + + if (list_empty(&imxdmac->ld_free) || + imxdma_chan_is_doing_cyclic(imxdmac)) + return NULL; + + desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node); + + desc->type = IMXDMA_DESC_MEMCPY; + desc->src = src; + desc->dest = dest; + desc->len = len; + desc->direction = DMA_MEM_TO_MEM; + desc->config_port = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR; + desc->config_mem = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR; + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + + return &desc->desc; +} + +static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved( + struct dma_chan *chan, struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + + dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%llx dst_start=0x%llx\n" + " src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__, + imxdmac->channel, (unsigned long long)xt->src_start, + (unsigned long long) xt->dst_start, + xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false", + xt->numf, xt->frame_size); + + if (list_empty(&imxdmac->ld_free) || + imxdma_chan_is_doing_cyclic(imxdmac)) + return NULL; + + if (xt->frame_size != 1 || xt->numf <= 0 || xt->dir != DMA_MEM_TO_MEM) + return NULL; + + desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node); + + desc->type = IMXDMA_DESC_INTERLEAVED; + desc->src = xt->src_start; + desc->dest = xt->dst_start; + desc->x = xt->sgl[0].size; + desc->y = xt->numf; + desc->w = xt->sgl[0].icg + desc->x; + desc->len = desc->x * desc->y; + desc->direction = DMA_MEM_TO_MEM; + desc->config_port = IMX_DMA_MEMSIZE_32; + desc->config_mem = IMX_DMA_MEMSIZE_32; + if (xt->src_sgl) + desc->config_mem |= IMX_DMA_TYPE_2D; + if (xt->dst_sgl) + desc->config_port |= IMX_DMA_TYPE_2D; + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + + return &desc->desc; +} + +static void imxdma_issue_pending(struct dma_chan *chan) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&imxdma->lock, flags); + if (list_empty(&imxdmac->ld_active) && + !list_empty(&imxdmac->ld_queue)) { + desc = list_first_entry(&imxdmac->ld_queue, + struct imxdma_desc, node); + + if (imxdma_xfer_desc(desc) < 0) { + dev_warn(imxdma->dev, + "%s: channel: %d couldn't issue DMA xfer\n", + __func__, imxdmac->channel); + } else { + list_move_tail(imxdmac->ld_queue.next, + &imxdmac->ld_active); + } + } + spin_unlock_irqrestore(&imxdma->lock, flags); +} + +static bool imxdma_filter_fn(struct dma_chan *chan, void *param) +{ + struct imxdma_filter_data *fdata = param; + struct imxdma_channel *imxdma_chan = to_imxdma_chan(chan); + + if (chan->device->dev != fdata->imxdma->dev) + return false; + + imxdma_chan->dma_request = fdata->request; + chan->private = NULL; + + return true; +} + +static struct dma_chan *imxdma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + int count = dma_spec->args_count; + struct imxdma_engine *imxdma = ofdma->of_dma_data; + struct imxdma_filter_data fdata = { + .imxdma = imxdma, + }; + + if (count != 1) + return NULL; + + fdata.request = dma_spec->args[0]; + + return dma_request_channel(imxdma->dma_device.cap_mask, + imxdma_filter_fn, &fdata); +} + +static int __init imxdma_probe(struct platform_device *pdev) +{ + struct imxdma_engine *imxdma; + int ret, i; + int irq, irq_err; + + imxdma = devm_kzalloc(&pdev->dev, sizeof(*imxdma), GFP_KERNEL); + if (!imxdma) + return -ENOMEM; + + imxdma->dev = &pdev->dev; + imxdma->devtype = (uintptr_t)of_device_get_match_data(&pdev->dev); + + imxdma->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(imxdma->base)) + return PTR_ERR(imxdma->base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + imxdma->dma_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(imxdma->dma_ipg)) + return PTR_ERR(imxdma->dma_ipg); + + imxdma->dma_ahb = devm_clk_get(&pdev->dev, "ahb"); + if (IS_ERR(imxdma->dma_ahb)) + return PTR_ERR(imxdma->dma_ahb); + + ret = clk_prepare_enable(imxdma->dma_ipg); + if (ret) + return ret; + ret = clk_prepare_enable(imxdma->dma_ahb); + if (ret) + goto disable_dma_ipg_clk; + + /* reset DMA module */ + imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR); + + if (is_imx1_dma(imxdma)) { + ret = devm_request_irq(&pdev->dev, irq, + dma_irq_handler, 0, "DMA", imxdma); + if (ret) { + dev_warn(imxdma->dev, "Can't register IRQ for DMA\n"); + goto disable_dma_ahb_clk; + } + imxdma->irq = irq; + + irq_err = platform_get_irq(pdev, 1); + if (irq_err < 0) { + ret = irq_err; + goto disable_dma_ahb_clk; + } + + ret = devm_request_irq(&pdev->dev, irq_err, + imxdma_err_handler, 0, "DMA", imxdma); + if (ret) { + dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n"); + goto disable_dma_ahb_clk; + } + imxdma->irq_err = irq_err; + } + + /* enable DMA module */ + imx_dmav1_writel(imxdma, DCR_DEN, DMA_DCR); + + /* clear all interrupts */ + imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DISR); + + /* disable interrupts */ + imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DIMR); + + INIT_LIST_HEAD(&imxdma->dma_device.channels); + + dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask); + dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask); + dma_cap_set(DMA_MEMCPY, imxdma->dma_device.cap_mask); + dma_cap_set(DMA_INTERLEAVE, imxdma->dma_device.cap_mask); + + /* Initialize 2D global parameters */ + for (i = 0; i < IMX_DMA_2D_SLOTS; i++) + imxdma->slots_2d[i].count = 0; + + spin_lock_init(&imxdma->lock); + + /* Initialize channel parameters */ + for (i = 0; i < IMX_DMA_CHANNELS; i++) { + struct imxdma_channel *imxdmac = &imxdma->channel[i]; + + if (!is_imx1_dma(imxdma)) { + ret = devm_request_irq(&pdev->dev, irq + i, + dma_irq_handler, 0, "DMA", imxdma); + if (ret) { + dev_warn(imxdma->dev, "Can't register IRQ %d " + "for DMA channel %d\n", + irq + i, i); + goto disable_dma_ahb_clk; + } + + imxdmac->irq = irq + i; + timer_setup(&imxdmac->watchdog, imxdma_watchdog, 0); + } + + imxdmac->imxdma = imxdma; + + INIT_LIST_HEAD(&imxdmac->ld_queue); + INIT_LIST_HEAD(&imxdmac->ld_free); + INIT_LIST_HEAD(&imxdmac->ld_active); + + tasklet_setup(&imxdmac->dma_tasklet, imxdma_tasklet); + imxdmac->chan.device = &imxdma->dma_device; + dma_cookie_init(&imxdmac->chan); + imxdmac->channel = i; + + /* Add the channel to the DMAC list */ + list_add_tail(&imxdmac->chan.device_node, + &imxdma->dma_device.channels); + } + + imxdma->dma_device.dev = &pdev->dev; + + imxdma->dma_device.device_alloc_chan_resources = imxdma_alloc_chan_resources; + imxdma->dma_device.device_free_chan_resources = imxdma_free_chan_resources; + imxdma->dma_device.device_tx_status = imxdma_tx_status; + imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg; + imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic; + imxdma->dma_device.device_prep_dma_memcpy = imxdma_prep_dma_memcpy; + imxdma->dma_device.device_prep_interleaved_dma = imxdma_prep_dma_interleaved; + imxdma->dma_device.device_config = imxdma_config; + imxdma->dma_device.device_terminate_all = imxdma_terminate_all; + imxdma->dma_device.device_issue_pending = imxdma_issue_pending; + + platform_set_drvdata(pdev, imxdma); + + imxdma->dma_device.copy_align = DMAENGINE_ALIGN_4_BYTES; + dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff); + + ret = dma_async_device_register(&imxdma->dma_device); + if (ret) { + dev_err(&pdev->dev, "unable to register\n"); + goto disable_dma_ahb_clk; + } + + if (pdev->dev.of_node) { + ret = of_dma_controller_register(pdev->dev.of_node, + imxdma_xlate, imxdma); + if (ret) { + dev_err(&pdev->dev, "unable to register of_dma_controller\n"); + goto err_of_dma_controller; + } + } + + return 0; + +err_of_dma_controller: + dma_async_device_unregister(&imxdma->dma_device); +disable_dma_ahb_clk: + clk_disable_unprepare(imxdma->dma_ahb); +disable_dma_ipg_clk: + clk_disable_unprepare(imxdma->dma_ipg); + return ret; +} + +static void imxdma_free_irq(struct platform_device *pdev, struct imxdma_engine *imxdma) +{ + int i; + + if (is_imx1_dma(imxdma)) { + disable_irq(imxdma->irq); + disable_irq(imxdma->irq_err); + } + + for (i = 0; i < IMX_DMA_CHANNELS; i++) { + struct imxdma_channel *imxdmac = &imxdma->channel[i]; + + if (!is_imx1_dma(imxdma)) + disable_irq(imxdmac->irq); + + tasklet_kill(&imxdmac->dma_tasklet); + } +} + +static int imxdma_remove(struct platform_device *pdev) +{ + struct imxdma_engine *imxdma = platform_get_drvdata(pdev); + + imxdma_free_irq(pdev, imxdma); + + dma_async_device_unregister(&imxdma->dma_device); + + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + + clk_disable_unprepare(imxdma->dma_ipg); + clk_disable_unprepare(imxdma->dma_ahb); + + return 0; +} + +static struct platform_driver imxdma_driver = { + .driver = { + .name = "imx-dma", + .of_match_table = imx_dma_of_dev_id, + }, + .remove = imxdma_remove, +}; + +static int __init imxdma_module_init(void) +{ + return platform_driver_probe(&imxdma_driver, imxdma_probe); +} +subsys_initcall(imxdma_module_init); + +MODULE_AUTHOR("Sascha Hauer, Pengutronix "); +MODULE_DESCRIPTION("i.MX dma driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c new file mode 100644 index 0000000000..51012bd399 --- /dev/null +++ b/drivers/dma/imx-sdma.c @@ -0,0 +1,2402 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// drivers/dma/imx-sdma.c +// +// This file contains a driver for the Freescale Smart DMA engine +// +// Copyright 2010 Sascha Hauer, Pengutronix +// +// Based on code from Freescale: +// +// Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +/* SDMA registers */ +#define SDMA_H_C0PTR 0x000 +#define SDMA_H_INTR 0x004 +#define SDMA_H_STATSTOP 0x008 +#define SDMA_H_START 0x00c +#define SDMA_H_EVTOVR 0x010 +#define SDMA_H_DSPOVR 0x014 +#define SDMA_H_HOSTOVR 0x018 +#define SDMA_H_EVTPEND 0x01c +#define SDMA_H_DSPENBL 0x020 +#define SDMA_H_RESET 0x024 +#define SDMA_H_EVTERR 0x028 +#define SDMA_H_INTRMSK 0x02c +#define SDMA_H_PSW 0x030 +#define SDMA_H_EVTERRDBG 0x034 +#define SDMA_H_CONFIG 0x038 +#define SDMA_ONCE_ENB 0x040 +#define SDMA_ONCE_DATA 0x044 +#define SDMA_ONCE_INSTR 0x048 +#define SDMA_ONCE_STAT 0x04c +#define SDMA_ONCE_CMD 0x050 +#define SDMA_EVT_MIRROR 0x054 +#define SDMA_ILLINSTADDR 0x058 +#define SDMA_CHN0ADDR 0x05c +#define SDMA_ONCE_RTB 0x060 +#define SDMA_XTRIG_CONF1 0x070 +#define SDMA_XTRIG_CONF2 0x074 +#define SDMA_CHNENBL0_IMX35 0x200 +#define SDMA_CHNENBL0_IMX31 0x080 +#define SDMA_CHNPRI_0 0x100 +#define SDMA_DONE0_CONFIG 0x1000 + +/* + * Buffer descriptor status values. + */ +#define BD_DONE 0x01 +#define BD_WRAP 0x02 +#define BD_CONT 0x04 +#define BD_INTR 0x08 +#define BD_RROR 0x10 +#define BD_LAST 0x20 +#define BD_EXTD 0x80 + +/* + * Data Node descriptor status values. + */ +#define DND_END_OF_FRAME 0x80 +#define DND_END_OF_XFER 0x40 +#define DND_DONE 0x20 +#define DND_UNUSED 0x01 + +/* + * IPCV2 descriptor status values. + */ +#define BD_IPCV2_END_OF_FRAME 0x40 + +#define IPCV2_MAX_NODES 50 +/* + * Error bit set in the CCB status field by the SDMA, + * in setbd routine, in case of a transfer error + */ +#define DATA_ERROR 0x10000000 + +/* + * Buffer descriptor commands. + */ +#define C0_ADDR 0x01 +#define C0_LOAD 0x02 +#define C0_DUMP 0x03 +#define C0_SETCTX 0x07 +#define C0_GETCTX 0x03 +#define C0_SETDM 0x01 +#define C0_SETPM 0x04 +#define C0_GETDM 0x02 +#define C0_GETPM 0x08 +/* + * Change endianness indicator in the BD command field + */ +#define CHANGE_ENDIANNESS 0x80 + +/* + * p_2_p watermark_level description + * Bits Name Description + * 0-7 Lower WML Lower watermark level + * 8 PS 1: Pad Swallowing + * 0: No Pad Swallowing + * 9 PA 1: Pad Adding + * 0: No Pad Adding + * 10 SPDIF If this bit is set both source + * and destination are on SPBA + * 11 Source Bit(SP) 1: Source on SPBA + * 0: Source on AIPS + * 12 Destination Bit(DP) 1: Destination on SPBA + * 0: Destination on AIPS + * 13-15 --------- MUST BE 0 + * 16-23 Higher WML HWML + * 24-27 N Total number of samples after + * which Pad adding/Swallowing + * must be done. It must be odd. + * 28 Lower WML Event(LWE) SDMA events reg to check for + * LWML event mask + * 0: LWE in EVENTS register + * 1: LWE in EVENTS2 register + * 29 Higher WML Event(HWE) SDMA events reg to check for + * HWML event mask + * 0: HWE in EVENTS register + * 1: HWE in EVENTS2 register + * 30 --------- MUST BE 0 + * 31 CONT 1: Amount of samples to be + * transferred is unknown and + * script will keep on + * transferring samples as long as + * both events are detected and + * script must be manually stopped + * by the application + * 0: The amount of samples to be + * transferred is equal to the + * count field of mode word + */ +#define SDMA_WATERMARK_LEVEL_LWML 0xFF +#define SDMA_WATERMARK_LEVEL_PS BIT(8) +#define SDMA_WATERMARK_LEVEL_PA BIT(9) +#define SDMA_WATERMARK_LEVEL_SPDIF BIT(10) +#define SDMA_WATERMARK_LEVEL_SP BIT(11) +#define SDMA_WATERMARK_LEVEL_DP BIT(12) +#define SDMA_WATERMARK_LEVEL_HWML (0xFF << 16) +#define SDMA_WATERMARK_LEVEL_LWE BIT(28) +#define SDMA_WATERMARK_LEVEL_HWE BIT(29) +#define SDMA_WATERMARK_LEVEL_CONT BIT(31) + +#define SDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +#define SDMA_DMA_DIRECTIONS (BIT(DMA_DEV_TO_MEM) | \ + BIT(DMA_MEM_TO_DEV) | \ + BIT(DMA_DEV_TO_DEV)) + +#define SDMA_WATERMARK_LEVEL_N_FIFOS GENMASK(15, 12) +#define SDMA_WATERMARK_LEVEL_OFF_FIFOS GENMASK(19, 16) +#define SDMA_WATERMARK_LEVEL_WORDS_PER_FIFO GENMASK(31, 28) +#define SDMA_WATERMARK_LEVEL_SW_DONE BIT(23) + +#define SDMA_DONE0_CONFIG_DONE_SEL BIT(7) +#define SDMA_DONE0_CONFIG_DONE_DIS BIT(6) + +/* + * struct sdma_script_start_addrs - SDMA script start pointers + * + * start addresses of the different functions in the physical + * address space of the SDMA engine. + */ +struct sdma_script_start_addrs { + s32 ap_2_ap_addr; + s32 ap_2_bp_addr; + s32 ap_2_ap_fixed_addr; + s32 bp_2_ap_addr; + s32 loopback_on_dsp_side_addr; + s32 mcu_interrupt_only_addr; + s32 firi_2_per_addr; + s32 firi_2_mcu_addr; + s32 per_2_firi_addr; + s32 mcu_2_firi_addr; + s32 uart_2_per_addr; + s32 uart_2_mcu_addr; + s32 per_2_app_addr; + s32 mcu_2_app_addr; + s32 per_2_per_addr; + s32 uartsh_2_per_addr; + s32 uartsh_2_mcu_addr; + s32 per_2_shp_addr; + s32 mcu_2_shp_addr; + s32 ata_2_mcu_addr; + s32 mcu_2_ata_addr; + s32 app_2_per_addr; + s32 app_2_mcu_addr; + s32 shp_2_per_addr; + s32 shp_2_mcu_addr; + s32 mshc_2_mcu_addr; + s32 mcu_2_mshc_addr; + s32 spdif_2_mcu_addr; + s32 mcu_2_spdif_addr; + s32 asrc_2_mcu_addr; + s32 ext_mem_2_ipu_addr; + s32 descrambler_addr; + s32 dptc_dvfs_addr; + s32 utra_addr; + s32 ram_code_start_addr; + /* End of v1 array */ + s32 mcu_2_ssish_addr; + s32 ssish_2_mcu_addr; + s32 hdmi_dma_addr; + /* End of v2 array */ + s32 zcanfd_2_mcu_addr; + s32 zqspi_2_mcu_addr; + s32 mcu_2_ecspi_addr; + s32 mcu_2_sai_addr; + s32 sai_2_mcu_addr; + s32 uart_2_mcu_rom_addr; + s32 uartsh_2_mcu_rom_addr; + /* End of v3 array */ + s32 mcu_2_zqspi_addr; + /* End of v4 array */ +}; + +/* + * Mode/Count of data node descriptors - IPCv2 + */ +struct sdma_mode_count { +#define SDMA_BD_MAX_CNT 0xffff + u32 count : 16; /* size of the buffer pointed by this BD */ + u32 status : 8; /* E,R,I,C,W,D status bits stored here */ + u32 command : 8; /* command mostly used for channel 0 */ +}; + +/* + * Buffer descriptor + */ +struct sdma_buffer_descriptor { + struct sdma_mode_count mode; + u32 buffer_addr; /* address of the buffer described */ + u32 ext_buffer_addr; /* extended buffer address */ +} __attribute__ ((packed)); + +/** + * struct sdma_channel_control - Channel control Block + * + * @current_bd_ptr: current buffer descriptor processed + * @base_bd_ptr: first element of buffer descriptor array + * @unused: padding. The SDMA engine expects an array of 128 byte + * control blocks + */ +struct sdma_channel_control { + u32 current_bd_ptr; + u32 base_bd_ptr; + u32 unused[2]; +} __attribute__ ((packed)); + +/** + * struct sdma_state_registers - SDMA context for a channel + * + * @pc: program counter + * @unused1: unused + * @t: test bit: status of arithmetic & test instruction + * @rpc: return program counter + * @unused0: unused + * @sf: source fault while loading data + * @spc: loop start program counter + * @unused2: unused + * @df: destination fault while storing data + * @epc: loop end program counter + * @lm: loop mode + */ +struct sdma_state_registers { + u32 pc :14; + u32 unused1: 1; + u32 t : 1; + u32 rpc :14; + u32 unused0: 1; + u32 sf : 1; + u32 spc :14; + u32 unused2: 1; + u32 df : 1; + u32 epc :14; + u32 lm : 2; +} __attribute__ ((packed)); + +/** + * struct sdma_context_data - sdma context specific to a channel + * + * @channel_state: channel state bits + * @gReg: general registers + * @mda: burst dma destination address register + * @msa: burst dma source address register + * @ms: burst dma status register + * @md: burst dma data register + * @pda: peripheral dma destination address register + * @psa: peripheral dma source address register + * @ps: peripheral dma status register + * @pd: peripheral dma data register + * @ca: CRC polynomial register + * @cs: CRC accumulator register + * @dda: dedicated core destination address register + * @dsa: dedicated core source address register + * @ds: dedicated core status register + * @dd: dedicated core data register + * @scratch0: 1st word of dedicated ram for context switch + * @scratch1: 2nd word of dedicated ram for context switch + * @scratch2: 3rd word of dedicated ram for context switch + * @scratch3: 4th word of dedicated ram for context switch + * @scratch4: 5th word of dedicated ram for context switch + * @scratch5: 6th word of dedicated ram for context switch + * @scratch6: 7th word of dedicated ram for context switch + * @scratch7: 8th word of dedicated ram for context switch + */ +struct sdma_context_data { + struct sdma_state_registers channel_state; + u32 gReg[8]; + u32 mda; + u32 msa; + u32 ms; + u32 md; + u32 pda; + u32 psa; + u32 ps; + u32 pd; + u32 ca; + u32 cs; + u32 dda; + u32 dsa; + u32 ds; + u32 dd; + u32 scratch0; + u32 scratch1; + u32 scratch2; + u32 scratch3; + u32 scratch4; + u32 scratch5; + u32 scratch6; + u32 scratch7; +} __attribute__ ((packed)); + + +struct sdma_engine; + +/** + * struct sdma_desc - descriptor structor for one transfer + * @vd: descriptor for virt dma + * @num_bd: number of descriptors currently handling + * @bd_phys: physical address of bd + * @buf_tail: ID of the buffer that was processed + * @buf_ptail: ID of the previous buffer that was processed + * @period_len: period length, used in cyclic. + * @chn_real_count: the real count updated from bd->mode.count + * @chn_count: the transfer count set + * @sdmac: sdma_channel pointer + * @bd: pointer of allocate bd + */ +struct sdma_desc { + struct virt_dma_desc vd; + unsigned int num_bd; + dma_addr_t bd_phys; + unsigned int buf_tail; + unsigned int buf_ptail; + unsigned int period_len; + unsigned int chn_real_count; + unsigned int chn_count; + struct sdma_channel *sdmac; + struct sdma_buffer_descriptor *bd; +}; + +/** + * struct sdma_channel - housekeeping for a SDMA channel + * + * @vc: virt_dma base structure + * @desc: sdma description including vd and other special member + * @sdma: pointer to the SDMA engine for this channel + * @channel: the channel number, matches dmaengine chan_id + 1 + * @direction: transfer type. Needed for setting SDMA script + * @slave_config: Slave configuration + * @peripheral_type: Peripheral type. Needed for setting SDMA script + * @event_id0: aka dma request line + * @event_id1: for channels that use 2 events + * @word_size: peripheral access size + * @pc_from_device: script address for those device_2_memory + * @pc_to_device: script address for those memory_2_device + * @device_to_device: script address for those device_2_device + * @pc_to_pc: script address for those memory_2_memory + * @flags: loop mode or not + * @per_address: peripheral source or destination address in common case + * destination address in p_2_p case + * @per_address2: peripheral source address in p_2_p case + * @event_mask: event mask used in p_2_p script + * @watermark_level: value for gReg[7], some script will extend it from + * basic watermark such as p_2_p + * @shp_addr: value for gReg[6] + * @per_addr: value for gReg[2] + * @status: status of dma channel + * @context_loaded: ensure context is only loaded once + * @data: specific sdma interface structure + * @bd_pool: dma_pool for bd + * @terminate_worker: used to call back into terminate work function + * @terminated: terminated list + * @is_ram_script: flag for script in ram + * @n_fifos_src: number of source device fifos + * @n_fifos_dst: number of destination device fifos + * @sw_done: software done flag + * @stride_fifos_src: stride for source device FIFOs + * @stride_fifos_dst: stride for destination device FIFOs + * @words_per_fifo: copy number of words one time for one FIFO + */ +struct sdma_channel { + struct virt_dma_chan vc; + struct sdma_desc *desc; + struct sdma_engine *sdma; + unsigned int channel; + enum dma_transfer_direction direction; + struct dma_slave_config slave_config; + enum sdma_peripheral_type peripheral_type; + unsigned int event_id0; + unsigned int event_id1; + enum dma_slave_buswidth word_size; + unsigned int pc_from_device, pc_to_device; + unsigned int device_to_device; + unsigned int pc_to_pc; + unsigned long flags; + dma_addr_t per_address, per_address2; + unsigned long event_mask[2]; + unsigned long watermark_level; + u32 shp_addr, per_addr; + enum dma_status status; + struct imx_dma_data data; + struct work_struct terminate_worker; + struct list_head terminated; + bool is_ram_script; + unsigned int n_fifos_src; + unsigned int n_fifos_dst; + unsigned int stride_fifos_src; + unsigned int stride_fifos_dst; + unsigned int words_per_fifo; + bool sw_done; +}; + +#define IMX_DMA_SG_LOOP BIT(0) + +#define MAX_DMA_CHANNELS 32 +#define MXC_SDMA_DEFAULT_PRIORITY 1 +#define MXC_SDMA_MIN_PRIORITY 1 +#define MXC_SDMA_MAX_PRIORITY 7 + +#define SDMA_FIRMWARE_MAGIC 0x414d4453 + +/** + * struct sdma_firmware_header - Layout of the firmware image + * + * @magic: "SDMA" + * @version_major: increased whenever layout of struct + * sdma_script_start_addrs changes. + * @version_minor: firmware minor version (for binary compatible changes) + * @script_addrs_start: offset of struct sdma_script_start_addrs in this image + * @num_script_addrs: Number of script addresses in this image + * @ram_code_start: offset of SDMA ram image in this firmware image + * @ram_code_size: size of SDMA ram image + * @script_addrs: Stores the start address of the SDMA scripts + * (in SDMA memory space) + */ +struct sdma_firmware_header { + u32 magic; + u32 version_major; + u32 version_minor; + u32 script_addrs_start; + u32 num_script_addrs; + u32 ram_code_start; + u32 ram_code_size; +}; + +struct sdma_driver_data { + int chnenbl0; + int num_events; + struct sdma_script_start_addrs *script_addrs; + bool check_ratio; + /* + * ecspi ERR009165 fixed should be done in sdma script + * and it has been fixed in soc from i.mx6ul. + * please get more information from the below link: + * https://www.nxp.com/docs/en/errata/IMX6DQCE.pdf + */ + bool ecspi_fixed; +}; + +struct sdma_engine { + struct device *dev; + struct sdma_channel channel[MAX_DMA_CHANNELS]; + struct sdma_channel_control *channel_control; + void __iomem *regs; + struct sdma_context_data *context; + dma_addr_t context_phys; + struct dma_device dma_device; + struct clk *clk_ipg; + struct clk *clk_ahb; + spinlock_t channel_0_lock; + u32 script_number; + struct sdma_script_start_addrs *script_addrs; + const struct sdma_driver_data *drvdata; + u32 spba_start_addr; + u32 spba_end_addr; + unsigned int irq; + dma_addr_t bd0_phys; + struct sdma_buffer_descriptor *bd0; + /* clock ratio for AHB:SDMA core. 1:1 is 1, 2:1 is 0*/ + bool clk_ratio; + bool fw_loaded; +}; + +static int sdma_config_write(struct dma_chan *chan, + struct dma_slave_config *dmaengine_cfg, + enum dma_transfer_direction direction); + +static struct sdma_driver_data sdma_imx31 = { + .chnenbl0 = SDMA_CHNENBL0_IMX31, + .num_events = 32, +}; + +static struct sdma_script_start_addrs sdma_script_imx25 = { + .ap_2_ap_addr = 729, + .uart_2_mcu_addr = 904, + .per_2_app_addr = 1255, + .mcu_2_app_addr = 834, + .uartsh_2_mcu_addr = 1120, + .per_2_shp_addr = 1329, + .mcu_2_shp_addr = 1048, + .ata_2_mcu_addr = 1560, + .mcu_2_ata_addr = 1479, + .app_2_per_addr = 1189, + .app_2_mcu_addr = 770, + .shp_2_per_addr = 1407, + .shp_2_mcu_addr = 979, +}; + +static struct sdma_driver_data sdma_imx25 = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, + .script_addrs = &sdma_script_imx25, +}; + +static struct sdma_driver_data sdma_imx35 = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, +}; + +static struct sdma_script_start_addrs sdma_script_imx51 = { + .ap_2_ap_addr = 642, + .uart_2_mcu_addr = 817, + .mcu_2_app_addr = 747, + .mcu_2_shp_addr = 961, + .ata_2_mcu_addr = 1473, + .mcu_2_ata_addr = 1392, + .app_2_per_addr = 1033, + .app_2_mcu_addr = 683, + .shp_2_per_addr = 1251, + .shp_2_mcu_addr = 892, +}; + +static struct sdma_driver_data sdma_imx51 = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, + .script_addrs = &sdma_script_imx51, +}; + +static struct sdma_script_start_addrs sdma_script_imx53 = { + .ap_2_ap_addr = 642, + .app_2_mcu_addr = 683, + .mcu_2_app_addr = 747, + .uart_2_mcu_addr = 817, + .shp_2_mcu_addr = 891, + .mcu_2_shp_addr = 960, + .uartsh_2_mcu_addr = 1032, + .spdif_2_mcu_addr = 1100, + .mcu_2_spdif_addr = 1134, + .firi_2_mcu_addr = 1193, + .mcu_2_firi_addr = 1290, +}; + +static struct sdma_driver_data sdma_imx53 = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, + .script_addrs = &sdma_script_imx53, +}; + +static struct sdma_script_start_addrs sdma_script_imx6q = { + .ap_2_ap_addr = 642, + .uart_2_mcu_addr = 817, + .mcu_2_app_addr = 747, + .per_2_per_addr = 6331, + .uartsh_2_mcu_addr = 1032, + .mcu_2_shp_addr = 960, + .app_2_mcu_addr = 683, + .shp_2_mcu_addr = 891, + .spdif_2_mcu_addr = 1100, + .mcu_2_spdif_addr = 1134, +}; + +static struct sdma_driver_data sdma_imx6q = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, + .script_addrs = &sdma_script_imx6q, +}; + +static struct sdma_driver_data sdma_imx6ul = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, + .script_addrs = &sdma_script_imx6q, + .ecspi_fixed = true, +}; + +static struct sdma_script_start_addrs sdma_script_imx7d = { + .ap_2_ap_addr = 644, + .uart_2_mcu_addr = 819, + .mcu_2_app_addr = 749, + .uartsh_2_mcu_addr = 1034, + .mcu_2_shp_addr = 962, + .app_2_mcu_addr = 685, + .shp_2_mcu_addr = 893, + .spdif_2_mcu_addr = 1102, + .mcu_2_spdif_addr = 1136, +}; + +static struct sdma_driver_data sdma_imx7d = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, + .script_addrs = &sdma_script_imx7d, +}; + +static struct sdma_driver_data sdma_imx8mq = { + .chnenbl0 = SDMA_CHNENBL0_IMX35, + .num_events = 48, + .script_addrs = &sdma_script_imx7d, + .check_ratio = 1, +}; + +static const struct of_device_id sdma_dt_ids[] = { + { .compatible = "fsl,imx6q-sdma", .data = &sdma_imx6q, }, + { .compatible = "fsl,imx53-sdma", .data = &sdma_imx53, }, + { .compatible = "fsl,imx51-sdma", .data = &sdma_imx51, }, + { .compatible = "fsl,imx35-sdma", .data = &sdma_imx35, }, + { .compatible = "fsl,imx31-sdma", .data = &sdma_imx31, }, + { .compatible = "fsl,imx25-sdma", .data = &sdma_imx25, }, + { .compatible = "fsl,imx7d-sdma", .data = &sdma_imx7d, }, + { .compatible = "fsl,imx6ul-sdma", .data = &sdma_imx6ul, }, + { .compatible = "fsl,imx8mq-sdma", .data = &sdma_imx8mq, }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sdma_dt_ids); + +#define SDMA_H_CONFIG_DSPDMA BIT(12) /* indicates if the DSPDMA is used */ +#define SDMA_H_CONFIG_RTD_PINS BIT(11) /* indicates if Real-Time Debug pins are enabled */ +#define SDMA_H_CONFIG_ACR BIT(4) /* indicates if AHB freq /core freq = 2 or 1 */ +#define SDMA_H_CONFIG_CSM (3) /* indicates which context switch mode is selected*/ + +static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event) +{ + u32 chnenbl0 = sdma->drvdata->chnenbl0; + return chnenbl0 + event * 4; +} + +static int sdma_config_ownership(struct sdma_channel *sdmac, + bool event_override, bool mcu_override, bool dsp_override) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + unsigned long evt, mcu, dsp; + + if (event_override && mcu_override && dsp_override) + return -EINVAL; + + evt = readl_relaxed(sdma->regs + SDMA_H_EVTOVR); + mcu = readl_relaxed(sdma->regs + SDMA_H_HOSTOVR); + dsp = readl_relaxed(sdma->regs + SDMA_H_DSPOVR); + + if (dsp_override) + __clear_bit(channel, &dsp); + else + __set_bit(channel, &dsp); + + if (event_override) + __clear_bit(channel, &evt); + else + __set_bit(channel, &evt); + + if (mcu_override) + __clear_bit(channel, &mcu); + else + __set_bit(channel, &mcu); + + writel_relaxed(evt, sdma->regs + SDMA_H_EVTOVR); + writel_relaxed(mcu, sdma->regs + SDMA_H_HOSTOVR); + writel_relaxed(dsp, sdma->regs + SDMA_H_DSPOVR); + + return 0; +} + +static int is_sdma_channel_enabled(struct sdma_engine *sdma, int channel) +{ + return !!(readl(sdma->regs + SDMA_H_STATSTOP) & BIT(channel)); +} + +static void sdma_enable_channel(struct sdma_engine *sdma, int channel) +{ + writel(BIT(channel), sdma->regs + SDMA_H_START); +} + +/* + * sdma_run_channel0 - run a channel and wait till it's done + */ +static int sdma_run_channel0(struct sdma_engine *sdma) +{ + int ret; + u32 reg; + + sdma_enable_channel(sdma, 0); + + ret = readl_relaxed_poll_timeout_atomic(sdma->regs + SDMA_H_STATSTOP, + reg, !(reg & 1), 1, 500); + if (ret) + dev_err(sdma->dev, "Timeout waiting for CH0 ready\n"); + + /* Set bits of CONFIG register with dynamic context switching */ + reg = readl(sdma->regs + SDMA_H_CONFIG); + if ((reg & SDMA_H_CONFIG_CSM) == 0) { + reg |= SDMA_H_CONFIG_CSM; + writel_relaxed(reg, sdma->regs + SDMA_H_CONFIG); + } + + return ret; +} + +static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size, + u32 address) +{ + struct sdma_buffer_descriptor *bd0 = sdma->bd0; + void *buf_virt; + dma_addr_t buf_phys; + int ret; + unsigned long flags; + + buf_virt = dma_alloc_coherent(sdma->dev, size, &buf_phys, GFP_KERNEL); + if (!buf_virt) + return -ENOMEM; + + spin_lock_irqsave(&sdma->channel_0_lock, flags); + + bd0->mode.command = C0_SETPM; + bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD; + bd0->mode.count = size / 2; + bd0->buffer_addr = buf_phys; + bd0->ext_buffer_addr = address; + + memcpy(buf_virt, buf, size); + + ret = sdma_run_channel0(sdma); + + spin_unlock_irqrestore(&sdma->channel_0_lock, flags); + + dma_free_coherent(sdma->dev, size, buf_virt, buf_phys); + + return ret; +} + +static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + unsigned long val; + u32 chnenbl = chnenbl_ofs(sdma, event); + + val = readl_relaxed(sdma->regs + chnenbl); + __set_bit(channel, &val); + writel_relaxed(val, sdma->regs + chnenbl); + + /* Set SDMA_DONEx_CONFIG is sw_done enabled */ + if (sdmac->sw_done) { + val = readl_relaxed(sdma->regs + SDMA_DONE0_CONFIG); + val |= SDMA_DONE0_CONFIG_DONE_SEL; + val &= ~SDMA_DONE0_CONFIG_DONE_DIS; + writel_relaxed(val, sdma->regs + SDMA_DONE0_CONFIG); + } +} + +static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + u32 chnenbl = chnenbl_ofs(sdma, event); + unsigned long val; + + val = readl_relaxed(sdma->regs + chnenbl); + __clear_bit(channel, &val); + writel_relaxed(val, sdma->regs + chnenbl); +} + +static struct sdma_desc *to_sdma_desc(struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct sdma_desc, vd.tx); +} + +static void sdma_start_desc(struct sdma_channel *sdmac) +{ + struct virt_dma_desc *vd = vchan_next_desc(&sdmac->vc); + struct sdma_desc *desc; + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + + if (!vd) { + sdmac->desc = NULL; + return; + } + sdmac->desc = desc = to_sdma_desc(&vd->tx); + + list_del(&vd->node); + + sdma->channel_control[channel].base_bd_ptr = desc->bd_phys; + sdma->channel_control[channel].current_bd_ptr = desc->bd_phys; + sdma_enable_channel(sdma, sdmac->channel); +} + +static void sdma_update_channel_loop(struct sdma_channel *sdmac) +{ + struct sdma_buffer_descriptor *bd; + int error = 0; + enum dma_status old_status = sdmac->status; + + /* + * loop mode. Iterate over descriptors, re-setup them and + * call callback function. + */ + while (sdmac->desc) { + struct sdma_desc *desc = sdmac->desc; + + bd = &desc->bd[desc->buf_tail]; + + if (bd->mode.status & BD_DONE) + break; + + if (bd->mode.status & BD_RROR) { + bd->mode.status &= ~BD_RROR; + sdmac->status = DMA_ERROR; + error = -EIO; + } + + /* + * We use bd->mode.count to calculate the residue, since contains + * the number of bytes present in the current buffer descriptor. + */ + + desc->chn_real_count = bd->mode.count; + bd->mode.count = desc->period_len; + desc->buf_ptail = desc->buf_tail; + desc->buf_tail = (desc->buf_tail + 1) % desc->num_bd; + + /* + * The callback is called from the interrupt context in order + * to reduce latency and to avoid the risk of altering the + * SDMA transaction status by the time the client tasklet is + * executed. + */ + spin_unlock(&sdmac->vc.lock); + dmaengine_desc_get_callback_invoke(&desc->vd.tx, NULL); + spin_lock(&sdmac->vc.lock); + + /* Assign buffer ownership to SDMA */ + bd->mode.status |= BD_DONE; + + if (error) + sdmac->status = old_status; + } + + /* + * SDMA stops cyclic channel when DMA request triggers a channel and no SDMA + * owned buffer is available (i.e. BD_DONE was set too late). + */ + if (sdmac->desc && !is_sdma_channel_enabled(sdmac->sdma, sdmac->channel)) { + dev_warn(sdmac->sdma->dev, "restart cyclic channel %d\n", sdmac->channel); + sdma_enable_channel(sdmac->sdma, sdmac->channel); + } +} + +static void mxc_sdma_handle_channel_normal(struct sdma_channel *data) +{ + struct sdma_channel *sdmac = (struct sdma_channel *) data; + struct sdma_buffer_descriptor *bd; + int i, error = 0; + + sdmac->desc->chn_real_count = 0; + /* + * non loop mode. Iterate over all descriptors, collect + * errors and call callback function + */ + for (i = 0; i < sdmac->desc->num_bd; i++) { + bd = &sdmac->desc->bd[i]; + + if (bd->mode.status & (BD_DONE | BD_RROR)) + error = -EIO; + sdmac->desc->chn_real_count += bd->mode.count; + } + + if (error) + sdmac->status = DMA_ERROR; + else + sdmac->status = DMA_COMPLETE; +} + +static irqreturn_t sdma_int_handler(int irq, void *dev_id) +{ + struct sdma_engine *sdma = dev_id; + unsigned long stat; + + stat = readl_relaxed(sdma->regs + SDMA_H_INTR); + writel_relaxed(stat, sdma->regs + SDMA_H_INTR); + /* channel 0 is special and not handled here, see run_channel0() */ + stat &= ~1; + + while (stat) { + int channel = fls(stat) - 1; + struct sdma_channel *sdmac = &sdma->channel[channel]; + struct sdma_desc *desc; + + spin_lock(&sdmac->vc.lock); + desc = sdmac->desc; + if (desc) { + if (sdmac->flags & IMX_DMA_SG_LOOP) { + if (sdmac->peripheral_type != IMX_DMATYPE_HDMI) + sdma_update_channel_loop(sdmac); + else + vchan_cyclic_callback(&desc->vd); + } else { + mxc_sdma_handle_channel_normal(sdmac); + vchan_cookie_complete(&desc->vd); + sdma_start_desc(sdmac); + } + } + + spin_unlock(&sdmac->vc.lock); + __clear_bit(channel, &stat); + } + + return IRQ_HANDLED; +} + +/* + * sets the pc of SDMA script according to the peripheral type + */ +static int sdma_get_pc(struct sdma_channel *sdmac, + enum sdma_peripheral_type peripheral_type) +{ + struct sdma_engine *sdma = sdmac->sdma; + int per_2_emi = 0, emi_2_per = 0; + /* + * These are needed once we start to support transfers between + * two peripherals or memory-to-memory transfers + */ + int per_2_per = 0, emi_2_emi = 0; + + sdmac->pc_from_device = 0; + sdmac->pc_to_device = 0; + sdmac->device_to_device = 0; + sdmac->pc_to_pc = 0; + sdmac->is_ram_script = false; + + switch (peripheral_type) { + case IMX_DMATYPE_MEMORY: + emi_2_emi = sdma->script_addrs->ap_2_ap_addr; + break; + case IMX_DMATYPE_DSP: + emi_2_per = sdma->script_addrs->bp_2_ap_addr; + per_2_emi = sdma->script_addrs->ap_2_bp_addr; + break; + case IMX_DMATYPE_FIRI: + per_2_emi = sdma->script_addrs->firi_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_firi_addr; + break; + case IMX_DMATYPE_UART: + per_2_emi = sdma->script_addrs->uart_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_app_addr; + break; + case IMX_DMATYPE_UART_SP: + per_2_emi = sdma->script_addrs->uartsh_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + break; + case IMX_DMATYPE_ATA: + per_2_emi = sdma->script_addrs->ata_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_ata_addr; + break; + case IMX_DMATYPE_CSPI: + per_2_emi = sdma->script_addrs->app_2_mcu_addr; + + /* Use rom script mcu_2_app if ERR009165 fixed */ + if (sdmac->sdma->drvdata->ecspi_fixed) { + emi_2_per = sdma->script_addrs->mcu_2_app_addr; + } else { + emi_2_per = sdma->script_addrs->mcu_2_ecspi_addr; + sdmac->is_ram_script = true; + } + + break; + case IMX_DMATYPE_EXT: + case IMX_DMATYPE_SSI: + case IMX_DMATYPE_SAI: + per_2_emi = sdma->script_addrs->app_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_app_addr; + break; + case IMX_DMATYPE_SSI_DUAL: + per_2_emi = sdma->script_addrs->ssish_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_ssish_addr; + sdmac->is_ram_script = true; + break; + case IMX_DMATYPE_SSI_SP: + case IMX_DMATYPE_MMC: + case IMX_DMATYPE_SDHC: + case IMX_DMATYPE_CSPI_SP: + case IMX_DMATYPE_ESAI: + case IMX_DMATYPE_MSHC_SP: + per_2_emi = sdma->script_addrs->shp_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + break; + case IMX_DMATYPE_ASRC: + per_2_emi = sdma->script_addrs->asrc_2_mcu_addr; + emi_2_per = sdma->script_addrs->asrc_2_mcu_addr; + per_2_per = sdma->script_addrs->per_2_per_addr; + sdmac->is_ram_script = true; + break; + case IMX_DMATYPE_ASRC_SP: + per_2_emi = sdma->script_addrs->shp_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + per_2_per = sdma->script_addrs->per_2_per_addr; + break; + case IMX_DMATYPE_MSHC: + per_2_emi = sdma->script_addrs->mshc_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_mshc_addr; + break; + case IMX_DMATYPE_CCM: + per_2_emi = sdma->script_addrs->dptc_dvfs_addr; + break; + case IMX_DMATYPE_SPDIF: + per_2_emi = sdma->script_addrs->spdif_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_spdif_addr; + break; + case IMX_DMATYPE_IPU_MEMORY: + emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr; + break; + case IMX_DMATYPE_MULTI_SAI: + per_2_emi = sdma->script_addrs->sai_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_sai_addr; + break; + case IMX_DMATYPE_HDMI: + emi_2_per = sdma->script_addrs->hdmi_dma_addr; + sdmac->is_ram_script = true; + break; + default: + dev_err(sdma->dev, "Unsupported transfer type %d\n", + peripheral_type); + return -EINVAL; + } + + sdmac->pc_from_device = per_2_emi; + sdmac->pc_to_device = emi_2_per; + sdmac->device_to_device = per_2_per; + sdmac->pc_to_pc = emi_2_emi; + + return 0; +} + +static int sdma_load_context(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + int load_address; + struct sdma_context_data *context = sdma->context; + struct sdma_buffer_descriptor *bd0 = sdma->bd0; + int ret; + unsigned long flags; + + if (sdmac->direction == DMA_DEV_TO_MEM) + load_address = sdmac->pc_from_device; + else if (sdmac->direction == DMA_DEV_TO_DEV) + load_address = sdmac->device_to_device; + else if (sdmac->direction == DMA_MEM_TO_MEM) + load_address = sdmac->pc_to_pc; + else + load_address = sdmac->pc_to_device; + + if (load_address < 0) + return load_address; + + dev_dbg(sdma->dev, "load_address = %d\n", load_address); + dev_dbg(sdma->dev, "wml = 0x%08x\n", (u32)sdmac->watermark_level); + dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr); + dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr); + dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", (u32)sdmac->event_mask[0]); + dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", (u32)sdmac->event_mask[1]); + + spin_lock_irqsave(&sdma->channel_0_lock, flags); + + memset(context, 0, sizeof(*context)); + context->channel_state.pc = load_address; + + /* Send by context the event mask,base address for peripheral + * and watermark level + */ + if (sdmac->peripheral_type == IMX_DMATYPE_HDMI) { + context->gReg[4] = sdmac->per_addr; + context->gReg[6] = sdmac->shp_addr; + } else { + context->gReg[0] = sdmac->event_mask[1]; + context->gReg[1] = sdmac->event_mask[0]; + context->gReg[2] = sdmac->per_addr; + context->gReg[6] = sdmac->shp_addr; + context->gReg[7] = sdmac->watermark_level; + } + + bd0->mode.command = C0_SETDM; + bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD; + bd0->mode.count = sizeof(*context) / 4; + bd0->buffer_addr = sdma->context_phys; + bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel; + ret = sdma_run_channel0(sdma); + + spin_unlock_irqrestore(&sdma->channel_0_lock, flags); + + return ret; +} + +static struct sdma_channel *to_sdma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct sdma_channel, vc.chan); +} + +static int sdma_disable_channel(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + + writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP); + sdmac->status = DMA_ERROR; + + return 0; +} +static void sdma_channel_terminate_work(struct work_struct *work) +{ + struct sdma_channel *sdmac = container_of(work, struct sdma_channel, + terminate_worker); + /* + * According to NXP R&D team a delay of one BD SDMA cost time + * (maximum is 1ms) should be added after disable of the channel + * bit, to ensure SDMA core has really been stopped after SDMA + * clients call .device_terminate_all. + */ + usleep_range(1000, 2000); + + vchan_dma_desc_free_list(&sdmac->vc, &sdmac->terminated); +} + +static int sdma_terminate_all(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&sdmac->vc.lock, flags); + + sdma_disable_channel(chan); + + if (sdmac->desc) { + vchan_terminate_vdesc(&sdmac->desc->vd); + /* + * move out current descriptor into terminated list so that + * it could be free in sdma_channel_terminate_work alone + * later without potential involving next descriptor raised + * up before the last descriptor terminated. + */ + vchan_get_all_descriptors(&sdmac->vc, &sdmac->terminated); + sdmac->desc = NULL; + schedule_work(&sdmac->terminate_worker); + } + + spin_unlock_irqrestore(&sdmac->vc.lock, flags); + + return 0; +} + +static void sdma_channel_synchronize(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + + vchan_synchronize(&sdmac->vc); + + flush_work(&sdmac->terminate_worker); +} + +static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + + int lwml = sdmac->watermark_level & SDMA_WATERMARK_LEVEL_LWML; + int hwml = (sdmac->watermark_level & SDMA_WATERMARK_LEVEL_HWML) >> 16; + + set_bit(sdmac->event_id0 % 32, &sdmac->event_mask[1]); + set_bit(sdmac->event_id1 % 32, &sdmac->event_mask[0]); + + if (sdmac->event_id0 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_LWE; + + if (sdmac->event_id1 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_HWE; + + /* + * If LWML(src_maxburst) > HWML(dst_maxburst), we need + * swap LWML and HWML of INFO(A.3.2.5.1), also need swap + * r0(event_mask[1]) and r1(event_mask[0]). + */ + if (lwml > hwml) { + sdmac->watermark_level &= ~(SDMA_WATERMARK_LEVEL_LWML | + SDMA_WATERMARK_LEVEL_HWML); + sdmac->watermark_level |= hwml; + sdmac->watermark_level |= lwml << 16; + swap(sdmac->event_mask[0], sdmac->event_mask[1]); + } + + if (sdmac->per_address2 >= sdma->spba_start_addr && + sdmac->per_address2 <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SP; + + if (sdmac->per_address >= sdma->spba_start_addr && + sdmac->per_address <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_DP; + + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT; +} + +static void sdma_set_watermarklevel_for_sais(struct sdma_channel *sdmac) +{ + unsigned int n_fifos; + unsigned int stride_fifos; + unsigned int words_per_fifo; + + if (sdmac->sw_done) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SW_DONE; + + if (sdmac->direction == DMA_DEV_TO_MEM) { + n_fifos = sdmac->n_fifos_src; + stride_fifos = sdmac->stride_fifos_src; + } else { + n_fifos = sdmac->n_fifos_dst; + stride_fifos = sdmac->stride_fifos_dst; + } + + words_per_fifo = sdmac->words_per_fifo; + + sdmac->watermark_level |= + FIELD_PREP(SDMA_WATERMARK_LEVEL_N_FIFOS, n_fifos); + sdmac->watermark_level |= + FIELD_PREP(SDMA_WATERMARK_LEVEL_OFF_FIFOS, stride_fifos); + if (words_per_fifo) + sdmac->watermark_level |= + FIELD_PREP(SDMA_WATERMARK_LEVEL_WORDS_PER_FIFO, (words_per_fifo - 1)); +} + +static int sdma_config_channel(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + int ret; + + sdma_disable_channel(chan); + + sdmac->event_mask[0] = 0; + sdmac->event_mask[1] = 0; + sdmac->shp_addr = 0; + sdmac->per_addr = 0; + + switch (sdmac->peripheral_type) { + case IMX_DMATYPE_DSP: + sdma_config_ownership(sdmac, false, true, true); + break; + case IMX_DMATYPE_MEMORY: + sdma_config_ownership(sdmac, false, true, false); + break; + default: + sdma_config_ownership(sdmac, true, true, false); + break; + } + + ret = sdma_get_pc(sdmac, sdmac->peripheral_type); + if (ret) + return ret; + + if ((sdmac->peripheral_type != IMX_DMATYPE_MEMORY) && + (sdmac->peripheral_type != IMX_DMATYPE_DSP)) { + /* Handle multiple event channels differently */ + if (sdmac->event_id1) { + if (sdmac->peripheral_type == IMX_DMATYPE_ASRC_SP || + sdmac->peripheral_type == IMX_DMATYPE_ASRC) + sdma_set_watermarklevel_for_p2p(sdmac); + } else { + if (sdmac->peripheral_type == + IMX_DMATYPE_MULTI_SAI) + sdma_set_watermarklevel_for_sais(sdmac); + + __set_bit(sdmac->event_id0, sdmac->event_mask); + } + + /* Address */ + sdmac->shp_addr = sdmac->per_address; + sdmac->per_addr = sdmac->per_address2; + } else { + sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */ + } + + return 0; +} + +static int sdma_set_channel_priority(struct sdma_channel *sdmac, + unsigned int priority) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + + if (priority < MXC_SDMA_MIN_PRIORITY + || priority > MXC_SDMA_MAX_PRIORITY) { + return -EINVAL; + } + + writel_relaxed(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel); + + return 0; +} + +static int sdma_request_channel0(struct sdma_engine *sdma) +{ + int ret = -EBUSY; + + sdma->bd0 = dma_alloc_coherent(sdma->dev, PAGE_SIZE, &sdma->bd0_phys, + GFP_NOWAIT); + if (!sdma->bd0) { + ret = -ENOMEM; + goto out; + } + + sdma->channel_control[0].base_bd_ptr = sdma->bd0_phys; + sdma->channel_control[0].current_bd_ptr = sdma->bd0_phys; + + sdma_set_channel_priority(&sdma->channel[0], MXC_SDMA_DEFAULT_PRIORITY); + return 0; +out: + + return ret; +} + + +static int sdma_alloc_bd(struct sdma_desc *desc) +{ + u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor); + int ret = 0; + + desc->bd = dma_alloc_coherent(desc->sdmac->sdma->dev, bd_size, + &desc->bd_phys, GFP_NOWAIT); + if (!desc->bd) { + ret = -ENOMEM; + goto out; + } +out: + return ret; +} + +static void sdma_free_bd(struct sdma_desc *desc) +{ + u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor); + + dma_free_coherent(desc->sdmac->sdma->dev, bd_size, desc->bd, + desc->bd_phys); +} + +static void sdma_desc_free(struct virt_dma_desc *vd) +{ + struct sdma_desc *desc = container_of(vd, struct sdma_desc, vd); + + sdma_free_bd(desc); + kfree(desc); +} + +static int sdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct imx_dma_data *data = chan->private; + struct imx_dma_data mem_data; + int prio, ret; + + /* + * MEMCPY may never setup chan->private by filter function such as + * dmatest, thus create 'struct imx_dma_data mem_data' for this case. + * Please note in any other slave case, you have to setup chan->private + * with 'struct imx_dma_data' in your own filter function if you want to + * request dma channel by dma_request_channel() rather than + * dma_request_slave_channel(). Othwise, 'MEMCPY in case?' will appear + * to warn you to correct your filter function. + */ + if (!data) { + dev_dbg(sdmac->sdma->dev, "MEMCPY in case?\n"); + mem_data.priority = 2; + mem_data.peripheral_type = IMX_DMATYPE_MEMORY; + mem_data.dma_request = 0; + mem_data.dma_request2 = 0; + data = &mem_data; + + ret = sdma_get_pc(sdmac, IMX_DMATYPE_MEMORY); + if (ret) + return ret; + } + + switch (data->priority) { + case DMA_PRIO_HIGH: + prio = 3; + break; + case DMA_PRIO_MEDIUM: + prio = 2; + break; + case DMA_PRIO_LOW: + default: + prio = 1; + break; + } + + sdmac->peripheral_type = data->peripheral_type; + sdmac->event_id0 = data->dma_request; + sdmac->event_id1 = data->dma_request2; + + ret = clk_enable(sdmac->sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdmac->sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; + + ret = sdma_set_channel_priority(sdmac, prio); + if (ret) + goto disable_clk_ahb; + + return 0; + +disable_clk_ahb: + clk_disable(sdmac->sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdmac->sdma->clk_ipg); + return ret; +} + +static void sdma_free_chan_resources(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + + sdma_terminate_all(chan); + + sdma_channel_synchronize(chan); + + sdma_event_disable(sdmac, sdmac->event_id0); + if (sdmac->event_id1) + sdma_event_disable(sdmac, sdmac->event_id1); + + sdmac->event_id0 = 0; + sdmac->event_id1 = 0; + + sdma_set_channel_priority(sdmac, 0); + + clk_disable(sdma->clk_ipg); + clk_disable(sdma->clk_ahb); +} + +static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac, + enum dma_transfer_direction direction, u32 bds) +{ + struct sdma_desc *desc; + + if (!sdmac->sdma->fw_loaded && sdmac->is_ram_script) { + dev_warn_once(sdmac->sdma->dev, "sdma firmware not ready!\n"); + goto err_out; + } + + desc = kzalloc((sizeof(*desc)), GFP_NOWAIT); + if (!desc) + goto err_out; + + sdmac->status = DMA_IN_PROGRESS; + sdmac->direction = direction; + sdmac->flags = 0; + + desc->chn_count = 0; + desc->chn_real_count = 0; + desc->buf_tail = 0; + desc->buf_ptail = 0; + desc->sdmac = sdmac; + desc->num_bd = bds; + + if (bds && sdma_alloc_bd(desc)) + goto err_desc_out; + + /* No slave_config called in MEMCPY case, so do here */ + if (direction == DMA_MEM_TO_MEM) + sdma_config_ownership(sdmac, false, true, false); + + if (sdma_load_context(sdmac)) + goto err_bd_out; + + return desc; + +err_bd_out: + sdma_free_bd(desc); +err_desc_out: + kfree(desc); +err_out: + return NULL; +} + +static struct dma_async_tx_descriptor *sdma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dma_dst, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + size_t count; + int i = 0, param; + struct sdma_buffer_descriptor *bd; + struct sdma_desc *desc; + + if (!chan || !len) + return NULL; + + dev_dbg(sdma->dev, "memcpy: %pad->%pad, len=%zu, channel=%d.\n", + &dma_src, &dma_dst, len, channel); + + desc = sdma_transfer_init(sdmac, DMA_MEM_TO_MEM, + len / SDMA_BD_MAX_CNT + 1); + if (!desc) + return NULL; + + do { + count = min_t(size_t, len, SDMA_BD_MAX_CNT); + bd = &desc->bd[i]; + bd->buffer_addr = dma_src; + bd->ext_buffer_addr = dma_dst; + bd->mode.count = count; + desc->chn_count += count; + bd->mode.command = 0; + + dma_src += count; + dma_dst += count; + len -= count; + i++; + + param = BD_DONE | BD_EXTD | BD_CONT; + /* last bd */ + if (!len) { + param |= BD_INTR; + param |= BD_LAST; + param &= ~BD_CONT; + } + + dev_dbg(sdma->dev, "entry %d: count: %zd dma: 0x%x %s%s\n", + i, count, bd->buffer_addr, + param & BD_WRAP ? "wrap" : "", + param & BD_INTR ? " intr" : ""); + + bd->mode.status = param; + } while (len); + + return vchan_tx_prep(&sdmac->vc, &desc->vd, flags); +} + +static struct dma_async_tx_descriptor *sdma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + int i, count; + int channel = sdmac->channel; + struct scatterlist *sg; + struct sdma_desc *desc; + + sdma_config_write(chan, &sdmac->slave_config, direction); + + desc = sdma_transfer_init(sdmac, direction, sg_len); + if (!desc) + goto err_out; + + dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n", + sg_len, channel); + + for_each_sg(sgl, sg, sg_len, i) { + struct sdma_buffer_descriptor *bd = &desc->bd[i]; + int param; + + bd->buffer_addr = sg->dma_address; + + count = sg_dma_len(sg); + + if (count > SDMA_BD_MAX_CNT) { + dev_err(sdma->dev, "SDMA channel %d: maximum bytes for sg entry exceeded: %d > %d\n", + channel, count, SDMA_BD_MAX_CNT); + goto err_bd_out; + } + + bd->mode.count = count; + desc->chn_count += count; + + if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) + goto err_bd_out; + + switch (sdmac->word_size) { + case DMA_SLAVE_BUSWIDTH_4_BYTES: + bd->mode.command = 0; + if (count & 3 || sg->dma_address & 3) + goto err_bd_out; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + bd->mode.command = 2; + if (count & 1 || sg->dma_address & 1) + goto err_bd_out; + break; + case DMA_SLAVE_BUSWIDTH_1_BYTE: + bd->mode.command = 1; + break; + default: + goto err_bd_out; + } + + param = BD_DONE | BD_EXTD | BD_CONT; + + if (i + 1 == sg_len) { + param |= BD_INTR; + param |= BD_LAST; + param &= ~BD_CONT; + } + + dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n", + i, count, (u64)sg->dma_address, + param & BD_WRAP ? "wrap" : "", + param & BD_INTR ? " intr" : ""); + + bd->mode.status = param; + } + + return vchan_tx_prep(&sdmac->vc, &desc->vd, flags); +err_bd_out: + sdma_free_bd(desc); + kfree(desc); +err_out: + sdmac->status = DMA_ERROR; + return NULL; +} + +static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + int num_periods = 0; + int channel = sdmac->channel; + int i = 0, buf = 0; + struct sdma_desc *desc; + + dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel); + + if (sdmac->peripheral_type != IMX_DMATYPE_HDMI) + num_periods = buf_len / period_len; + + sdma_config_write(chan, &sdmac->slave_config, direction); + + desc = sdma_transfer_init(sdmac, direction, num_periods); + if (!desc) + goto err_out; + + desc->period_len = period_len; + + sdmac->flags |= IMX_DMA_SG_LOOP; + + if (period_len > SDMA_BD_MAX_CNT) { + dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %zu > %d\n", + channel, period_len, SDMA_BD_MAX_CNT); + goto err_bd_out; + } + + if (sdmac->peripheral_type == IMX_DMATYPE_HDMI) + return vchan_tx_prep(&sdmac->vc, &desc->vd, flags); + + while (buf < buf_len) { + struct sdma_buffer_descriptor *bd = &desc->bd[i]; + int param; + + bd->buffer_addr = dma_addr; + + bd->mode.count = period_len; + + if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) + goto err_bd_out; + if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES) + bd->mode.command = 0; + else + bd->mode.command = sdmac->word_size; + + param = BD_DONE | BD_EXTD | BD_CONT | BD_INTR; + if (i + 1 == num_periods) + param |= BD_WRAP; + + dev_dbg(sdma->dev, "entry %d: count: %zu dma: %#llx %s%s\n", + i, period_len, (u64)dma_addr, + param & BD_WRAP ? "wrap" : "", + param & BD_INTR ? " intr" : ""); + + bd->mode.status = param; + + dma_addr += period_len; + buf += period_len; + + i++; + } + + return vchan_tx_prep(&sdmac->vc, &desc->vd, flags); +err_bd_out: + sdma_free_bd(desc); + kfree(desc); +err_out: + sdmac->status = DMA_ERROR; + return NULL; +} + +static int sdma_config_write(struct dma_chan *chan, + struct dma_slave_config *dmaengine_cfg, + enum dma_transfer_direction direction) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + + if (direction == DMA_DEV_TO_MEM) { + sdmac->per_address = dmaengine_cfg->src_addr; + sdmac->watermark_level = dmaengine_cfg->src_maxburst * + dmaengine_cfg->src_addr_width; + sdmac->word_size = dmaengine_cfg->src_addr_width; + } else if (direction == DMA_DEV_TO_DEV) { + sdmac->per_address2 = dmaengine_cfg->src_addr; + sdmac->per_address = dmaengine_cfg->dst_addr; + sdmac->watermark_level = dmaengine_cfg->src_maxburst & + SDMA_WATERMARK_LEVEL_LWML; + sdmac->watermark_level |= (dmaengine_cfg->dst_maxburst << 16) & + SDMA_WATERMARK_LEVEL_HWML; + sdmac->word_size = dmaengine_cfg->dst_addr_width; + } else if (sdmac->peripheral_type == IMX_DMATYPE_HDMI) { + sdmac->per_address = dmaengine_cfg->dst_addr; + sdmac->per_address2 = dmaengine_cfg->src_addr; + sdmac->watermark_level = 0; + } else { + sdmac->per_address = dmaengine_cfg->dst_addr; + sdmac->watermark_level = dmaengine_cfg->dst_maxburst * + dmaengine_cfg->dst_addr_width; + sdmac->word_size = dmaengine_cfg->dst_addr_width; + } + sdmac->direction = direction; + return sdma_config_channel(chan); +} + +static int sdma_config(struct dma_chan *chan, + struct dma_slave_config *dmaengine_cfg) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + + memcpy(&sdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg)); + + if (dmaengine_cfg->peripheral_config) { + struct sdma_peripheral_config *sdmacfg = dmaengine_cfg->peripheral_config; + if (dmaengine_cfg->peripheral_size != sizeof(struct sdma_peripheral_config)) { + dev_err(sdma->dev, "Invalid peripheral size %zu, expected %zu\n", + dmaengine_cfg->peripheral_size, + sizeof(struct sdma_peripheral_config)); + return -EINVAL; + } + sdmac->n_fifos_src = sdmacfg->n_fifos_src; + sdmac->n_fifos_dst = sdmacfg->n_fifos_dst; + sdmac->stride_fifos_src = sdmacfg->stride_fifos_src; + sdmac->stride_fifos_dst = sdmacfg->stride_fifos_dst; + sdmac->words_per_fifo = sdmacfg->words_per_fifo; + sdmac->sw_done = sdmacfg->sw_done; + } + + /* Set ENBLn earlier to make sure dma request triggered after that */ + if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events) + return -EINVAL; + sdma_event_enable(sdmac, sdmac->event_id0); + + if (sdmac->event_id1) { + if (sdmac->event_id1 >= sdmac->sdma->drvdata->num_events) + return -EINVAL; + sdma_event_enable(sdmac, sdmac->event_id1); + } + + return 0; +} + +static enum dma_status sdma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_desc *desc = NULL; + u32 residue; + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&sdmac->vc.lock, flags); + + vd = vchan_find_desc(&sdmac->vc, cookie); + if (vd) + desc = to_sdma_desc(&vd->tx); + else if (sdmac->desc && sdmac->desc->vd.tx.cookie == cookie) + desc = sdmac->desc; + + if (desc) { + if (sdmac->flags & IMX_DMA_SG_LOOP) + residue = (desc->num_bd - desc->buf_ptail) * + desc->period_len - desc->chn_real_count; + else + residue = desc->chn_count - desc->chn_real_count; + } else { + residue = 0; + } + + spin_unlock_irqrestore(&sdmac->vc.lock, flags); + + dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, + residue); + + return sdmac->status; +} + +static void sdma_issue_pending(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&sdmac->vc.lock, flags); + if (vchan_issue_pending(&sdmac->vc) && !sdmac->desc) + sdma_start_desc(sdmac); + spin_unlock_irqrestore(&sdmac->vc.lock, flags); +} + +#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1 34 +#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2 38 +#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3 45 +#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V4 46 + +static void sdma_add_scripts(struct sdma_engine *sdma, + const struct sdma_script_start_addrs *addr) +{ + s32 *addr_arr = (u32 *)addr; + s32 *saddr_arr = (u32 *)sdma->script_addrs; + int i; + + /* use the default firmware in ROM if missing external firmware */ + if (!sdma->script_number) + sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; + + if (sdma->script_number > sizeof(struct sdma_script_start_addrs) + / sizeof(s32)) { + dev_err(sdma->dev, + "SDMA script number %d not match with firmware.\n", + sdma->script_number); + return; + } + + for (i = 0; i < sdma->script_number; i++) + if (addr_arr[i] > 0) + saddr_arr[i] = addr_arr[i]; + + /* + * For compatibility with NXP internal legacy kernel before 4.19 which + * is based on uart ram script and mainline kernel based on uart rom + * script, both uart ram/rom scripts are present in newer sdma + * firmware. Use the rom versions if they are present (V3 or newer). + */ + if (sdma->script_number >= SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3) { + if (addr->uart_2_mcu_rom_addr) + sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_rom_addr; + if (addr->uartsh_2_mcu_rom_addr) + sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_rom_addr; + } +} + +static void sdma_load_firmware(const struct firmware *fw, void *context) +{ + struct sdma_engine *sdma = context; + const struct sdma_firmware_header *header; + const struct sdma_script_start_addrs *addr; + unsigned short *ram_code; + + if (!fw) { + dev_info(sdma->dev, "external firmware not found, using ROM firmware\n"); + /* In this case we just use the ROM firmware. */ + return; + } + + if (fw->size < sizeof(*header)) + goto err_firmware; + + header = (struct sdma_firmware_header *)fw->data; + + if (header->magic != SDMA_FIRMWARE_MAGIC) + goto err_firmware; + if (header->ram_code_start + header->ram_code_size > fw->size) + goto err_firmware; + switch (header->version_major) { + case 1: + sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; + break; + case 2: + sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2; + break; + case 3: + sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3; + break; + case 4: + sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V4; + break; + default: + dev_err(sdma->dev, "unknown firmware version\n"); + goto err_firmware; + } + + addr = (void *)header + header->script_addrs_start; + ram_code = (void *)header + header->ram_code_start; + + clk_enable(sdma->clk_ipg); + clk_enable(sdma->clk_ahb); + /* download the RAM image for SDMA */ + sdma_load_script(sdma, ram_code, + header->ram_code_size, + addr->ram_code_start_addr); + clk_disable(sdma->clk_ipg); + clk_disable(sdma->clk_ahb); + + sdma_add_scripts(sdma, addr); + + sdma->fw_loaded = true; + + dev_info(sdma->dev, "loaded firmware %d.%d\n", + header->version_major, + header->version_minor); + +err_firmware: + release_firmware(fw); +} + +#define EVENT_REMAP_CELLS 3 + +static int sdma_event_remap(struct sdma_engine *sdma) +{ + struct device_node *np = sdma->dev->of_node; + struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0); + struct property *event_remap; + struct regmap *gpr; + char propname[] = "fsl,sdma-event-remap"; + u32 reg, val, shift, num_map, i; + int ret = 0; + + if (IS_ERR(np) || !gpr_np) + goto out; + + event_remap = of_find_property(np, propname, NULL); + num_map = event_remap ? (event_remap->length / sizeof(u32)) : 0; + if (!num_map) { + dev_dbg(sdma->dev, "no event needs to be remapped\n"); + goto out; + } else if (num_map % EVENT_REMAP_CELLS) { + dev_err(sdma->dev, "the property %s must modulo %d\n", + propname, EVENT_REMAP_CELLS); + ret = -EINVAL; + goto out; + } + + gpr = syscon_node_to_regmap(gpr_np); + if (IS_ERR(gpr)) { + dev_err(sdma->dev, "failed to get gpr regmap\n"); + ret = PTR_ERR(gpr); + goto out; + } + + for (i = 0; i < num_map; i += EVENT_REMAP_CELLS) { + ret = of_property_read_u32_index(np, propname, i, ®); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 1, &shift); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 1); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 2, &val); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 2); + goto out; + } + + regmap_update_bits(gpr, reg, BIT(shift), val << shift); + } + +out: + if (gpr_np) + of_node_put(gpr_np); + + return ret; +} + +static int sdma_get_firmware(struct sdma_engine *sdma, + const char *fw_name) +{ + int ret; + + ret = request_firmware_nowait(THIS_MODULE, + FW_ACTION_UEVENT, fw_name, sdma->dev, + GFP_KERNEL, sdma, sdma_load_firmware); + + return ret; +} + +static int sdma_init(struct sdma_engine *sdma) +{ + int i, ret; + dma_addr_t ccb_phys; + + ret = clk_enable(sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; + + if (sdma->drvdata->check_ratio && + (clk_get_rate(sdma->clk_ahb) == clk_get_rate(sdma->clk_ipg))) + sdma->clk_ratio = 1; + + /* Be sure SDMA has not started yet */ + writel_relaxed(0, sdma->regs + SDMA_H_C0PTR); + + sdma->channel_control = dma_alloc_coherent(sdma->dev, + MAX_DMA_CHANNELS * sizeof(struct sdma_channel_control) + + sizeof(struct sdma_context_data), + &ccb_phys, GFP_KERNEL); + + if (!sdma->channel_control) { + ret = -ENOMEM; + goto err_dma_alloc; + } + + sdma->context = (void *)sdma->channel_control + + MAX_DMA_CHANNELS * sizeof(struct sdma_channel_control); + sdma->context_phys = ccb_phys + + MAX_DMA_CHANNELS * sizeof(struct sdma_channel_control); + + /* disable all channels */ + for (i = 0; i < sdma->drvdata->num_events; i++) + writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i)); + + /* All channels have priority 0 */ + for (i = 0; i < MAX_DMA_CHANNELS; i++) + writel_relaxed(0, sdma->regs + SDMA_CHNPRI_0 + i * 4); + + ret = sdma_request_channel0(sdma); + if (ret) + goto err_dma_alloc; + + sdma_config_ownership(&sdma->channel[0], false, true, false); + + /* Set Command Channel (Channel Zero) */ + writel_relaxed(0x4050, sdma->regs + SDMA_CHN0ADDR); + + /* Set bits of CONFIG register but with static context switching */ + if (sdma->clk_ratio) + writel_relaxed(SDMA_H_CONFIG_ACR, sdma->regs + SDMA_H_CONFIG); + else + writel_relaxed(0, sdma->regs + SDMA_H_CONFIG); + + writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR); + + /* Initializes channel's priorities */ + sdma_set_channel_priority(&sdma->channel[0], 7); + + clk_disable(sdma->clk_ipg); + clk_disable(sdma->clk_ahb); + + return 0; + +err_dma_alloc: + clk_disable(sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdma->clk_ipg); + dev_err(sdma->dev, "initialisation failed with %d\n", ret); + return ret; +} + +static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct imx_dma_data *data = fn_param; + + if (!imx_dma_is_general_purpose(chan)) + return false; + + sdmac->data = *data; + chan->private = &sdmac->data; + + return true; +} + +static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sdma_engine *sdma = ofdma->of_dma_data; + dma_cap_mask_t mask = sdma->dma_device.cap_mask; + struct imx_dma_data data; + + if (dma_spec->args_count != 3) + return NULL; + + data.dma_request = dma_spec->args[0]; + data.peripheral_type = dma_spec->args[1]; + data.priority = dma_spec->args[2]; + /* + * init dma_request2 to zero, which is not used by the dts. + * For P2P, dma_request2 is init from dma_request_channel(), + * chan->private will point to the imx_dma_data, and in + * device_alloc_chan_resources(), imx_dma_data.dma_request2 will + * be set to sdmac->event_id1. + */ + data.dma_request2 = 0; + + return __dma_request_channel(&mask, sdma_filter_fn, &data, + ofdma->of_node); +} + +static int sdma_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct device_node *spba_bus; + const char *fw_name; + int ret; + int irq; + struct resource spba_res; + int i; + struct sdma_engine *sdma; + s32 *saddr_arr; + + ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + sdma = devm_kzalloc(&pdev->dev, sizeof(*sdma), GFP_KERNEL); + if (!sdma) + return -ENOMEM; + + spin_lock_init(&sdma->channel_0_lock); + + sdma->dev = &pdev->dev; + sdma->drvdata = of_device_get_match_data(sdma->dev); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + sdma->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(sdma->regs)) + return PTR_ERR(sdma->regs); + + sdma->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(sdma->clk_ipg)) + return PTR_ERR(sdma->clk_ipg); + + sdma->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); + if (IS_ERR(sdma->clk_ahb)) + return PTR_ERR(sdma->clk_ahb); + + ret = clk_prepare(sdma->clk_ipg); + if (ret) + return ret; + + ret = clk_prepare(sdma->clk_ahb); + if (ret) + goto err_clk; + + ret = devm_request_irq(&pdev->dev, irq, sdma_int_handler, 0, + dev_name(&pdev->dev), sdma); + if (ret) + goto err_irq; + + sdma->irq = irq; + + sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); + if (!sdma->script_addrs) { + ret = -ENOMEM; + goto err_irq; + } + + /* initially no scripts available */ + saddr_arr = (s32 *)sdma->script_addrs; + for (i = 0; i < sizeof(*sdma->script_addrs) / sizeof(s32); i++) + saddr_arr[i] = -EINVAL; + + dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask); + dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask); + dma_cap_set(DMA_MEMCPY, sdma->dma_device.cap_mask); + dma_cap_set(DMA_PRIVATE, sdma->dma_device.cap_mask); + + INIT_LIST_HEAD(&sdma->dma_device.channels); + /* Initialize channel parameters */ + for (i = 0; i < MAX_DMA_CHANNELS; i++) { + struct sdma_channel *sdmac = &sdma->channel[i]; + + sdmac->sdma = sdma; + + sdmac->channel = i; + sdmac->vc.desc_free = sdma_desc_free; + INIT_LIST_HEAD(&sdmac->terminated); + INIT_WORK(&sdmac->terminate_worker, + sdma_channel_terminate_work); + /* + * Add the channel to the DMAC list. Do not add channel 0 though + * because we need it internally in the SDMA driver. This also means + * that channel 0 in dmaengine counting matches sdma channel 1. + */ + if (i) + vchan_init(&sdmac->vc, &sdma->dma_device); + } + + ret = sdma_init(sdma); + if (ret) + goto err_init; + + ret = sdma_event_remap(sdma); + if (ret) + goto err_init; + + if (sdma->drvdata->script_addrs) + sdma_add_scripts(sdma, sdma->drvdata->script_addrs); + + sdma->dma_device.dev = &pdev->dev; + + sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources; + sdma->dma_device.device_free_chan_resources = sdma_free_chan_resources; + sdma->dma_device.device_tx_status = sdma_tx_status; + sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg; + sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic; + sdma->dma_device.device_config = sdma_config; + sdma->dma_device.device_terminate_all = sdma_terminate_all; + sdma->dma_device.device_synchronize = sdma_channel_synchronize; + sdma->dma_device.src_addr_widths = SDMA_DMA_BUSWIDTHS; + sdma->dma_device.dst_addr_widths = SDMA_DMA_BUSWIDTHS; + sdma->dma_device.directions = SDMA_DMA_DIRECTIONS; + sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + sdma->dma_device.device_prep_dma_memcpy = sdma_prep_memcpy; + sdma->dma_device.device_issue_pending = sdma_issue_pending; + sdma->dma_device.copy_align = 2; + dma_set_max_seg_size(sdma->dma_device.dev, SDMA_BD_MAX_CNT); + + platform_set_drvdata(pdev, sdma); + + ret = dma_async_device_register(&sdma->dma_device); + if (ret) { + dev_err(&pdev->dev, "unable to register\n"); + goto err_init; + } + + if (np) { + ret = of_dma_controller_register(np, sdma_xlate, sdma); + if (ret) { + dev_err(&pdev->dev, "failed to register controller\n"); + goto err_register; + } + + spba_bus = of_find_compatible_node(NULL, NULL, "fsl,spba-bus"); + ret = of_address_to_resource(spba_bus, 0, &spba_res); + if (!ret) { + sdma->spba_start_addr = spba_res.start; + sdma->spba_end_addr = spba_res.end; + } + of_node_put(spba_bus); + } + + /* + * Because that device tree does not encode ROM script address, + * the RAM script in firmware is mandatory for device tree + * probe, otherwise it fails. + */ + ret = of_property_read_string(np, "fsl,sdma-ram-script-name", + &fw_name); + if (ret) { + dev_warn(&pdev->dev, "failed to get firmware name\n"); + } else { + ret = sdma_get_firmware(sdma, fw_name); + if (ret) + dev_warn(&pdev->dev, "failed to get firmware from device tree\n"); + } + + return 0; + +err_register: + dma_async_device_unregister(&sdma->dma_device); +err_init: + kfree(sdma->script_addrs); +err_irq: + clk_unprepare(sdma->clk_ahb); +err_clk: + clk_unprepare(sdma->clk_ipg); + return ret; +} + +static int sdma_remove(struct platform_device *pdev) +{ + struct sdma_engine *sdma = platform_get_drvdata(pdev); + int i; + + devm_free_irq(&pdev->dev, sdma->irq, sdma); + dma_async_device_unregister(&sdma->dma_device); + kfree(sdma->script_addrs); + clk_unprepare(sdma->clk_ahb); + clk_unprepare(sdma->clk_ipg); + /* Kill the tasklet */ + for (i = 0; i < MAX_DMA_CHANNELS; i++) { + struct sdma_channel *sdmac = &sdma->channel[i]; + + tasklet_kill(&sdmac->vc.task); + sdma_free_chan_resources(&sdmac->vc.chan); + } + + platform_set_drvdata(pdev, NULL); + return 0; +} + +static struct platform_driver sdma_driver = { + .driver = { + .name = "imx-sdma", + .of_match_table = sdma_dt_ids, + }, + .remove = sdma_remove, + .probe = sdma_probe, +}; + +module_platform_driver(sdma_driver); + +MODULE_AUTHOR("Sascha Hauer, Pengutronix "); +MODULE_DESCRIPTION("i.MX SDMA driver"); +#if IS_ENABLED(CONFIG_SOC_IMX6Q) +MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin"); +#endif +#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M) +MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin"); +#endif +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile new file mode 100644 index 0000000000..86638a608c --- /dev/null +++ b/drivers/dma/ioat/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o +ioatdma-y := init.o dma.o prep.o dca.o sysfs.o diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c new file mode 100644 index 0000000000..17f6b63671 --- /dev/null +++ b/drivers/dma/ioat/dca.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 - 2009 Intel Corporation. + */ + +#include +#include +#include +#include +#include + +/* either a kernel change is needed, or we need something like this in kernel */ +#ifndef CONFIG_SMP +#include +#undef cpu_physical_id +#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) +#endif + +#include "dma.h" +#include "registers.h" + +/* + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 + * contain the bit number of the APIC ID to map into the DCA tag. If the valid + * bit is not set, then the value must be 0 or 1 and defines the bit in the tag. + */ +#define DCA_TAG_MAP_VALID 0x80 + +#define DCA3_TAG_MAP_BIT_TO_INV 0x80 +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40 +#define DCA3_TAG_MAP_LITERAL_VAL 0x1 + +#define DCA_TAG_MAP_MASK 0xDF + +/* expected tag map bytes for I/OAT ver.2 */ +#define DCA2_TAG_MAP_BYTE0 0x80 +#define DCA2_TAG_MAP_BYTE1 0x0 +#define DCA2_TAG_MAP_BYTE2 0x81 +#define DCA2_TAG_MAP_BYTE3 0x82 +#define DCA2_TAG_MAP_BYTE4 0x82 + +/* + * "Legacy" DCA systems do not implement the DCA register set in the + * I/OAT device. Software needs direct support for their tag mappings. + */ + +#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) +#define IOAT_TAG_MAP_LEN 8 + +/* pack PCI B/D/F into a u16 */ +static inline u16 dcaid_from_pcidev(struct pci_dev *pci) +{ + return pci_dev_id(pci); +} + +static int dca_enabled_in_bios(struct pci_dev *pdev) +{ + /* CPUID level 9 returns DCA configuration */ + /* Bit 0 indicates DCA enabled by the BIOS */ + unsigned long cpuid_level_9; + int res; + + cpuid_level_9 = cpuid_eax(9); + res = test_bit(0, &cpuid_level_9); + if (!res) + dev_dbg(&pdev->dev, "DCA is disabled in BIOS\n"); + + return res; +} + +int system_has_dca_enabled(struct pci_dev *pdev) +{ + if (boot_cpu_has(X86_FEATURE_DCA)) + return dca_enabled_in_bios(pdev); + + dev_dbg(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n"); + return 0; +} + +struct ioat_dca_slot { + struct pci_dev *pdev; /* requester device */ + u16 rid; /* requester id, as used by IOAT */ +}; + +#define IOAT_DCA_MAX_REQ 6 +#define IOAT3_DCA_MAX_REQ 2 + +struct ioat_dca_priv { + void __iomem *iobase; + void __iomem *dca_base; + int max_requesters; + int requester_count; + u8 tag_map[IOAT_TAG_MAP_LEN]; + struct ioat_dca_slot req_slots[]; +}; + +static int ioat_dca_dev_managed(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + pdev = to_pci_dev(dev); + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) + return 1; + } + return 0; +} + +static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (!dev_is_pci(dev)) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (!dev_is_pci(dev)) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) { + bit = entry & + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV); + value = (apic_id & (1 << bit)) ? 1 : 0; + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) { + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV; + value = (apic_id & (1 << bit)) ? 0 : 1; + } else { + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0; + } + tag |= (value << i); + } + + return tag; +} + +static const struct dca_ops ioat_dca_ops = { + .add_requester = ioat_dca_add_requester, + .remove_requester = ioat_dca_remove_requester, + .get_tag = ioat_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat_dca_count_dca_slots(void *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +static inline int dca3_tag_map_invalid(u8 *tag_map) +{ + /* + * If the tag map is not programmed by the BIOS the default is: + * 0x80 0x80 0x80 0x80 0x80 0x00 0x00 0x00 + * + * This an invalid map and will result in only 2 possible tags + * 0x1F and 0x00. 0x00 is an invalid DCA tag so we know that + * this entire definition is invalid. + */ + return ((tag_map[0] == DCA_TAG_MAP_VALID) && + (tag_map[1] == DCA_TAG_MAP_VALID) && + (tag_map[2] == DCA_TAG_MAP_VALID) && + (tag_map[3] == DCA_TAG_MAP_VALID) && + (tag_map[4] == DCA_TAG_MAP_VALID)); +} + +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } tag_map; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat_dca_ops, + struct_size(ioatdca, req_slots, slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) { + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) { + pcie_control |= IOAT3_PCI_CONTROL_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map.low = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW); + tag_map.high = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH); + for (i = 0; i < 8; i++) { + bit = tag_map.full >> (8 * i); + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; + } + + if (dca3_tag_map_invalid(ioatdca->tag_map)) { + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + pr_warn_once("%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", + dev_driver_string(&pdev->dev), + dev_name(&pdev->dev)); + free_dca_provider(dca); + return NULL; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c new file mode 100644 index 0000000000..79d8957f9e --- /dev/null +++ b/drivers/dma/ioat/dma.c @@ -0,0 +1,1061 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +static int completion_timeout = 200; +module_param(completion_timeout, int, 0644); +MODULE_PARM_DESC(completion_timeout, + "set ioat completion timeout [msec] (default 200 [msec])"); +static int idle_timeout = 2000; +module_param(idle_timeout, int, 0644); +MODULE_PARM_DESC(idle_timeout, + "set ioat idle timeout [msec] (default 2000 [msec])"); + +#define IDLE_TIMEOUT msecs_to_jiffies(idle_timeout) +#define COMPLETION_TIMEOUT msecs_to_jiffies(completion_timeout) + +static char *chanerr_str[] = { + "DMA Transfer Source Address Error", + "DMA Transfer Destination Address Error", + "Next Descriptor Address Error", + "Descriptor Error", + "Chan Address Value Error", + "CHANCMD Error", + "Chipset Uncorrectable Data Integrity Error", + "DMA Uncorrectable Data Integrity Error", + "Read Data Error", + "Write Data Error", + "Descriptor Control Error", + "Descriptor Transfer Size Error", + "Completion Address Error", + "Interrupt Configuration Error", + "Super extended descriptor Address Error", + "Unaffiliated Error", + "CRC or XOR P Error", + "XOR Q Error", + "Descriptor Count Error", + "DIF All F detect Error", + "Guard Tag verification Error", + "Application Tag verification Error", + "Reference Tag verification Error", + "Bundle Bit Error", + "Result DIF All F detect Error", + "Result Guard Tag verification Error", + "Result Application Tag verification Error", + "Result Reference Tag verification Error", +}; + +static void ioat_eh(struct ioatdma_chan *ioat_chan); + +static void ioat_print_chanerrs(struct ioatdma_chan *ioat_chan, u32 chanerr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(chanerr_str); i++) { + if ((chanerr >> i) & 1) { + dev_err(to_dev(ioat_chan), "Err(%d): %s\n", + i, chanerr_str[i]); + } + } +} + +/** + * ioat_dma_do_interrupt - handler used for single vector interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +{ + struct ioatdma_device *instance = data; + struct ioatdma_chan *ioat_chan; + unsigned long attnstatus; + int bit; + u8 intrctrl; + + intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); + + if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) + return IRQ_NONE; + + if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_NONE; + } + + attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); + for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { + ioat_chan = ioat_chan_by_index(instance, bit); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); + } + + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_HANDLED; +} + +/** + * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +{ + struct ioatdma_chan *ioat_chan = data; + + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); + + return IRQ_HANDLED; +} + +void ioat_stop(struct ioatdma_chan *ioat_chan) +{ + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + int chan_id = chan_num(ioat_chan); + struct msix_entry *msix; + + /* 1/ stop irq from firing tasklets + * 2/ stop the tasklet from re-arming irqs + */ + clear_bit(IOAT_RUN, &ioat_chan->state); + + /* flush inflight interrupts */ + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + msix = &ioat_dma->msix_entries[chan_id]; + synchronize_irq(msix->vector); + break; + case IOAT_MSI: + case IOAT_INTX: + synchronize_irq(pdev->irq); + break; + default: + break; + } + + /* flush inflight timers */ + del_timer_sync(&ioat_chan->timer); + + /* flush inflight tasklet runs */ + tasklet_kill(&ioat_chan->cleanup_task); + + /* final cleanup now that everything is quiesced and can't re-arm */ + ioat_cleanup_event(&ioat_chan->cleanup_task); +} + +static void __ioat_issue_pending(struct ioatdma_chan *ioat_chan) +{ + ioat_chan->dmacount += ioat_ring_pending(ioat_chan); + ioat_chan->issued = ioat_chan->head; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); +} + +void ioat_issue_pending(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + if (ioat_ring_pending(ioat_chan)) { + spin_lock_bh(&ioat_chan->prep_lock); + __ioat_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + } +} + +/** + * ioat_update_pending - log pending descriptors + * @ioat_chan: ioat+ channel + * + * Check if the number of unsubmitted descriptors has exceeded the + * watermark. Called with prep_lock held + */ +static void ioat_update_pending(struct ioatdma_chan *ioat_chan) +{ + if (ioat_ring_pending(ioat_chan) > ioat_pending_level) + __ioat_issue_pending(ioat_chan); +} + +static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan) +{ + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (ioat_ring_space(ioat_chan) < 1) { + dev_err(to_dev(ioat_chan), + "Unable to start null desc - ring full\n"); + return; + } + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + dump_desc_dbg(ioat_chan, desc); + /* make sure descriptors are written before we submit */ + wmb(); + ioat_chan->head += 1; + __ioat_issue_pending(ioat_chan); +} + +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan) +{ + spin_lock_bh(&ioat_chan->prep_lock); + if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + __ioat_start_null_desc(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); +} + +static void __ioat_restart_chan(struct ioatdma_chan *ioat_chan) +{ + /* set the tail to be re-issued */ + ioat_chan->issued = ioat_chan->tail; + ioat_chan->dmacount = 0; + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); + + if (ioat_ring_pending(ioat_chan)) { + struct ioat_ring_ent *desc; + + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + __ioat_issue_pending(ioat_chan); + } else + __ioat_start_null_desc(ioat_chan); +} + +static int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + u32 status; + + status = ioat_chansts(ioat_chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(ioat_chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + if (tmo && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + status = ioat_chansts(ioat_chan); + cpu_relax(); + } + + return err; +} + +static int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(ioat_chan); + while (ioat_reset_pending(ioat_chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; +} + +static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx) + __releases(&ioat_chan->prep_lock) +{ + struct dma_chan *c = tx->chan; + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + dma_cookie_t cookie; + + cookie = dma_cookie_assign(tx); + dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie); + + if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + + /* make descriptor updates visible before advancing ioat->head, + * this is purposefully not smp_wmb() since we are also + * publishing the descriptor updates to a dma device + */ + wmb(); + + ioat_chan->head += ioat_chan->produce; + + ioat_update_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + + return cookie; +} + +static struct ioat_ring_ent * +ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags) +{ + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + int chunk; + dma_addr_t phys; + u8 *pos; + off_t offs; + + chunk = idx / IOAT_DESCS_PER_CHUNK; + idx &= (IOAT_DESCS_PER_CHUNK - 1); + offs = idx * IOAT_DESC_SZ; + pos = (u8 *)ioat_chan->descs[chunk].virt + offs; + phys = ioat_chan->descs[chunk].hw + offs; + hw = (struct ioat_dma_descriptor *)pos; + memset(hw, 0, sizeof(*hw)); + + desc = kmem_cache_zalloc(ioat_cache, flags); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} + +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + kmem_cache_free(ioat_cache, desc); +} + +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent **ring; + int total_descs = 1 << order; + int i, chunks; + + /* allocate the array to hold the software ring */ + ring = kcalloc(total_descs, sizeof(*ring), flags); + if (!ring) + return NULL; + + chunks = (total_descs * IOAT_DESC_SZ) / IOAT_CHUNK_SIZE; + ioat_chan->desc_chunks = chunks; + + for (i = 0; i < chunks; i++) { + struct ioat_descs *descs = &ioat_chan->descs[i]; + + descs->virt = dma_alloc_coherent(to_dev(ioat_chan), + IOAT_CHUNK_SIZE, &descs->hw, flags); + if (!descs->virt) { + int idx; + + for (idx = 0; idx < i; idx++) { + descs = &ioat_chan->descs[idx]; + dma_free_coherent(to_dev(ioat_chan), + IOAT_CHUNK_SIZE, + descs->virt, descs->hw); + descs->virt = NULL; + descs->hw = 0; + } + + ioat_chan->desc_chunks = 0; + kfree(ring); + return NULL; + } + } + + for (i = 0; i < total_descs; i++) { + ring[i] = ioat_alloc_ring_ent(c, i, flags); + if (!ring[i]) { + int idx; + + while (i--) + ioat_free_ring_ent(ring[i], c); + + for (idx = 0; idx < ioat_chan->desc_chunks; idx++) { + dma_free_coherent(to_dev(ioat_chan), + IOAT_CHUNK_SIZE, + ioat_chan->descs[idx].virt, + ioat_chan->descs[idx].hw); + ioat_chan->descs[idx].virt = NULL; + ioat_chan->descs[idx].hw = 0; + } + + ioat_chan->desc_chunks = 0; + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); + } + + /* link descs */ + for (i = 0; i < total_descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; + } + ring[i]->hw->next = ring[0]->txd.phys; + + /* setup descriptor pre-fetching for v3.4 */ + if (ioat_dma->cap & IOAT_CAP_DPS) { + u16 drsctl = IOAT_CHAN_DRSZ_2MB | IOAT_CHAN_DRS_EN; + + if (chunks == 1) + drsctl |= IOAT_CHAN_DRS_AUTOWRAP; + + writew(drsctl, ioat_chan->reg_base + IOAT_CHAN_DRSCTL_OFFSET); + + } + + return ring; +} + +/** + * ioat_check_space_lock - verify space and grab ring producer lock + * @ioat_chan: ioat,3 channel (ring) to operate on + * @num_descs: allocation length + */ +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) + __acquires(&ioat_chan->prep_lock) +{ + spin_lock_bh(&ioat_chan->prep_lock); + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. + */ + if (likely(ioat_ring_space(ioat_chan) > num_descs)) { + dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + ioat_chan->produce = num_descs; + return 0; /* with ioat->prep_lock held */ + } + spin_unlock_bh(&ioat_chan->prep_lock); + + dev_dbg_ratelimited(to_dev(ioat_chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + + /* progress reclaim in the allocation failure case we may be + * called under bh_disabled so we need to trigger the timer + * event directly + */ + if (time_is_before_jiffies(ioat_chan->timer.expires) + && timer_pending(&ioat_chan->timer)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_timer_event(&ioat_chan->timer); + } + + return -ENOMEM; +} + +static bool desc_has_ext(struct ioat_ring_ent *desc) +{ + struct ioat_dma_descriptor *hw = desc->hw; + + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; + + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; + + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; + } + + return false; +} + +static void +ioat_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed) +{ + if (!sed) + return; + + dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(ioat_sed_cache, sed); +} + +static u64 ioat_get_current_completion(struct ioatdma_chan *ioat_chan) +{ + u64 phys_complete; + u64 completion; + + completion = *ioat_chan->completion; + phys_complete = ioat_chansts_to_addr(completion); + + dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__, + (unsigned long long) phys_complete); + + return phys_complete; +} + +static bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan, + u64 *phys_complete) +{ + *phys_complete = ioat_get_current_completion(ioat_chan); + if (*phys_complete == ioat_chan->last_completion) + return false; + + clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + + return true; +} + +static void +desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) +{ + struct ioat_dma_descriptor *hw = desc->hw; + + switch (hw->ctl_f.op) { + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + { + struct ioat_pq_descriptor *pq = desc->pq; + + /* check if there's error written */ + if (!pq->dwbes_f.wbes) + return; + + /* need to set a chanerr var for checking to clear later */ + + if (pq->dwbes_f.p_val_err) + *desc->result |= SUM_CHECK_P_RESULT; + + if (pq->dwbes_f.q_val_err) + *desc->result |= SUM_CHECK_Q_RESULT; + + return; + } + default: + return; + } +} + +/** + * __ioat_cleanup - reclaim used descriptors + * @ioat_chan: channel (ring) to clean + * @phys_complete: zeroed (or not) completion address (from status) + */ +static void __ioat_cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) +{ + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + bool seen_current = false; + int idx = ioat_chan->tail, i; + u16 active; + + dev_dbg(to_dev(ioat_chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); + + /* + * At restart of the channel, the completion address and the + * channel status will be 0 due to starting a new chain. Since + * it's new chain and the first descriptor "fails", there is + * nothing to clean up. We do not want to reap the entire submitted + * chain due to this 0 address value and then BUG. + */ + if (!phys_complete) + return; + + active = ioat_ring_active(ioat_chan); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; + + prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1)); + desc = ioat_get_ring_ent(ioat_chan, idx + i); + dump_desc_dbg(ioat_chan, desc); + + /* set err stat if we are using dwbes */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + desc_get_errstat(ioat_chan, desc); + + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + dmaengine_desc_get_callback_invoke(tx, NULL); + tx->callback = NULL; + tx->callback_result = NULL; + } + + if (tx->phys == phys_complete) + seen_current = true; + + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } + + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat_free_sed(ioat_dma, desc->sed); + desc->sed = NULL; + } + } + + /* finish all descriptor reads before incrementing tail */ + smp_mb(); + ioat_chan->tail = idx + i; + /* no active descs have written a completion? */ + BUG_ON(active && !seen_current); + ioat_chan->last_completion = phys_complete; + + if (active - i == 0) { + dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n", + __func__); + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + } + + /* microsecond delay by sysfs variable per pending descriptor */ + if (ioat_chan->intr_coalesce != ioat_chan->prev_intr_coalesce) { + writew(min((ioat_chan->intr_coalesce * (active - i)), + IOAT_INTRDELAY_MASK), + ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET); + ioat_chan->prev_intr_coalesce = ioat_chan->intr_coalesce; + } +} + +static void ioat_cleanup(struct ioatdma_chan *ioat_chan) +{ + u64 phys_complete; + + spin_lock_bh(&ioat_chan->cleanup_lock); + + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __ioat_cleanup(ioat_chan, phys_complete); + + if (is_ioat_halted(*ioat_chan->completion)) { + u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + if (chanerr & + (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) { + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + ioat_eh(ioat_chan); + } + } + + spin_unlock_bh(&ioat_chan->cleanup_lock); +} + +void ioat_cleanup_event(struct tasklet_struct *t) +{ + struct ioatdma_chan *ioat_chan = from_tasklet(ioat_chan, t, cleanup_task); + + ioat_cleanup(ioat_chan); + if (!test_bit(IOAT_RUN, &ioat_chan->state)) + return; + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); +} + +static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) +{ + u64 phys_complete; + + /* set the completion address register again */ + writel(lower_32_bits(ioat_chan->completion_dma), + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(upper_32_bits(ioat_chan->completion_dma), + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + ioat_quiesce(ioat_chan, 0); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __ioat_cleanup(ioat_chan, phys_complete); + + __ioat_restart_chan(ioat_chan); +} + + +static void ioat_abort_descs(struct ioatdma_chan *ioat_chan) +{ + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + u16 active; + int idx = ioat_chan->tail, i; + + /* + * We assume that the failed descriptor has been processed. + * Now we are just returning all the remaining submitted + * descriptors to abort. + */ + active = ioat_ring_active(ioat_chan); + + /* we skip the failed descriptor that tail points to */ + for (i = 1; i < active; i++) { + struct dma_async_tx_descriptor *tx; + + prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1)); + desc = ioat_get_ring_ent(ioat_chan, idx + i); + + tx = &desc->txd; + if (tx->cookie) { + struct dmaengine_result res; + + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + res.result = DMA_TRANS_ABORTED; + dmaengine_desc_get_callback_invoke(tx, &res); + tx->callback = NULL; + tx->callback_result = NULL; + } + + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + WARN_ON(i + 1 >= active); + i++; + } + + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat_free_sed(ioat_dma, desc->sed); + desc->sed = NULL; + } + } + + smp_mb(); /* finish all descriptor reads before incrementing tail */ + ioat_chan->tail = idx + active; + + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + ioat_chan->last_completion = *ioat_chan->completion = desc->txd.phys; +} + +static void ioat_eh(struct ioatdma_chan *ioat_chan) +{ + struct pci_dev *pdev = to_pdev(ioat_chan); + struct ioat_dma_descriptor *hw; + struct dma_async_tx_descriptor *tx; + u64 phys_complete; + struct ioat_ring_ent *desc; + u32 err_handled = 0; + u32 chanerr_int; + u32 chanerr; + bool abort = false; + struct dmaengine_result res; + + /* cleanup so tail points to descriptor that caused the error */ + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __ioat_cleanup(ioat_chan, phys_complete); + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); + + dev_dbg(to_dev(ioat_chan), "%s: error = %x:%x\n", + __func__, chanerr, chanerr_int); + + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + hw = desc->hw; + dump_desc_dbg(ioat_chan, desc); + + switch (hw->ctl_f.op) { + case IOAT_OP_XOR_VAL: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + break; + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { + *desc->result |= SUM_CHECK_Q_RESULT; + err_handled |= IOAT_CHANERR_XOR_Q_ERR; + } + break; + } + + if (chanerr & IOAT_CHANERR_RECOVER_MASK) { + if (chanerr & IOAT_CHANERR_READ_DATA_ERR) { + res.result = DMA_TRANS_READ_FAILED; + err_handled |= IOAT_CHANERR_READ_DATA_ERR; + } else if (chanerr & IOAT_CHANERR_WRITE_DATA_ERR) { + res.result = DMA_TRANS_WRITE_FAILED; + err_handled |= IOAT_CHANERR_WRITE_DATA_ERR; + } + + abort = true; + } else + res.result = DMA_TRANS_NOERROR; + + /* fault on unhandled error or spurious halt */ + if (chanerr ^ err_handled || chanerr == 0) { + dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n", + __func__, chanerr, err_handled); + dev_err(to_dev(ioat_chan), "Errors handled:\n"); + ioat_print_chanerrs(ioat_chan, err_handled); + dev_err(to_dev(ioat_chan), "Errors not handled:\n"); + ioat_print_chanerrs(ioat_chan, (chanerr & ~err_handled)); + + BUG(); + } + + /* cleanup the faulty descriptor since we are continuing */ + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + dmaengine_desc_get_callback_invoke(tx, &res); + tx->callback = NULL; + tx->callback_result = NULL; + } + + /* mark faulting descriptor as complete */ + *ioat_chan->completion = desc->txd.phys; + + spin_lock_bh(&ioat_chan->prep_lock); + /* we need abort all descriptors */ + if (abort) { + ioat_abort_descs(ioat_chan); + /* clean up the channel, we could be in weird state */ + ioat_reset_hw(ioat_chan); + } + + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); + + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); +} + +static void check_active(struct ioatdma_chan *ioat_chan) +{ + if (ioat_ring_active(ioat_chan)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + return; + } + + if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); +} + +static void ioat_reboot_chan(struct ioatdma_chan *ioat_chan) +{ + spin_lock_bh(&ioat_chan->prep_lock); + set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + + ioat_abort_descs(ioat_chan); + dev_warn(to_dev(ioat_chan), "Reset channel...\n"); + ioat_reset_hw(ioat_chan); + dev_warn(to_dev(ioat_chan), "Restart channel...\n"); + ioat_restart_channel(ioat_chan); + + spin_lock_bh(&ioat_chan->prep_lock); + clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); +} + +void ioat_timer_event(struct timer_list *t) +{ + struct ioatdma_chan *ioat_chan = from_timer(ioat_chan, t, timer); + dma_addr_t phys_complete; + u64 status; + + status = ioat_chansts(ioat_chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + dev_err(to_dev(ioat_chan), "Errors:\n"); + ioat_print_chanerrs(ioat_chan, chanerr); + + if (test_bit(IOAT_RUN, &ioat_chan->state)) { + spin_lock_bh(&ioat_chan->cleanup_lock); + ioat_reboot_chan(ioat_chan); + spin_unlock_bh(&ioat_chan->cleanup_lock); + } + + return; + } + + spin_lock_bh(&ioat_chan->cleanup_lock); + + /* handle the no-actives case */ + if (!ioat_ring_active(ioat_chan)) { + spin_lock_bh(&ioat_chan->prep_lock); + check_active(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + goto unlock_out; + } + + /* handle the missed cleanup case */ + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) { + /* timer restarted in ioat_cleanup_preamble + * and IOAT_COMPLETION_ACK cleared + */ + __ioat_cleanup(ioat_chan, phys_complete); + goto unlock_out; + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) { + u32 chanerr; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n", + status, chanerr); + dev_err(to_dev(ioat_chan), "Errors:\n"); + ioat_print_chanerrs(ioat_chan, chanerr); + + dev_dbg(to_dev(ioat_chan), "Active descriptors: %d\n", + ioat_ring_active(ioat_chan)); + + ioat_reboot_chan(ioat_chan); + + goto unlock_out; + } + + /* handle missed issue pending case */ + if (ioat_ring_pending(ioat_chan)) { + dev_warn(to_dev(ioat_chan), + "Completion timeout with pending descriptors\n"); + spin_lock_bh(&ioat_chan->prep_lock); + __ioat_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + } + + set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); +unlock_out: + spin_unlock_bh(&ioat_chan->cleanup_lock); +} + +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + enum dma_status ret; + + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + ioat_cleanup(ioat_chan); + + return dma_cookie_status(c, cookie, txstate); +} + +int ioat_reset_hw(struct ioatdma_chan *ioat_chan) +{ + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat_quiesce(ioat_chan, msecs_to_jiffies(100)); + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + if (ioat_dma->version < IOAT_VER_3_3) { + /* clear any pending errors */ + err = pci_read_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); + if (err) { + dev_err(&pdev->dev, + "channel error register unreachable\n"); + return err; + } + pci_write_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, chanerr); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + pci_write_config_dword(pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + 0x10); + } + } + + if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) { + ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000); + ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008); + ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800); + } + + + err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200)); + if (!err) { + if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) { + writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000); + writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008); + writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800); + } + } + + if (err) + dev_err(&pdev->dev, "Failed to reset: %d\n", err); + + return err; +} diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h new file mode 100644 index 0000000000..a180171087 --- /dev/null +++ b/drivers/dma/ioat/dma.h @@ -0,0 +1,407 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + */ +#ifndef IOATDMA_H +#define IOATDMA_H + +#include +#include +#include +#include +#include +#include +#include +#include "registers.h" +#include "hw.h" + +#define IOAT_DMA_VERSION "5.00" + +#define IOAT_DMA_DCA_ANY_CPU ~0 + +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev) +#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev) +#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80) + +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +enum ioat_irq_mode { + IOAT_NOIRQ = 0, + IOAT_MSIX, + IOAT_MSI, + IOAT_INTX +}; + +/** + * struct ioatdma_device - internal representation of a IOAT device + * @pdev: PCI-Express device + * @reg_base: MMIO register space base address + * @completion_pool: DMA buffers for completion ops + * @sed_hw_pool: DMA super descriptor pools + * @dma_dev: embedded struct dma_device + * @version: version of ioatdma device + * @msix_entries: irq handlers + * @idx: per channel data + * @dca: direct cache access context + * @irq_mode: interrupt mode (INTX, MSI, MSIX) + * @cap: read DMA capabilities register + */ +struct ioatdma_device { + struct pci_dev *pdev; + void __iomem *reg_base; + struct dma_pool *completion_pool; +#define MAX_SED_POOLS 5 + struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; + struct dma_device dma_dev; + u8 version; +#define IOAT_MAX_CHANS 4 + struct msix_entry msix_entries[IOAT_MAX_CHANS]; + struct ioatdma_chan *idx[IOAT_MAX_CHANS]; + struct dca_provider *dca; + enum ioat_irq_mode irq_mode; + u32 cap; + int chancnt; + + /* shadow version for CB3.3 chan reset errata workaround */ + u64 msixtba0; + u64 msixdata0; + u32 msixpba; +}; + +#define IOAT_MAX_ORDER 16 +#define IOAT_MAX_DESCS (1 << IOAT_MAX_ORDER) +#define IOAT_CHUNK_SIZE (SZ_512K) +#define IOAT_DESCS_PER_CHUNK (IOAT_CHUNK_SIZE / IOAT_DESC_SZ) + +struct ioat_descs { + void *virt; + dma_addr_t hw; +}; + +struct ioatdma_chan { + struct dma_chan dma_chan; + void __iomem *reg_base; + dma_addr_t last_completion; + spinlock_t cleanup_lock; + unsigned long state; + #define IOAT_CHAN_DOWN 0 + #define IOAT_COMPLETION_ACK 1 + #define IOAT_RESET_PENDING 2 + #define IOAT_KOBJ_INIT_FAIL 3 + #define IOAT_RUN 5 + #define IOAT_CHAN_ACTIVE 6 + struct timer_list timer; + #define RESET_DELAY msecs_to_jiffies(100) + struct ioatdma_device *ioat_dma; + dma_addr_t completion_dma; + u64 *completion; + struct tasklet_struct cleanup_task; + struct kobject kobj; + +/* ioat v2 / v3 channel attributes + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @produce: number of descriptors to produce at submit time + * @ring: software ring buffer implementation of hardware ring + * @prep_lock: serializes descriptor preparation (producers) + */ + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + u16 produce; + struct ioat_ring_ent **ring; + spinlock_t prep_lock; + struct ioat_descs descs[IOAT_MAX_DESCS / IOAT_DESCS_PER_CHUNK]; + int desc_chunks; + int intr_coalesce; + int prev_intr_coalesce; +}; + +struct ioat_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct dma_chan *, char *); + ssize_t (*store)(struct dma_chan *, const char *, size_t); +}; + +/** + * struct ioat_sed_ent - wrapper around super extended hardware descriptor + * @hw: hardware SED + * @dma: dma address for the SED + * @parent: point to the dma descriptor that's the parent + * @hw_pool: descriptor pool index + */ +struct ioat_sed_ent { + struct ioat_sed_raw_descriptor *hw; + dma_addr_t dma; + struct ioat_ring_ent *parent; + unsigned int hw_pool; +}; + +/** + * struct ioat_ring_ent - wrapper around hardware descriptor + * @hw: hardware DMA descriptor (for memcpy) + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor + * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @result: asynchronous result of validate operations + * @id: identifier for debug + * @sed: pointer to super extended descriptor sw desc + */ + +struct ioat_ring_ent { + union { + struct ioat_dma_descriptor *hw; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; + size_t len; + struct dma_async_tx_descriptor txd; + enum sum_check_flags *result; + #ifdef DEBUG + int id; + #endif + struct ioat_sed_ent *sed; +}; + +extern const struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +extern int ioat_pending_level; +extern struct kobj_type ioat_ktype; +extern struct kmem_cache *ioat_cache; +extern struct kmem_cache *ioat_sed_cache; + +static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c) +{ + return container_of(c, struct ioatdma_chan, dma_chan); +} + +/* wrapper around hardware descriptor format + additional software fields */ +#ifdef DEBUG +#define set_desc_id(desc, i) ((desc)->id = (i)) +#define desc_id(desc) ((desc)->id) +#else +#define set_desc_id(desc, i) +#define desc_id(desc) (0) +#endif + +static inline void +__dump_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_dma_descriptor *hw, + struct dma_async_tx_descriptor *tx, int id) +{ + struct device *dev = to_dev(ioat_chan); + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" + " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id, + (unsigned long long) tx->phys, + (unsigned long long) hw->next, tx->cookie, tx->flags, + hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write); +} + +#define dump_desc_dbg(c, d) \ + ({ if (d) __dump_desc_dbg(c, d->hw, &d->txd, desc_id(d)); 0; }) + +static inline struct ioatdma_chan * +ioat_chan_by_index(struct ioatdma_device *ioat_dma, int index) +{ + return ioat_dma->idx[index]; +} + +static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan) +{ + return readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET); +} + +static inline u64 ioat_chansts_to_addr(u64 status) +{ + return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +} + +static inline u32 ioat_chanerr(struct ioatdma_chan *ioat_chan) +{ + return readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); +} + +static inline void ioat_suspend(struct ioatdma_chan *ioat_chan) +{ + u8 ver = ioat_chan->ioat_dma->version; + + writeb(IOAT_CHANCMD_SUSPEND, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline void ioat_reset(struct ioatdma_chan *ioat_chan) +{ + u8 ver = ioat_chan->ioat_dma->version; + + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline bool ioat_reset_pending(struct ioatdma_chan *ioat_chan) +{ + u8 ver = ioat_chan->ioat_dma->version; + u8 cmd; + + cmd = readb(ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; +} + +static inline bool is_ioat_active(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); +} + +static inline bool is_ioat_idle(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE); +} + +static inline bool is_ioat_halted(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED); +} + +static inline bool is_ioat_suspended(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED); +} + +/* channel was fatally programmed */ +static inline bool is_ioat_bug(unsigned long err) +{ + return !!err; +} + + +static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan) +{ + return 1 << ioat_chan->alloc_order; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat_ring_active(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->tail, + ioat_ring_size(ioat_chan)); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat_ring_pending(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->issued, + ioat_ring_size(ioat_chan)); +} + +static inline u32 ioat_ring_space(struct ioatdma_chan *ioat_chan) +{ + return ioat_ring_size(ioat_chan) - ioat_ring_active(ioat_chan); +} + +static inline u16 +ioat_xferlen_to_descs(struct ioatdma_chan *ioat_chan, size_t len) +{ + u16 num_descs = len >> ioat_chan->xfercap_log; + + num_descs += !!(len & ((1 << ioat_chan->xfercap_log) - 1)); + return num_descs; +} + +static inline struct ioat_ring_ent * +ioat_get_ring_ent(struct ioatdma_chan *ioat_chan, u16 idx) +{ + return ioat_chan->ring[idx & (ioat_ring_size(ioat_chan) - 1)]; +} + +static inline void +ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr) +{ + writel(addr & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + +/* IOAT Prep functions */ +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); + +/* IOAT Operation functions */ +irqreturn_t ioat_dma_do_interrupt(int irq, void *data); +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data); +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags); +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan); +int ioat_reset_hw(struct ioatdma_chan *ioat_chan); +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate); +void ioat_cleanup_event(struct tasklet_struct *t); +void ioat_timer_event(struct timer_list *t); +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs); +void ioat_issue_pending(struct dma_chan *chan); + +/* IOAT Init functions */ +bool is_bwd_ioat(struct pci_dev *pdev); +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *ioat_dma); +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma); +void ioat_stop(struct ioatdma_chan *ioat_chan); +#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h new file mode 100644 index 0000000000..79e4e4c09c --- /dev/null +++ b/drivers/dma/ioat/hw.h @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + */ +#ifndef _IOAT_HW_H_ +#define _IOAT_HW_H_ + +/* PCI Configuration Space Values */ +#define IOAT_MMIO_BAR 0 + +/* CB device ID's */ +#define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB3 0x0e23 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB4 0x0e24 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB5 0x0e25 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB6 0x0e26 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB7 0x0e27 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e +#define PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f + +#define PCI_DEVICE_ID_INTEL_IOAT_HSW0 0x2f20 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW1 0x2f21 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW2 0x2f22 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW3 0x2f23 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW4 0x2f24 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW5 0x2f25 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW6 0x2f26 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW7 0x2f27 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW8 0x2f2e +#define PCI_DEVICE_ID_INTEL_IOAT_HSW9 0x2f2f + +#define PCI_DEVICE_ID_INTEL_IOAT_BWD0 0x0C50 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD1 0x0C51 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD2 0x0C52 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD3 0x0C53 + +#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE0 0x6f50 +#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE1 0x6f51 +#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2 0x6f52 +#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3 0x6f53 + +#define PCI_DEVICE_ID_INTEL_IOAT_BDX0 0x6f20 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX1 0x6f21 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX2 0x6f22 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX3 0x6f23 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX4 0x6f24 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX5 0x6f25 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX6 0x6f26 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX7 0x6f27 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e +#define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f + +#define PCI_DEVICE_ID_INTEL_IOAT_SKX 0x2021 + +#define PCI_DEVICE_ID_INTEL_IOAT_ICX 0x0b00 + +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ +#define IOAT_VER_3_2 0x32 /* Version 3.2 */ +#define IOAT_VER_3_3 0x33 /* Version 3.3 */ +#define IOAT_VER_3_4 0x34 /* Version 3.4 */ + + +int system_has_dca_enabled(struct pci_dev *pdev); + +#define IOAT_DESC_SZ 64 + +struct ioat_dma_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int null:1; + unsigned int src_brk:1; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd2:13; + #define IOAT_OP_COPY 0x00 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t rsv2; + /* store some driver data in an unused portion of the descriptor */ + union { + uint64_t user1; + uint64_t tx_cnt; + }; + uint64_t user2; +}; + +struct ioat_xor_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd:13; + #define IOAT_OP_XOR 0x87 + #define IOAT_OP_XOR_VAL 0x88 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; +}; + +struct ioat_xor_ext_descriptor { + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t next; + uint64_t rsvd[4]; +}; + +struct ioat_pq_descriptor { + union { + uint32_t size; + uint32_t dwbes; + struct { + unsigned int rsvd:25; + unsigned int p_val_err:1; + unsigned int q_val_err:1; + unsigned int rsvd1:4; + unsigned int wbes:1; + } dwbes_f; + }; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd2:2; + unsigned int wb_en:1; + unsigned int prl_en:1; + unsigned int rsvd3:7; + #define IOAT_OP_PQ 0x89 + #define IOAT_OP_PQ_VAL 0x8a + #define IOAT_OP_PQ_16S 0xa0 + #define IOAT_OP_PQ_VAL_16S 0xa1 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + union { + uint64_t src_addr3; + uint64_t sed_addr; + }; + uint8_t coef[8]; + uint64_t q_addr; +}; + +struct ioat_pq_ext_descriptor { + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t next; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t rsvd[2]; +}; + +struct ioat_pq_update_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:3; + unsigned int coef:8; + #define IOAT_OP_PQ_UP 0x8b + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t p_src; + uint64_t q_src; + uint64_t q_addr; +}; + +struct ioat_raw_descriptor { + uint64_t field[8]; +}; + +struct ioat_pq16a_descriptor { + uint8_t coef[8]; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t src_addr9; +}; + +struct ioat_pq16b_descriptor { + uint64_t src_addr10; + uint64_t src_addr11; + uint64_t src_addr12; + uint64_t src_addr13; + uint64_t src_addr14; + uint64_t src_addr15; + uint64_t src_addr16; + uint64_t rsvd; +}; + +union ioat_sed_pq_descriptor { + struct ioat_pq16a_descriptor a; + struct ioat_pq16b_descriptor b; +}; + +#define SED_SIZE 64 + +struct ioat_sed_raw_descriptor { + uint64_t a[8]; + uint64_t b[8]; + uint64_t c[8]; +}; + +#endif diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c new file mode 100644 index 0000000000..9c364e92cb --- /dev/null +++ b/drivers/dma/ioat/init.c @@ -0,0 +1,1450 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static const struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) }, + + /* I/OAT v3.3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, + + /* I/OAT v3.4 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_ICX) }, + + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); +static void ioat_remove(struct pci_dev *pdev); +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx); +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma); +static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma); +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +int ioat_pending_level = 7; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 7)"); +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), msi, intx"); + +struct kmem_cache *ioat_cache; +struct kmem_cache *ioat_sed_cache; + +static bool is_jf_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_JSF0: + case PCI_DEVICE_ID_INTEL_IOAT_JSF1: + case PCI_DEVICE_ID_INTEL_IOAT_JSF2: + case PCI_DEVICE_ID_INTEL_IOAT_JSF3: + case PCI_DEVICE_ID_INTEL_IOAT_JSF4: + case PCI_DEVICE_ID_INTEL_IOAT_JSF5: + case PCI_DEVICE_ID_INTEL_IOAT_JSF6: + case PCI_DEVICE_ID_INTEL_IOAT_JSF7: + case PCI_DEVICE_ID_INTEL_IOAT_JSF8: + case PCI_DEVICE_ID_INTEL_IOAT_JSF9: + return true; + default: + return false; + } +} + +static bool is_snb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_SNB0: + case PCI_DEVICE_ID_INTEL_IOAT_SNB1: + case PCI_DEVICE_ID_INTEL_IOAT_SNB2: + case PCI_DEVICE_ID_INTEL_IOAT_SNB3: + case PCI_DEVICE_ID_INTEL_IOAT_SNB4: + case PCI_DEVICE_ID_INTEL_IOAT_SNB5: + case PCI_DEVICE_ID_INTEL_IOAT_SNB6: + case PCI_DEVICE_ID_INTEL_IOAT_SNB7: + case PCI_DEVICE_ID_INTEL_IOAT_SNB8: + case PCI_DEVICE_ID_INTEL_IOAT_SNB9: + return true; + default: + return false; + } +} + +static bool is_ivb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_IVB0: + case PCI_DEVICE_ID_INTEL_IOAT_IVB1: + case PCI_DEVICE_ID_INTEL_IOAT_IVB2: + case PCI_DEVICE_ID_INTEL_IOAT_IVB3: + case PCI_DEVICE_ID_INTEL_IOAT_IVB4: + case PCI_DEVICE_ID_INTEL_IOAT_IVB5: + case PCI_DEVICE_ID_INTEL_IOAT_IVB6: + case PCI_DEVICE_ID_INTEL_IOAT_IVB7: + case PCI_DEVICE_ID_INTEL_IOAT_IVB8: + case PCI_DEVICE_ID_INTEL_IOAT_IVB9: + return true; + default: + return false; + } + +} + +static bool is_hsw_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_HSW0: + case PCI_DEVICE_ID_INTEL_IOAT_HSW1: + case PCI_DEVICE_ID_INTEL_IOAT_HSW2: + case PCI_DEVICE_ID_INTEL_IOAT_HSW3: + case PCI_DEVICE_ID_INTEL_IOAT_HSW4: + case PCI_DEVICE_ID_INTEL_IOAT_HSW5: + case PCI_DEVICE_ID_INTEL_IOAT_HSW6: + case PCI_DEVICE_ID_INTEL_IOAT_HSW7: + case PCI_DEVICE_ID_INTEL_IOAT_HSW8: + case PCI_DEVICE_ID_INTEL_IOAT_HSW9: + return true; + default: + return false; + } + +} + +static bool is_bdx_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BDX0: + case PCI_DEVICE_ID_INTEL_IOAT_BDX1: + case PCI_DEVICE_ID_INTEL_IOAT_BDX2: + case PCI_DEVICE_ID_INTEL_IOAT_BDX3: + case PCI_DEVICE_ID_INTEL_IOAT_BDX4: + case PCI_DEVICE_ID_INTEL_IOAT_BDX5: + case PCI_DEVICE_ID_INTEL_IOAT_BDX6: + case PCI_DEVICE_ID_INTEL_IOAT_BDX7: + case PCI_DEVICE_ID_INTEL_IOAT_BDX8: + case PCI_DEVICE_ID_INTEL_IOAT_BDX9: + return true; + default: + return false; + } +} + +static inline bool is_skx_ioat(struct pci_dev *pdev) +{ + return (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX) ? true : false; +} + +static bool is_xeon_cb32(struct pci_dev *pdev) +{ + return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || + is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev); +} + +bool is_bwd_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD0: + case PCI_DEVICE_ID_INTEL_IOAT_BWD1: + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + /* even though not Atom, BDX-DE has same DMA silicon */ + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } +} + +static bool is_bwd_noraid(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } + +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. + * @ioat_dma: dma device to be tested + */ +static int ioat_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int i; + u8 *src; + u8 *dest; + struct dma_device *dma = &ioat_dma->dma_dev; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "mapping src buffer failed\n"); + err = -ENOMEM; + goto free_resources; + } + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_dest)) { + dev_err(dev, "mapping dest buffer failed\n"); + err = -ENOMEM; + goto unmap_src; + } + flags = DMA_PREP_INTERRUPT; + tx = ioat_dma->dma_dev.device_prep_dma_memcpy(dma_chan, dma_dest, + dma_src, IOAT_TEST_SIZE, + flags); + if (!tx) { + dev_err(dev, "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) + != DMA_COMPLETE) { + dev_err(dev, "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(dev, "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + +unmap_dma: + dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); +unmap_src: + dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @ioat_dma: ioat dma device + */ +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = ioat_dma->chancnt; + for (i = 0; i < msixcnt; i++) + ioat_dma->msix_entries[i].entry = i; + + err = pci_enable_msix_exact(pdev, ioat_dma->msix_entries, msixcnt); + if (err) + goto msi; + + for (i = 0; i < msixcnt; i++) { + msix = &ioat_dma->msix_entries[i]; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", ioat_chan); + if (err) { + for (j = 0; j < i; j++) { + msix = &ioat_dma->msix_entries[j]; + ioat_chan = ioat_chan_by_index(ioat_dma, j); + devm_free_irq(dev, msix->vector, ioat_chan); + } + goto msi; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + ioat_dma->irq_mode = IOAT_MSIX; + goto done; + +msi: + err = pci_enable_msi(pdev); + if (err) + goto intx; + + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", ioat_dma); + if (err) { + pci_disable_msi(pdev); + goto intx; + } + ioat_dma->irq_mode = IOAT_MSI; + goto done; + +intx: + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", ioat_dma); + if (err) + goto err_no_irq; + + ioat_dma->irq_mode = IOAT_INTX; +done: + if (is_bwd_ioat(pdev)) + ioat_intr_quirk(ioat_dma); + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + ioat_dma->irq_mode = IOAT_NOIRQ; + dev_err(dev, "no usable interrupts\n"); + return err; +} + +static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma) +{ + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); +} + +static int ioat_probe(struct ioatdma_device *ioat_dma) +{ + int err = -ENODEV; + struct dma_device *dma = &ioat_dma->dma_dev; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + + ioat_dma->completion_pool = dma_pool_create("completion_pool", dev, + sizeof(u64), + SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + + if (!ioat_dma->completion_pool) { + err = -ENOMEM; + goto err_out; + } + + ioat_enumerate_channels(ioat_dma); + + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->dev = &pdev->dev; + + if (!ioat_dma->chancnt) { + dev_err(dev, "channel enumeration error\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(ioat_dma); + if (err) + goto err_setup_interrupts; + + err = ioat3_dma_self_test(ioat_dma); + if (err) + goto err_self_test; + + return 0; + +err_self_test: + ioat_disable_interrupts(ioat_dma); +err_setup_interrupts: + dma_pool_destroy(ioat_dma->completion_pool); +err_out: + return err; +} + +static int ioat_register(struct ioatdma_device *ioat_dma) +{ + int err = dma_async_device_register(&ioat_dma->dma_dev); + + if (err) { + ioat_disable_interrupts(ioat_dma); + dma_pool_destroy(ioat_dma->completion_pool); + } + + return err; +} + +static void ioat_dma_remove(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + + ioat_disable_interrupts(ioat_dma); + + ioat_kobject_del(ioat_dma); + + dma_async_device_unregister(dma); +} + +/** + * ioat_enumerate_channels - find and initialize the device's channels + * @ioat_dma: the ioat dma device to be enumerated + */ +static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 xfercap_log; + int chancnt; + int i; + + INIT_LIST_HEAD(&dma->channels); + chancnt = readb(ioat_dma->reg_base + IOAT_CHANCNT_OFFSET); + chancnt &= 0x1f; /* bits [4:0] valid */ + if (chancnt > ARRAY_SIZE(ioat_dma->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + chancnt, ARRAY_SIZE(ioat_dma->idx)); + chancnt = ARRAY_SIZE(ioat_dma->idx); + } + xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ + if (xfercap_log == 0) + return; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); + + for (i = 0; i < chancnt; i++) { + ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); + if (!ioat_chan) + break; + + ioat_init_channel(ioat_dma, ioat_chan, i); + ioat_chan->xfercap_log = xfercap_log; + spin_lock_init(&ioat_chan->prep_lock); + if (ioat_reset_hw(ioat_chan)) { + i = 0; + break; + } + } + ioat_dma->chancnt = i; +} + +/** + * ioat_free_chan_resources - release all the descriptors + * @c: the channel to be cleaned + */ +static void ioat_free_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + const int total_descs = 1 << ioat_chan->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat_chan->ring) + return; + + ioat_stop(ioat_chan); + + if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) { + ioat_reset_hw(ioat_chan); + + /* Put LTR to idle */ + if (ioat_dma->version >= IOAT_VER_3_4) + writeb(IOAT_CHAN_LTR_SWSEL_IDLE, + ioat_chan->reg_base + + IOAT_CHAN_LTR_SWSEL_OFFSET); + } + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + descs = ioat_ring_space(ioat_chan); + dev_dbg(to_dev(ioat_chan), "freeing %d idle descriptors\n", descs); + for (i = 0; i < descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head + i); + ioat_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail + i); + dump_desc_dbg(ioat_chan, desc); + ioat_free_ring_ent(desc, c); + } + + for (i = 0; i < ioat_chan->desc_chunks; i++) { + dma_free_coherent(to_dev(ioat_chan), IOAT_CHUNK_SIZE, + ioat_chan->descs[i].virt, + ioat_chan->descs[i].hw); + ioat_chan->descs[i].virt = NULL; + ioat_chan->descs[i].hw = 0; + } + ioat_chan->desc_chunks = 0; + + kfree(ioat_chan->ring); + ioat_chan->ring = NULL; + ioat_chan->alloc_order = 0; + dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion, + ioat_chan->completion_dma); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_chan->last_completion = 0; + ioat_chan->completion_dma = 0; + ioat_chan->dmacount = 0; +} + +/* ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring + * @chan: channel to be initialized + */ +static int ioat_alloc_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent **ring; + u64 status; + int order; + int i = 0; + u32 chanerr; + + /* have we already been set up? */ + if (ioat_chan->ring) + return 1 << ioat_chan->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + ioat_chan->completion = + dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool, + GFP_NOWAIT, &ioat_chan->completion_dma); + if (!ioat_chan->completion) + return -ENOMEM; + + writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64)ioat_chan->completion_dma) >> 32, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + order = IOAT_MAX_ORDER; + ring = ioat_alloc_ring(c, order, GFP_NOWAIT); + if (!ring) + return -ENOMEM; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + ioat_chan->ring = ring; + ioat_chan->head = 0; + ioat_chan->issued = 0; + ioat_chan->tail = 0; + ioat_chan->alloc_order = order; + set_bit(IOAT_RUN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + /* Setting up LTR values for 3.4 or later */ + if (ioat_chan->ioat_dma->version >= IOAT_VER_3_4) { + u32 lat_val; + + lat_val = IOAT_CHAN_LTR_ACTIVE_SNVAL | + IOAT_CHAN_LTR_ACTIVE_SNLATSCALE | + IOAT_CHAN_LTR_ACTIVE_SNREQMNT; + writel(lat_val, ioat_chan->reg_base + + IOAT_CHAN_LTR_ACTIVE_OFFSET); + + lat_val = IOAT_CHAN_LTR_IDLE_SNVAL | + IOAT_CHAN_LTR_IDLE_SNLATSCALE | + IOAT_CHAN_LTR_IDLE_SNREQMNT; + writel(lat_val, ioat_chan->reg_base + + IOAT_CHAN_LTR_IDLE_OFFSET); + + /* Select to active */ + writeb(IOAT_CHAN_LTR_SWSEL_ACTIVE, + ioat_chan->reg_base + + IOAT_CHAN_LTR_SWSEL_OFFSET); + } + + ioat_start_null_desc(ioat_chan); + + /* check that we got off the ground */ + do { + udelay(1); + status = ioat_chansts(ioat_chan); + } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); + + if (is_ioat_active(status) || is_ioat_idle(status)) + return 1 << ioat_chan->alloc_order; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + dev_WARN(to_dev(ioat_chan), + "failed to start channel chanerr: %#x\n", chanerr); + ioat_free_chan_resources(c); + return -EFAULT; +} + +/* common channel initialization */ +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + + ioat_chan->ioat_dma = ioat_dma; + ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1)); + spin_lock_init(&ioat_chan->cleanup_lock); + ioat_chan->dma_chan.device = dma; + dma_cookie_init(&ioat_chan->dma_chan); + list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels); + ioat_dma->idx[idx] = ioat_chan; + timer_setup(&ioat_chan->timer, ioat_timer_event, 0); + tasklet_setup(&ioat_chan->cleanup_task, ioat_cleanup_event); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 op = 0; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + op = IOAT_OP_XOR; + + dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dest_dma)) { + err = -ENOMEM; + goto free_resources; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { + dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) { + err = -ENOMEM; + goto dma_unmap; + } + } + tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOAT_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT); + + if (!tx) { + dev_err(dev, "Self-test xor prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test xor setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test xor timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + + if (ptr[i] != cmp_word) { + dev_err(dev, "Self-test xor failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + /* skip validate if the capability is not present */ + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) + goto free_resources; + + op = IOAT_OP_XOR_VAL; + + /* validate the sources with the destintation page */ + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + xor_val_srcs[i] = xor_srcs[i]; + xor_val_srcs[i] = dest; + + xor_val_result = 1; + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) { + err = -ENOMEM; + goto dma_unmap; + } + } + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test zero prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test zero setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + memset(page_address(dest), 0, PAGE_SIZE); + + /* test for non-zero parity sum */ + op = IOAT_OP_XOR_VAL; + + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) { + err = -ENOMEM; + goto dma_unmap; + } + } + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + goto free_resources; +dma_unmap: + if (op == IOAT_OP_XOR) { + while (--i >= 0) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + } else if (op == IOAT_OP_XOR_VAL) { + while (--i >= 0) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int rc; + + rc = ioat_dma_self_test(ioat_dma); + if (rc) + return rc; + + rc = ioat_xor_val_self_test(ioat_dma); + + return rc; +} + +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + u32 errmask; + + dma = &ioat_dma->dma_dev; + + /* + * if we have descriptor write back error status, we mask the + * error interrupts + */ + if (ioat_dma->cap & IOAT_CAP_DWBES) { + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + errmask = readl(ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | + IOAT_CHANERR_XOR_Q_ERR; + writel(errmask, ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + } + } +} + +static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) +{ + struct pci_dev *pdev = ioat_dma->pdev; + int dca_en = system_has_dca_enabled(pdev); + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + int err; + u16 val16; + + dma = &ioat_dma->dma_dev; + dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat_issue_pending; + dma->device_alloc_chan_resources = ioat_alloc_chan_resources; + dma->device_free_chan_resources = ioat_free_chan_resources; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock; + + ioat_dma->cap = readl(ioat_dma->reg_base + IOAT_DMA_CAP_OFFSET); + + if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) + ioat_dma->cap &= + ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); + + /* dca is incompatible with raid operations */ + if (dca_en && (ioat_dma->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + ioat_dma->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + + if (ioat_dma->cap & IOAT_CAP_XOR) { + dma->max_xor = 8; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat_prep_xor_val; + } + + if (ioat_dma->cap & IOAT_CAP_PQ) { + + dma->device_prep_dma_pq = ioat_prep_pq; + dma->device_prep_dma_pq_val = ioat_prep_pq_val; + dma_cap_set(DMA_PQ, dma->cap_mask); + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma_set_maxpq(dma, 16, 0); + else + dma_set_maxpq(dma, 8, 0); + + if (!(ioat_dma->cap & IOAT_CAP_XOR)) { + dma->device_prep_dma_xor = ioat_prep_pqxor; + dma->device_prep_dma_xor_val = ioat_prep_pqxor_val; + dma_cap_set(DMA_XOR, dma->cap_mask); + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma->max_xor = 16; + else + dma->max_xor = 8; + } + } + + dma->device_tx_status = ioat_tx_status; + + /* starting with CB3.3 super extended descriptors are supported */ + if (ioat_dma->cap & IOAT_CAP_RAID16SS) { + char pool_name[14]; + int i; + + for (i = 0; i < MAX_SED_POOLS; i++) { + snprintf(pool_name, 14, "ioat_hw%d_sed", i); + + /* allocate SED DMA pool */ + ioat_dma->sed_hw_pool[i] = dmam_pool_create(pool_name, + &pdev->dev, + SED_SIZE * (i + 1), 64, 0); + if (!ioat_dma->sed_hw_pool[i]) + return -ENOMEM; + + } + } + + if (!(ioat_dma->cap & (IOAT_CAP_XOR | IOAT_CAP_PQ))) + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + + err = ioat_probe(ioat_dma); + if (err) + return err; + + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + writel(IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(ioat_dma); + if (err) + return err; + + ioat_kobject_add(ioat_dma, &ioat_ktype); + + if (dca) + ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); + + /* disable relaxed ordering */ + err = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &val16); + if (err) + return pcibios_err_to_errno(err); + + /* clear relaxed ordering enable */ + val16 &= ~PCI_EXP_DEVCTL_RELAX_EN; + err = pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, val16); + if (err) + return pcibios_err_to_errno(err); + + if (ioat_dma->cap & IOAT_CAP_DPS) + writeb(ioat_pending_level + 1, + ioat_dma->reg_base + IOAT_PREFETCH_LIMIT_OFFSET); + + return 0; +} + +static void ioat_shutdown(struct pci_dev *pdev) +{ + struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev); + struct ioatdma_chan *ioat_chan; + int i; + + if (!ioat_dma) + return; + + for (i = 0; i < IOAT_MAX_CHANS; i++) { + ioat_chan = ioat_dma->idx[i]; + if (!ioat_chan) + continue; + + spin_lock_bh(&ioat_chan->prep_lock); + set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + /* + * Synchronization rule for del_timer_sync(): + * - The caller must not hold locks which would prevent + * completion of the timer's handler. + * So prep_lock cannot be held before calling it. + */ + del_timer_sync(&ioat_chan->timer); + + /* this should quiesce then reset */ + ioat_reset_hw(ioat_chan); + } + + ioat_disable_interrupts(ioat_dma); +} + +static void ioat_resume(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + u32 chanerr; + int i; + + for (i = 0; i < IOAT_MAX_CHANS; i++) { + ioat_chan = ioat_dma->idx[i]; + if (!ioat_chan) + continue; + + spin_lock_bh(&ioat_chan->prep_lock); + clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + /* no need to reset as shutdown already did that */ + } +} + +#define DRV_NAME "ioatdma" + +static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev, + pci_channel_state_t error) +{ + dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error); + + /* quiesce and block I/O */ + ioat_shutdown(pdev); + + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t ioat_pcie_error_slot_reset(struct pci_dev *pdev) +{ + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + + dev_dbg(&pdev->dev, "%s post reset handling\n", DRV_NAME); + + if (pci_enable_device_mem(pdev) < 0) { + dev_err(&pdev->dev, + "Failed to enable PCIe device after reset.\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + pci_wake_from_d3(pdev, false); + } + + return result; +} + +static void ioat_pcie_error_resume(struct pci_dev *pdev) +{ + struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev); + + dev_dbg(&pdev->dev, "%s: AER handling resuming\n", DRV_NAME); + + /* initialize and bring everything back */ + ioat_resume(ioat_dma); +} + +static const struct pci_error_handlers ioat_err_handler = { + .error_detected = ioat_pcie_error_detected, + .slot_reset = ioat_pcie_error_slot_reset, + .resume = ioat_pcie_error_resume, +}; + +static struct pci_driver ioat_pci_driver = { + .name = DRV_NAME, + .id_table = ioat_pci_tbl, + .probe = ioat_pci_probe, + .remove = ioat_remove, + .shutdown = ioat_shutdown, + .err_handler = &ioat_err_handler, +}; + +static void release_ioatdma(struct dma_device *device) +{ + struct ioatdma_device *d = to_ioatdma_device(device); + int i; + + for (i = 0; i < IOAT_MAX_CHANS; i++) + kfree(d->idx[i]); + + dma_pool_destroy(d->completion_pool); + kfree(d); +} + +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct ioatdma_device *d = kzalloc(sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + d->dma_dev.device_release = release_ioatdma; + return d; +} + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + void __iomem * const *iomap; + struct device *dev = &pdev->dev; + struct ioatdma_device *device; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); + if (err) + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + return err; + + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_master(pdev); + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version >= IOAT_VER_3_4) + ioat_dca_enabled = 0; + if (device->version >= IOAT_VER_3_0) { + if (is_skx_ioat(pdev)) + device->version = IOAT_VER_3_2; + err = ioat3_dma_probe(device, ioat_dca_enabled); + } else + return -ENODEV; + + if (err) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + return -ENODEV; + } + + return 0; +} + +static void ioat_remove(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; + + ioat_shutdown(pdev); + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; + } + + ioat_dma_remove(device); +} + +static int __init ioat_init_module(void) +{ + int err = -ENOMEM; + + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + + ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat_cache) + return -ENOMEM; + + ioat_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); + if (!ioat_sed_cache) + goto err_ioat_cache; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + goto err_ioat3_cache; + + return 0; + + err_ioat3_cache: + kmem_cache_destroy(ioat_sed_cache); + + err_ioat_cache: + kmem_cache_destroy(ioat_cache); + + return err; +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat_cache); +} +module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c new file mode 100644 index 0000000000..033d9f2b3e --- /dev/null +++ b/drivers/dma/ioat/prep.c @@ -0,0 +1,737 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + */ +#include +#include +#include +#include +#include +#include +#include "../dmaengine.h" +#include "registers.h" +#include "hw.h" +#include "dma.h" + +#define MAX_SCF 256 + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor or pq descriptor + */ +static const u8 xor_idx_to_desc = 0xe0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2 }; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6 }; + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) +{ + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + return raw->field[pq16_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + +static void pq16_set_src(struct ioat_raw_descriptor *desc[3], + dma_addr_t addr, u32 offset, u8 coef, unsigned idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; + struct ioat_pq16a_descriptor *pq16 = + (struct ioat_pq16a_descriptor *)desc[1]; + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + raw->field[pq16_idx_to_field[idx]] = addr + offset; + + if (idx < 8) + pq->coef[idx] = coef; + else + pq16->coef[idx - 8] = coef; +} + +static struct ioat_sed_ent * +ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool) +{ + struct ioat_sed_ent *sed; + gfp_t flags = __GFP_ZERO | GFP_ATOMIC; + + sed = kmem_cache_alloc(ioat_sed_cache, flags); + if (!sed) + return NULL; + + sed->hw_pool = hw_pool; + sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool], + flags, &sed->dma); + if (!sed->hw) { + kmem_cache_free(ioat_sed_cache, sed); + return NULL; + } + + return sed; +} + +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs, idx, i; + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + dump_desc_dbg(ioat_chan, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat_chan, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + + +static struct dma_async_tx_descriptor * +__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + int num_descs, with_ext, idx, i; + u32 offset = 0; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs+1) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, + len, 1 << ioat_chan->xfercap_log); + int s; + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat_chan, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc, + struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, + pq->ctl_f.int_en, pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); + dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); +} + +static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + int i, s, idx, with_ext, num_descs; + int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. + */ + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat_chan, desc, ext); + + if (!cb32) { + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + compl_desc = desc; + } else { + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + } + + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. + */ + if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3); + if (!desc->sed) { + dev_err(to_dev(ioat_chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) +{ + if (dmaf_p_disabled_continue(flags)) + return src_cnt + 1; + else if (dmaf_continue(flags)) + return src_cnt + 3; + else + return src_cnt; +} + +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + dst[1] = dst[0]; + + /* handle the single source multiply case from the raid6 + * recovery path + */ + if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } +} + +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + pq[0] = pq[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + pq[1] = pq[0]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[MAX_SCF]; + dma_addr_t pq[2]; + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (src_cnt > MAX_SCF) + return NULL; + + memset(scf, 0, src_cnt); + pq[0] = dst; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = dst; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[MAX_SCF]; + dma_addr_t pq[2]; + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (src_cnt > MAX_SCF) + return NULL; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + pq[0] = src[0]; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = pq[0]; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (ioat_check_space_lock(ioat_chan, 1) == 0) + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h new file mode 100644 index 0000000000..54cf0ad398 --- /dev/null +++ b/drivers/dma/ioat/registers.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + */ +#ifndef _IOAT_REGISTERS_H_ +#define _IOAT_REGISTERS_H_ + +#define IOAT_PCI_DMACTRL_OFFSET 0x48 +#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 +#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 + +#define IOAT_PCI_DEVICE_ID_OFFSET 0x02 +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERR_INT_OFFSET 0x180 +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 + +/* MMIO Device Registers */ +#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ + +#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */ +#define IOAT_XFERCAP_4KB 12 +#define IOAT_XFERCAP_8KB 13 +#define IOAT_XFERCAP_16KB 14 +#define IOAT_XFERCAP_32KB 15 +#define IOAT_XFERCAP_32GB 0 + +#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */ +#define IOAT_GENCTRL_DEBUG_EN 0x01 + +#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */ +#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */ +#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */ +#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */ +#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ + +#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */ + +#define IOAT_VER_OFFSET 0x08 /* 8-bit */ +#define IOAT_VER_MAJOR_MASK 0xF0 +#define IOAT_VER_MINOR_MASK 0x0F +#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4) +#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK) + +#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ + +#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ +#define IOAT_INTRDELAY_MASK 0x3FFF /* Interrupt Delay Time */ +#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ + +#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ +#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 +#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002 +#define IOAT_DEVICE_MEMORY_BYPASS 0x0004 +#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008 + +#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */ +#define IOAT_CAP_PAGE_BREAK 0x00000001 +#define IOAT_CAP_CRC 0x00000002 +#define IOAT_CAP_SKIP_MARKER 0x00000004 +#define IOAT_CAP_DCA 0x00000010 +#define IOAT_CAP_CRC_MOVE 0x00000020 +#define IOAT_CAP_FILL_BLOCK 0x00000040 +#define IOAT_CAP_APIC 0x00000080 +#define IOAT_CAP_XOR 0x00000100 +#define IOAT_CAP_PQ 0x00000200 +#define IOAT_CAP_DWBES 0x00002000 +#define IOAT_CAP_RAID16SS 0x00020000 +#define IOAT_CAP_DPS 0x00800000 + +#define IOAT_PREFETCH_LIMIT_OFFSET 0x4C /* CHWPREFLMT */ + +#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ + +/* DMA Channel Registers */ +#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ +#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 +#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200 +#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 +#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 +#define IOAT_CHANCTRL_ERR_INT_EN 0x0010 +#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 +#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 +#define IOAT_CHANCTRL_INT_REARM 0x0001 +#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ + IOAT_CHANCTRL_ERR_INT_EN |\ + IOAT_CHANCTRL_ERR_COMPLETION_EN |\ + IOAT_CHANCTRL_ANY_ERR_ABORT_EN) + +#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ +#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ +#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */ + +#define IOAT_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */ +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) +#define IOAT_CHANSTS_SOFT_ERR 0x10ULL +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL +#define IOAT_CHANSTS_STATUS 0x7ULL +#define IOAT_CHANSTS_ACTIVE 0x0 +#define IOAT_CHANSTS_DONE 0x1 +#define IOAT_CHANSTS_SUSPENDED 0x2 +#define IOAT_CHANSTS_HALTED 0x3 + + + +#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */ + +#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */ +#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000 +#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */ + +/* CB DCA Memory Space Registers */ +#define IOAT_DCAOFFSET_OFFSET 0x14 +/* CB_BAR + IOAT_DCAOFFSET value */ +#define IOAT_DCA_VER_OFFSET 0x00 +#define IOAT_DCA_VER_MAJOR_MASK 0xF0 +#define IOAT_DCA_VER_MINOR_MASK 0x0F + +#define IOAT_DCA_COMP_OFFSET 0x02 +#define IOAT_DCA_COMP_V1 0x1 + +#define IOAT_FSB_CAPABILITY_OFFSET 0x04 +#define IOAT_FSB_CAPABILITY_PREFETCH 0x1 + +#define IOAT_PCI_CAPABILITY_OFFSET 0x06 +#define IOAT_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08 +#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1 + +#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A +#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1 + +#define IOAT_APICID_TAG_MAP_OFFSET 0x0C +#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F +#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0 +#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0 +#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4 +#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00 +#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8 +#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000 +#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12 +#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000 +#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16 +#define IOAT_APICID_TAG_CB2_VALID 0x8080808080 + +#define IOAT_DCA_GREQID_OFFSET 0x10 +#define IOAT_DCA_GREQID_SIZE 0x04 +#define IOAT_DCA_GREQID_MASK 0xFFFF +#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000 +#define IOAT_DCA_GREQID_VALID 0x20000000 +#define IOAT_DCA_GREQID_LASTID 0x80000000 + +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08 +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1 + +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT3_CSI_CONTROL_OFFSET 0x0C +#define IOAT3_CSI_CONTROL_PREFETCH 0x1 + +#define IOAT3_PCI_CONTROL_OFFSET 0x0E +#define IOAT3_PCI_CONTROL_MEMWR 0x1 + +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14 + +#define IOAT3_DCA_GREQID_OFFSET 0x02 + +#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ +#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ +#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET) +#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C +#define IOAT2_CHAINADDR_OFFSET_LOW 0x10 +#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW) +#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10 +#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14 +#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH) + +#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ +#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */ +#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET) +#define IOAT_CHANCMD_RESET 0x20 +#define IOAT_CHANCMD_RESUME 0x10 +#define IOAT_CHANCMD_ABORT 0x08 +#define IOAT_CHANCMD_SUSPEND 0x04 +#define IOAT_CHANCMD_APPEND 0x02 +#define IOAT_CHANCMD_START 0x01 + +#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */ +#define IOAT_CHANCMP_OFFSET_LOW 0x18 +#define IOAT_CHANCMP_OFFSET_HIGH 0x1C + +#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */ +#define IOAT_CDAR_OFFSET_LOW 0x20 +#define IOAT_CDAR_OFFSET_HIGH 0x24 + +#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ +#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001 +#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002 +#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004 +#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008 +#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 +#define IOAT_CHANERR_CHANCMD_ERR 0x0020 +#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 +#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 +#define IOAT_CHANERR_READ_DATA_ERR 0x0100 +#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 +#define IOAT_CHANERR_CONTROL_ERR 0x0400 +#define IOAT_CHANERR_LENGTH_ERR 0x0800 +#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 +#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 +#define IOAT_CHANERR_SOFT_ERR 0x4000 +#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 +#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000 +#define IOAT_CHANERR_XOR_Q_ERR 0x20000 +#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000 + +#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR) +#define IOAT_CHANERR_RECOVER_MASK (IOAT_CHANERR_READ_DATA_ERR | \ + IOAT_CHANERR_WRITE_DATA_ERR) + +#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ + +#define IOAT_CHAN_DRSCTL_OFFSET 0xB6 +#define IOAT_CHAN_DRSZ_4KB 0x0000 +#define IOAT_CHAN_DRSZ_8KB 0x0001 +#define IOAT_CHAN_DRSZ_2MB 0x0009 +#define IOAT_CHAN_DRS_EN 0x0100 +#define IOAT_CHAN_DRS_AUTOWRAP 0x0200 + +#define IOAT_CHAN_LTR_SWSEL_OFFSET 0xBC +#define IOAT_CHAN_LTR_SWSEL_ACTIVE 0x0 +#define IOAT_CHAN_LTR_SWSEL_IDLE 0x1 + +#define IOAT_CHAN_LTR_ACTIVE_OFFSET 0xC0 +#define IOAT_CHAN_LTR_ACTIVE_SNVAL 0x0000 /* 0 us */ +#define IOAT_CHAN_LTR_ACTIVE_SNLATSCALE 0x0800 /* 1us scale */ +#define IOAT_CHAN_LTR_ACTIVE_SNREQMNT 0x8000 /* snoop req enable */ + +#define IOAT_CHAN_LTR_IDLE_OFFSET 0xC4 +#define IOAT_CHAN_LTR_IDLE_SNVAL 0x0258 /* 600 us */ +#define IOAT_CHAN_LTR_IDLE_SNLATSCALE 0x0800 /* 1us scale */ +#define IOAT_CHAN_LTR_IDLE_SNREQMNT 0x8000 /* snoop req enable */ + +#endif /* _IOAT_REGISTERS_H_ */ diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c new file mode 100644 index 0000000000..168adf28c5 --- /dev/null +++ b/drivers/dma/ioat/sysfs.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + */ + +#include +#include +#include +#include +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *ioat_dma = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + ioat_dma->version >> 4, ioat_dma->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioatdma_chan *ioat_chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + ioat_chan = container_of(kobj, struct ioatdma_chan, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&ioat_chan->dma_chan, page); +} + +static ssize_t +ioat_attr_store(struct kobject *kobj, struct attribute *attr, +const char *page, size_t count) +{ + struct ioat_sysfs_entry *entry; + struct ioatdma_chan *ioat_chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + ioat_chan = container_of(kobj, struct ioatdma_chan, kobj); + + if (!entry->store) + return -EIO; + return entry->store(&ioat_chan->dma_chan, page, count); +} + +const struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, + .store = ioat_attr_store, +}; + +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&ioat_chan->kobj, type, + parent, "quickdata"); + if (err) { + dev_warn(to_dev(ioat_chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&ioat_chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state)) { + kobject_del(&ioat_chan->kobj); + kobject_put(&ioat_chan->kobj); + } + } +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + return sprintf(page, "%d\n", (1 << ioat_chan->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat_ring_active(ioat_chan)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static ssize_t intr_coalesce_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat_chan->intr_coalesce); +} + +static ssize_t intr_coalesce_store(struct dma_chan *c, const char *page, +size_t count) +{ + int intr_coalesce = 0; + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + if (sscanf(page, "%du", &intr_coalesce) != -1) { + if ((intr_coalesce < 0) || + (intr_coalesce > IOAT_INTRDELAY_MASK)) + return -EINVAL; + ioat_chan->intr_coalesce = intr_coalesce; + } + + return count; +} + +static struct ioat_sysfs_entry intr_coalesce_attr = __ATTR_RW(intr_coalesce); + +static struct attribute *ioat_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + &intr_coalesce_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(ioat); + +struct kobj_type ioat_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_groups = ioat_groups, +}; diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c new file mode 100644 index 0000000000..ecdaada951 --- /dev/null +++ b/drivers/dma/k3dma.c @@ -0,0 +1,1044 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2013 - 2015 Linaro Ltd. + * Copyright (c) 2013 HiSilicon Limited. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +#define DRIVER_NAME "k3-dma" +#define DMA_MAX_SIZE 0x1ffc +#define DMA_CYCLIC_MAX_PERIOD 0x1000 +#define LLI_BLOCK_SIZE (4 * PAGE_SIZE) + +#define INT_STAT 0x00 +#define INT_TC1 0x04 +#define INT_TC2 0x08 +#define INT_ERR1 0x0c +#define INT_ERR2 0x10 +#define INT_TC1_MASK 0x18 +#define INT_TC2_MASK 0x1c +#define INT_ERR1_MASK 0x20 +#define INT_ERR2_MASK 0x24 +#define INT_TC1_RAW 0x600 +#define INT_TC2_RAW 0x608 +#define INT_ERR1_RAW 0x610 +#define INT_ERR2_RAW 0x618 +#define CH_PRI 0x688 +#define CH_STAT 0x690 +#define CX_CUR_CNT 0x704 +#define CX_LLI 0x800 +#define CX_CNT1 0x80c +#define CX_CNT0 0x810 +#define CX_SRC 0x814 +#define CX_DST 0x818 +#define CX_CFG 0x81c + +#define CX_LLI_CHAIN_EN 0x2 +#define CX_CFG_EN 0x1 +#define CX_CFG_NODEIRQ BIT(1) +#define CX_CFG_MEM2PER (0x1 << 2) +#define CX_CFG_PER2MEM (0x2 << 2) +#define CX_CFG_SRCINCR (0x1 << 31) +#define CX_CFG_DSTINCR (0x1 << 30) + +struct k3_desc_hw { + u32 lli; + u32 reserved[3]; + u32 count; + u32 saddr; + u32 daddr; + u32 config; +} __aligned(32); + +struct k3_dma_desc_sw { + struct virt_dma_desc vd; + dma_addr_t desc_hw_lli; + size_t desc_num; + size_t size; + struct k3_desc_hw *desc_hw; +}; + +struct k3_dma_phy; + +struct k3_dma_chan { + u32 ccfg; + struct virt_dma_chan vc; + struct k3_dma_phy *phy; + struct list_head node; + dma_addr_t dev_addr; + enum dma_status status; + bool cyclic; + struct dma_slave_config slave_config; +}; + +struct k3_dma_phy { + u32 idx; + void __iomem *base; + struct k3_dma_chan *vchan; + struct k3_dma_desc_sw *ds_run; + struct k3_dma_desc_sw *ds_done; +}; + +struct k3_dma_dev { + struct dma_device slave; + void __iomem *base; + struct tasklet_struct task; + spinlock_t lock; + struct list_head chan_pending; + struct k3_dma_phy *phy; + struct k3_dma_chan *chans; + struct clk *clk; + struct dma_pool *pool; + u32 dma_channels; + u32 dma_requests; + u32 dma_channel_mask; + unsigned int irq; +}; + + +#define K3_FLAG_NOCLK BIT(1) + +struct k3dma_soc_data { + unsigned long flags; +}; + + +#define to_k3_dma(dmadev) container_of(dmadev, struct k3_dma_dev, slave) + +static int k3_dma_config_write(struct dma_chan *chan, + enum dma_transfer_direction dir, + struct dma_slave_config *cfg); + +static struct k3_dma_chan *to_k3_chan(struct dma_chan *chan) +{ + return container_of(chan, struct k3_dma_chan, vc.chan); +} + +static void k3_dma_pause_dma(struct k3_dma_phy *phy, bool on) +{ + u32 val = 0; + + if (on) { + val = readl_relaxed(phy->base + CX_CFG); + val |= CX_CFG_EN; + writel_relaxed(val, phy->base + CX_CFG); + } else { + val = readl_relaxed(phy->base + CX_CFG); + val &= ~CX_CFG_EN; + writel_relaxed(val, phy->base + CX_CFG); + } +} + +static void k3_dma_terminate_chan(struct k3_dma_phy *phy, struct k3_dma_dev *d) +{ + u32 val = 0; + + k3_dma_pause_dma(phy, false); + + val = 0x1 << phy->idx; + writel_relaxed(val, d->base + INT_TC1_RAW); + writel_relaxed(val, d->base + INT_TC2_RAW); + writel_relaxed(val, d->base + INT_ERR1_RAW); + writel_relaxed(val, d->base + INT_ERR2_RAW); +} + +static void k3_dma_set_desc(struct k3_dma_phy *phy, struct k3_desc_hw *hw) +{ + writel_relaxed(hw->lli, phy->base + CX_LLI); + writel_relaxed(hw->count, phy->base + CX_CNT0); + writel_relaxed(hw->saddr, phy->base + CX_SRC); + writel_relaxed(hw->daddr, phy->base + CX_DST); + writel_relaxed(hw->config, phy->base + CX_CFG); +} + +static u32 k3_dma_get_curr_cnt(struct k3_dma_dev *d, struct k3_dma_phy *phy) +{ + u32 cnt = 0; + + cnt = readl_relaxed(d->base + CX_CUR_CNT + phy->idx * 0x10); + cnt &= 0xffff; + return cnt; +} + +static u32 k3_dma_get_curr_lli(struct k3_dma_phy *phy) +{ + return readl_relaxed(phy->base + CX_LLI); +} + +static u32 k3_dma_get_chan_stat(struct k3_dma_dev *d) +{ + return readl_relaxed(d->base + CH_STAT); +} + +static void k3_dma_enable_dma(struct k3_dma_dev *d, bool on) +{ + if (on) { + /* set same priority */ + writel_relaxed(0x0, d->base + CH_PRI); + + /* unmask irq */ + writel_relaxed(0xffff, d->base + INT_TC1_MASK); + writel_relaxed(0xffff, d->base + INT_TC2_MASK); + writel_relaxed(0xffff, d->base + INT_ERR1_MASK); + writel_relaxed(0xffff, d->base + INT_ERR2_MASK); + } else { + /* mask irq */ + writel_relaxed(0x0, d->base + INT_TC1_MASK); + writel_relaxed(0x0, d->base + INT_TC2_MASK); + writel_relaxed(0x0, d->base + INT_ERR1_MASK); + writel_relaxed(0x0, d->base + INT_ERR2_MASK); + } +} + +static irqreturn_t k3_dma_int_handler(int irq, void *dev_id) +{ + struct k3_dma_dev *d = (struct k3_dma_dev *)dev_id; + struct k3_dma_phy *p; + struct k3_dma_chan *c; + u32 stat = readl_relaxed(d->base + INT_STAT); + u32 tc1 = readl_relaxed(d->base + INT_TC1); + u32 tc2 = readl_relaxed(d->base + INT_TC2); + u32 err1 = readl_relaxed(d->base + INT_ERR1); + u32 err2 = readl_relaxed(d->base + INT_ERR2); + u32 i, irq_chan = 0; + + while (stat) { + i = __ffs(stat); + stat &= ~BIT(i); + if (likely(tc1 & BIT(i)) || (tc2 & BIT(i))) { + + p = &d->phy[i]; + c = p->vchan; + if (c && (tc1 & BIT(i))) { + spin_lock(&c->vc.lock); + if (p->ds_run != NULL) { + vchan_cookie_complete(&p->ds_run->vd); + p->ds_done = p->ds_run; + p->ds_run = NULL; + } + spin_unlock(&c->vc.lock); + } + if (c && (tc2 & BIT(i))) { + spin_lock(&c->vc.lock); + if (p->ds_run != NULL) + vchan_cyclic_callback(&p->ds_run->vd); + spin_unlock(&c->vc.lock); + } + irq_chan |= BIT(i); + } + if (unlikely((err1 & BIT(i)) || (err2 & BIT(i)))) + dev_warn(d->slave.dev, "DMA ERR\n"); + } + + writel_relaxed(irq_chan, d->base + INT_TC1_RAW); + writel_relaxed(irq_chan, d->base + INT_TC2_RAW); + writel_relaxed(err1, d->base + INT_ERR1_RAW); + writel_relaxed(err2, d->base + INT_ERR2_RAW); + + if (irq_chan) + tasklet_schedule(&d->task); + + if (irq_chan || err1 || err2) + return IRQ_HANDLED; + + return IRQ_NONE; +} + +static int k3_dma_start_txd(struct k3_dma_chan *c) +{ + struct k3_dma_dev *d = to_k3_dma(c->vc.chan.device); + struct virt_dma_desc *vd = vchan_next_desc(&c->vc); + + if (!c->phy) + return -EAGAIN; + + if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d)) + return -EAGAIN; + + /* Avoid losing track of ds_run if a transaction is in flight */ + if (c->phy->ds_run) + return -EAGAIN; + + if (vd) { + struct k3_dma_desc_sw *ds = + container_of(vd, struct k3_dma_desc_sw, vd); + /* + * fetch and remove request from vc->desc_issued + * so vc->desc_issued only contains desc pending + */ + list_del(&ds->vd.node); + + c->phy->ds_run = ds; + c->phy->ds_done = NULL; + /* start dma */ + k3_dma_set_desc(c->phy, &ds->desc_hw[0]); + return 0; + } + c->phy->ds_run = NULL; + c->phy->ds_done = NULL; + return -EAGAIN; +} + +static void k3_dma_tasklet(struct tasklet_struct *t) +{ + struct k3_dma_dev *d = from_tasklet(d, t, task); + struct k3_dma_phy *p; + struct k3_dma_chan *c, *cn; + unsigned pch, pch_alloc = 0; + + /* check new dma request of running channel in vc->desc_issued */ + list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) { + spin_lock_irq(&c->vc.lock); + p = c->phy; + if (p && p->ds_done) { + if (k3_dma_start_txd(c)) { + /* No current txd associated with this channel */ + dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx); + /* Mark this channel free */ + c->phy = NULL; + p->vchan = NULL; + } + } + spin_unlock_irq(&c->vc.lock); + } + + /* check new channel request in d->chan_pending */ + spin_lock_irq(&d->lock); + for (pch = 0; pch < d->dma_channels; pch++) { + if (!(d->dma_channel_mask & (1 << pch))) + continue; + + p = &d->phy[pch]; + + if (p->vchan == NULL && !list_empty(&d->chan_pending)) { + c = list_first_entry(&d->chan_pending, + struct k3_dma_chan, node); + /* remove from d->chan_pending */ + list_del_init(&c->node); + pch_alloc |= 1 << pch; + /* Mark this channel allocated */ + p->vchan = c; + c->phy = p; + dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc); + } + } + spin_unlock_irq(&d->lock); + + for (pch = 0; pch < d->dma_channels; pch++) { + if (!(d->dma_channel_mask & (1 << pch))) + continue; + + if (pch_alloc & (1 << pch)) { + p = &d->phy[pch]; + c = p->vchan; + if (c) { + spin_lock_irq(&c->vc.lock); + k3_dma_start_txd(c); + spin_unlock_irq(&c->vc.lock); + } + } + } +} + +static void k3_dma_free_chan_resources(struct dma_chan *chan) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_dev *d = to_k3_dma(chan->device); + unsigned long flags; + + spin_lock_irqsave(&d->lock, flags); + list_del_init(&c->node); + spin_unlock_irqrestore(&d->lock, flags); + + vchan_free_chan_resources(&c->vc); + c->ccfg = 0; +} + +static enum dma_status k3_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_dev *d = to_k3_dma(chan->device); + struct k3_dma_phy *p; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(&c->vc.chan, cookie, state); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_irqsave(&c->vc.lock, flags); + p = c->phy; + ret = c->status; + + /* + * If the cookie is on our issue queue, then the residue is + * its total size. + */ + vd = vchan_find_desc(&c->vc, cookie); + if (vd && !c->cyclic) { + bytes = container_of(vd, struct k3_dma_desc_sw, vd)->size; + } else if ((!p) || (!p->ds_run)) { + bytes = 0; + } else { + struct k3_dma_desc_sw *ds = p->ds_run; + u32 clli = 0, index = 0; + + bytes = k3_dma_get_curr_cnt(d, p); + clli = k3_dma_get_curr_lli(p); + index = ((clli - ds->desc_hw_lli) / + sizeof(struct k3_desc_hw)) + 1; + for (; index < ds->desc_num; index++) { + bytes += ds->desc_hw[index].count; + /* end of lli */ + if (!ds->desc_hw[index].lli) + break; + } + } + spin_unlock_irqrestore(&c->vc.lock, flags); + dma_set_residue(state, bytes); + return ret; +} + +static void k3_dma_issue_pending(struct dma_chan *chan) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_dev *d = to_k3_dma(chan->device); + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + /* add request to vc->desc_issued */ + if (vchan_issue_pending(&c->vc)) { + spin_lock(&d->lock); + if (!c->phy) { + if (list_empty(&c->node)) { + /* if new channel, add chan_pending */ + list_add_tail(&c->node, &d->chan_pending); + /* check in tasklet */ + tasklet_schedule(&d->task); + dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc); + } + } + spin_unlock(&d->lock); + } else + dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc); + spin_unlock_irqrestore(&c->vc.lock, flags); +} + +static void k3_dma_fill_desc(struct k3_dma_desc_sw *ds, dma_addr_t dst, + dma_addr_t src, size_t len, u32 num, u32 ccfg) +{ + if (num != ds->desc_num - 1) + ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) * + sizeof(struct k3_desc_hw); + + ds->desc_hw[num].lli |= CX_LLI_CHAIN_EN; + ds->desc_hw[num].count = len; + ds->desc_hw[num].saddr = src; + ds->desc_hw[num].daddr = dst; + ds->desc_hw[num].config = ccfg; +} + +static struct k3_dma_desc_sw *k3_dma_alloc_desc_resource(int num, + struct dma_chan *chan) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_desc_sw *ds; + struct k3_dma_dev *d = to_k3_dma(chan->device); + int lli_limit = LLI_BLOCK_SIZE / sizeof(struct k3_desc_hw); + + if (num > lli_limit) { + dev_dbg(chan->device->dev, "vch %p: sg num %d exceed max %d\n", + &c->vc, num, lli_limit); + return NULL; + } + + ds = kzalloc(sizeof(*ds), GFP_NOWAIT); + if (!ds) + return NULL; + + ds->desc_hw = dma_pool_zalloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli); + if (!ds->desc_hw) { + dev_dbg(chan->device->dev, "vch %p: dma alloc fail\n", &c->vc); + kfree(ds); + return NULL; + } + ds->desc_num = num; + return ds; +} + +static struct dma_async_tx_descriptor *k3_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_desc_sw *ds; + size_t copy = 0; + int num = 0; + + if (!len) + return NULL; + + num = DIV_ROUND_UP(len, DMA_MAX_SIZE); + + ds = k3_dma_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + + c->cyclic = 0; + ds->size = len; + num = 0; + + if (!c->ccfg) { + /* default is memtomem, without calling device_config */ + c->ccfg = CX_CFG_SRCINCR | CX_CFG_DSTINCR | CX_CFG_EN; + c->ccfg |= (0xf << 20) | (0xf << 24); /* burst = 16 */ + c->ccfg |= (0x3 << 12) | (0x3 << 16); /* width = 64 bit */ + } + + do { + copy = min_t(size_t, len, DMA_MAX_SIZE); + k3_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg); + + src += copy; + dst += copy; + len -= copy; + } while (len); + + ds->desc_hw[num-1].lli = 0; /* end of link */ + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static struct dma_async_tx_descriptor *k3_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen, + enum dma_transfer_direction dir, unsigned long flags, void *context) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_desc_sw *ds; + size_t len, avail, total = 0; + struct scatterlist *sg; + dma_addr_t addr, src = 0, dst = 0; + int num = sglen, i; + + if (sgl == NULL) + return NULL; + + c->cyclic = 0; + + for_each_sg(sgl, sg, sglen, i) { + avail = sg_dma_len(sg); + if (avail > DMA_MAX_SIZE) + num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1; + } + + ds = k3_dma_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + num = 0; + k3_dma_config_write(chan, dir, &c->slave_config); + + for_each_sg(sgl, sg, sglen, i) { + addr = sg_dma_address(sg); + avail = sg_dma_len(sg); + total += avail; + + do { + len = min_t(size_t, avail, DMA_MAX_SIZE); + + if (dir == DMA_MEM_TO_DEV) { + src = addr; + dst = c->dev_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = c->dev_addr; + dst = addr; + } + + k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg); + + addr += len; + avail -= len; + } while (avail); + } + + ds->desc_hw[num-1].lli = 0; /* end of link */ + ds->size = total; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static struct dma_async_tx_descriptor * +k3_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction dir, + unsigned long flags) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_desc_sw *ds; + size_t len, avail, total = 0; + dma_addr_t addr, src = 0, dst = 0; + int num = 1, since = 0; + size_t modulo = DMA_CYCLIC_MAX_PERIOD; + u32 en_tc2 = 0; + + dev_dbg(chan->device->dev, "%s: buf %pad, dst %pad, buf len %zu, period_len = %zu, dir %d\n", + __func__, &buf_addr, &to_k3_chan(chan)->dev_addr, + buf_len, period_len, (int)dir); + + avail = buf_len; + if (avail > modulo) + num += DIV_ROUND_UP(avail, modulo) - 1; + + ds = k3_dma_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + + c->cyclic = 1; + addr = buf_addr; + avail = buf_len; + total = avail; + num = 0; + k3_dma_config_write(chan, dir, &c->slave_config); + + if (period_len < modulo) + modulo = period_len; + + do { + len = min_t(size_t, avail, modulo); + + if (dir == DMA_MEM_TO_DEV) { + src = addr; + dst = c->dev_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = c->dev_addr; + dst = addr; + } + since += len; + if (since >= period_len) { + /* descriptor asks for TC2 interrupt on completion */ + en_tc2 = CX_CFG_NODEIRQ; + since -= period_len; + } else + en_tc2 = 0; + + k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg | en_tc2); + + addr += len; + avail -= len; + } while (avail); + + /* "Cyclic" == end of link points back to start of link */ + ds->desc_hw[num - 1].lli |= ds->desc_hw_lli; + + ds->size = total; + + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static int k3_dma_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + + memcpy(&c->slave_config, cfg, sizeof(*cfg)); + + return 0; +} + +static int k3_dma_config_write(struct dma_chan *chan, + enum dma_transfer_direction dir, + struct dma_slave_config *cfg) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + u32 maxburst = 0, val = 0; + enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + + if (dir == DMA_DEV_TO_MEM) { + c->ccfg = CX_CFG_DSTINCR; + c->dev_addr = cfg->src_addr; + maxburst = cfg->src_maxburst; + width = cfg->src_addr_width; + } else if (dir == DMA_MEM_TO_DEV) { + c->ccfg = CX_CFG_SRCINCR; + c->dev_addr = cfg->dst_addr; + maxburst = cfg->dst_maxburst; + width = cfg->dst_addr_width; + } + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + case DMA_SLAVE_BUSWIDTH_2_BYTES: + case DMA_SLAVE_BUSWIDTH_4_BYTES: + case DMA_SLAVE_BUSWIDTH_8_BYTES: + val = __ffs(width); + break; + default: + val = 3; + break; + } + c->ccfg |= (val << 12) | (val << 16); + + if ((maxburst == 0) || (maxburst > 16)) + val = 15; + else + val = maxburst - 1; + c->ccfg |= (val << 20) | (val << 24); + c->ccfg |= CX_CFG_MEM2PER | CX_CFG_EN; + + /* specific request line */ + c->ccfg |= c->vc.chan.chan_id << 4; + + return 0; +} + +static void k3_dma_free_desc(struct virt_dma_desc *vd) +{ + struct k3_dma_desc_sw *ds = + container_of(vd, struct k3_dma_desc_sw, vd); + struct k3_dma_dev *d = to_k3_dma(vd->tx.chan->device); + + dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli); + kfree(ds); +} + +static int k3_dma_terminate_all(struct dma_chan *chan) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_dev *d = to_k3_dma(chan->device); + struct k3_dma_phy *p = c->phy; + unsigned long flags; + LIST_HEAD(head); + + dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc); + + /* Prevent this channel being scheduled */ + spin_lock(&d->lock); + list_del_init(&c->node); + spin_unlock(&d->lock); + + /* Clear the tx descriptor lists */ + spin_lock_irqsave(&c->vc.lock, flags); + vchan_get_all_descriptors(&c->vc, &head); + if (p) { + /* vchan is assigned to a pchan - stop the channel */ + k3_dma_terminate_chan(p, d); + c->phy = NULL; + p->vchan = NULL; + if (p->ds_run) { + vchan_terminate_vdesc(&p->ds_run->vd); + p->ds_run = NULL; + } + p->ds_done = NULL; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_dma_desc_free_list(&c->vc, &head); + + return 0; +} + +static void k3_dma_synchronize(struct dma_chan *chan) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + + vchan_synchronize(&c->vc); +} + +static int k3_dma_transfer_pause(struct dma_chan *chan) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_dev *d = to_k3_dma(chan->device); + struct k3_dma_phy *p = c->phy; + + dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc); + if (c->status == DMA_IN_PROGRESS) { + c->status = DMA_PAUSED; + if (p) { + k3_dma_pause_dma(p, false); + } else { + spin_lock(&d->lock); + list_del_init(&c->node); + spin_unlock(&d->lock); + } + } + + return 0; +} + +static int k3_dma_transfer_resume(struct dma_chan *chan) +{ + struct k3_dma_chan *c = to_k3_chan(chan); + struct k3_dma_dev *d = to_k3_dma(chan->device); + struct k3_dma_phy *p = c->phy; + unsigned long flags; + + dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc); + spin_lock_irqsave(&c->vc.lock, flags); + if (c->status == DMA_PAUSED) { + c->status = DMA_IN_PROGRESS; + if (p) { + k3_dma_pause_dma(p, true); + } else if (!list_empty(&c->vc.desc_issued)) { + spin_lock(&d->lock); + list_add_tail(&c->node, &d->chan_pending); + spin_unlock(&d->lock); + } + } + spin_unlock_irqrestore(&c->vc.lock, flags); + + return 0; +} + +static const struct k3dma_soc_data k3_v1_dma_data = { + .flags = 0, +}; + +static const struct k3dma_soc_data asp_v1_dma_data = { + .flags = K3_FLAG_NOCLK, +}; + +static const struct of_device_id k3_pdma_dt_ids[] = { + { .compatible = "hisilicon,k3-dma-1.0", + .data = &k3_v1_dma_data + }, + { .compatible = "hisilicon,hisi-pcm-asp-dma-1.0", + .data = &asp_v1_dma_data + }, + {} +}; +MODULE_DEVICE_TABLE(of, k3_pdma_dt_ids); + +static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct k3_dma_dev *d = ofdma->of_dma_data; + unsigned int request = dma_spec->args[0]; + + if (request >= d->dma_requests) + return NULL; + + return dma_get_slave_channel(&(d->chans[request].vc.chan)); +} + +static int k3_dma_probe(struct platform_device *op) +{ + const struct k3dma_soc_data *soc_data; + struct k3_dma_dev *d; + const struct of_device_id *of_id; + int i, ret, irq = 0; + + d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + soc_data = device_get_match_data(&op->dev); + if (!soc_data) + return -EINVAL; + + d->base = devm_platform_ioremap_resource(op, 0); + if (IS_ERR(d->base)) + return PTR_ERR(d->base); + + of_id = of_match_device(k3_pdma_dt_ids, &op->dev); + if (of_id) { + of_property_read_u32((&op->dev)->of_node, + "dma-channels", &d->dma_channels); + of_property_read_u32((&op->dev)->of_node, + "dma-requests", &d->dma_requests); + ret = of_property_read_u32((&op->dev)->of_node, + "dma-channel-mask", &d->dma_channel_mask); + if (ret) { + dev_warn(&op->dev, + "dma-channel-mask doesn't exist, considering all as available.\n"); + d->dma_channel_mask = (u32)~0UL; + } + } + + if (!(soc_data->flags & K3_FLAG_NOCLK)) { + d->clk = devm_clk_get(&op->dev, NULL); + if (IS_ERR(d->clk)) { + dev_err(&op->dev, "no dma clk\n"); + return PTR_ERR(d->clk); + } + } + + irq = platform_get_irq(op, 0); + ret = devm_request_irq(&op->dev, irq, + k3_dma_int_handler, 0, DRIVER_NAME, d); + if (ret) + return ret; + + d->irq = irq; + + /* A DMA memory pool for LLIs, align on 32-byte boundary */ + d->pool = dmam_pool_create(DRIVER_NAME, &op->dev, + LLI_BLOCK_SIZE, 32, 0); + if (!d->pool) + return -ENOMEM; + + /* init phy channel */ + d->phy = devm_kcalloc(&op->dev, + d->dma_channels, sizeof(struct k3_dma_phy), GFP_KERNEL); + if (d->phy == NULL) + return -ENOMEM; + + for (i = 0; i < d->dma_channels; i++) { + struct k3_dma_phy *p; + + if (!(d->dma_channel_mask & BIT(i))) + continue; + + p = &d->phy[i]; + p->idx = i; + p->base = d->base + i * 0x40; + } + + INIT_LIST_HEAD(&d->slave.channels); + dma_cap_set(DMA_SLAVE, d->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, d->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, d->slave.cap_mask); + d->slave.dev = &op->dev; + d->slave.device_free_chan_resources = k3_dma_free_chan_resources; + d->slave.device_tx_status = k3_dma_tx_status; + d->slave.device_prep_dma_memcpy = k3_dma_prep_memcpy; + d->slave.device_prep_slave_sg = k3_dma_prep_slave_sg; + d->slave.device_prep_dma_cyclic = k3_dma_prep_dma_cyclic; + d->slave.device_issue_pending = k3_dma_issue_pending; + d->slave.device_config = k3_dma_config; + d->slave.device_pause = k3_dma_transfer_pause; + d->slave.device_resume = k3_dma_transfer_resume; + d->slave.device_terminate_all = k3_dma_terminate_all; + d->slave.device_synchronize = k3_dma_synchronize; + d->slave.copy_align = DMAENGINE_ALIGN_8_BYTES; + + /* init virtual channel */ + d->chans = devm_kcalloc(&op->dev, + d->dma_requests, sizeof(struct k3_dma_chan), GFP_KERNEL); + if (d->chans == NULL) + return -ENOMEM; + + for (i = 0; i < d->dma_requests; i++) { + struct k3_dma_chan *c = &d->chans[i]; + + c->status = DMA_IN_PROGRESS; + INIT_LIST_HEAD(&c->node); + c->vc.desc_free = k3_dma_free_desc; + vchan_init(&c->vc, &d->slave); + } + + /* Enable clock before accessing registers */ + ret = clk_prepare_enable(d->clk); + if (ret < 0) { + dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret); + return ret; + } + + k3_dma_enable_dma(d, true); + + ret = dma_async_device_register(&d->slave); + if (ret) + goto dma_async_register_fail; + + ret = of_dma_controller_register((&op->dev)->of_node, + k3_of_dma_simple_xlate, d); + if (ret) + goto of_dma_register_fail; + + spin_lock_init(&d->lock); + INIT_LIST_HEAD(&d->chan_pending); + tasklet_setup(&d->task, k3_dma_tasklet); + platform_set_drvdata(op, d); + dev_info(&op->dev, "initialized\n"); + + return 0; + +of_dma_register_fail: + dma_async_device_unregister(&d->slave); +dma_async_register_fail: + clk_disable_unprepare(d->clk); + return ret; +} + +static int k3_dma_remove(struct platform_device *op) +{ + struct k3_dma_chan *c, *cn; + struct k3_dma_dev *d = platform_get_drvdata(op); + + dma_async_device_unregister(&d->slave); + of_dma_controller_free((&op->dev)->of_node); + + devm_free_irq(&op->dev, d->irq, d); + + list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } + tasklet_kill(&d->task); + clk_disable_unprepare(d->clk); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int k3_dma_suspend_dev(struct device *dev) +{ + struct k3_dma_dev *d = dev_get_drvdata(dev); + u32 stat = 0; + + stat = k3_dma_get_chan_stat(d); + if (stat) { + dev_warn(d->slave.dev, + "chan %d is running fail to suspend\n", stat); + return -1; + } + k3_dma_enable_dma(d, false); + clk_disable_unprepare(d->clk); + return 0; +} + +static int k3_dma_resume_dev(struct device *dev) +{ + struct k3_dma_dev *d = dev_get_drvdata(dev); + int ret = 0; + + ret = clk_prepare_enable(d->clk); + if (ret < 0) { + dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret); + return ret; + } + k3_dma_enable_dma(d, true); + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(k3_dma_pmops, k3_dma_suspend_dev, k3_dma_resume_dev); + +static struct platform_driver k3_pdma_driver = { + .driver = { + .name = DRIVER_NAME, + .pm = &k3_dma_pmops, + .of_match_table = k3_pdma_dt_ids, + }, + .probe = k3_dma_probe, + .remove = k3_dma_remove, +}; + +module_platform_driver(k3_pdma_driver); + +MODULE_DESCRIPTION("HiSilicon k3 DMA Driver"); +MODULE_ALIAS("platform:k3dma"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/lgm/Kconfig b/drivers/dma/lgm/Kconfig new file mode 100644 index 0000000000..9194330ed0 --- /dev/null +++ b/drivers/dma/lgm/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config INTEL_LDMA + bool "Lightning Mountain centralized DMA controllers" + depends on X86 || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for Intel Lightning Mountain SOC DMA controllers. + These controllers provide DMA capabilities for a variety of on-chip + devices such as HSNAND and GSWIP (Gigabit Switch IP). diff --git a/drivers/dma/lgm/Makefile b/drivers/dma/lgm/Makefile new file mode 100644 index 0000000000..f318a8eff4 --- /dev/null +++ b/drivers/dma/lgm/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_INTEL_LDMA) += lgm-dma.o diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c new file mode 100644 index 0000000000..4117c7b67e --- /dev/null +++ b/drivers/dma/lgm/lgm-dma.c @@ -0,0 +1,1735 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Lightning Mountain centralized DMA controller driver + * + * Copyright (c) 2016 - 2020 Intel Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" +#include "../virt-dma.h" + +#define DRIVER_NAME "lgm-dma" + +#define DMA_ID 0x0008 +#define DMA_ID_REV GENMASK(7, 0) +#define DMA_ID_PNR GENMASK(19, 16) +#define DMA_ID_CHNR GENMASK(26, 20) +#define DMA_ID_DW_128B BIT(27) +#define DMA_ID_AW_36B BIT(28) +#define DMA_VER32 0x32 +#define DMA_VER31 0x31 +#define DMA_VER22 0x0A + +#define DMA_CTRL 0x0010 +#define DMA_CTRL_RST BIT(0) +#define DMA_CTRL_DSRAM_PATH BIT(1) +#define DMA_CTRL_DBURST_WR BIT(3) +#define DMA_CTRL_VLD_DF_ACK BIT(4) +#define DMA_CTRL_CH_FL BIT(6) +#define DMA_CTRL_DS_FOD BIT(7) +#define DMA_CTRL_DRB BIT(8) +#define DMA_CTRL_ENBE BIT(9) +#define DMA_CTRL_DESC_TMOUT_CNT_V31 GENMASK(27, 16) +#define DMA_CTRL_DESC_TMOUT_EN_V31 BIT(30) +#define DMA_CTRL_PKTARB BIT(31) + +#define DMA_CPOLL 0x0014 +#define DMA_CPOLL_CNT GENMASK(15, 4) +#define DMA_CPOLL_EN BIT(31) + +#define DMA_CS 0x0018 +#define DMA_CS_MASK GENMASK(5, 0) + +#define DMA_CCTRL 0x001C +#define DMA_CCTRL_ON BIT(0) +#define DMA_CCTRL_RST BIT(1) +#define DMA_CCTRL_CH_POLL_EN BIT(2) +#define DMA_CCTRL_CH_ABC BIT(3) /* Adaptive Burst Chop */ +#define DMA_CDBA_MSB GENMASK(7, 4) +#define DMA_CCTRL_DIR_TX BIT(8) +#define DMA_CCTRL_CLASS GENMASK(11, 9) +#define DMA_CCTRL_CLASSH GENMASK(19, 18) +#define DMA_CCTRL_WR_NP_EN BIT(21) +#define DMA_CCTRL_PDEN BIT(23) +#define DMA_MAX_CLASS (SZ_32 - 1) + +#define DMA_CDBA 0x0020 +#define DMA_CDLEN 0x0024 +#define DMA_CIS 0x0028 +#define DMA_CIE 0x002C +#define DMA_CI_EOP BIT(1) +#define DMA_CI_DUR BIT(2) +#define DMA_CI_DESCPT BIT(3) +#define DMA_CI_CHOFF BIT(4) +#define DMA_CI_RDERR BIT(5) +#define DMA_CI_ALL \ + (DMA_CI_EOP | DMA_CI_DUR | DMA_CI_DESCPT | DMA_CI_CHOFF | DMA_CI_RDERR) + +#define DMA_PS 0x0040 +#define DMA_PCTRL 0x0044 +#define DMA_PCTRL_RXBL16 BIT(0) +#define DMA_PCTRL_TXBL16 BIT(1) +#define DMA_PCTRL_RXBL GENMASK(3, 2) +#define DMA_PCTRL_RXBL_8 3 +#define DMA_PCTRL_TXBL GENMASK(5, 4) +#define DMA_PCTRL_TXBL_8 3 +#define DMA_PCTRL_PDEN BIT(6) +#define DMA_PCTRL_RXBL32 BIT(7) +#define DMA_PCTRL_RXENDI GENMASK(9, 8) +#define DMA_PCTRL_TXENDI GENMASK(11, 10) +#define DMA_PCTRL_TXBL32 BIT(15) +#define DMA_PCTRL_MEM_FLUSH BIT(16) + +#define DMA_IRNEN1 0x00E8 +#define DMA_IRNCR1 0x00EC +#define DMA_IRNEN 0x00F4 +#define DMA_IRNCR 0x00F8 +#define DMA_C_DP_TICK 0x100 +#define DMA_C_DP_TICK_TIKNARB GENMASK(15, 0) +#define DMA_C_DP_TICK_TIKARB GENMASK(31, 16) + +#define DMA_C_HDRM 0x110 +/* + * If header mode is set in DMA descriptor, + * If bit 30 is disabled, HDR_LEN must be configured according to channel + * requirement. + * If bit 30 is enabled(checksum with heade mode), HDR_LEN has no need to + * be configured. It will enable check sum for switch + * If header mode is not set in DMA descriptor, + * This register setting doesn't matter + */ +#define DMA_C_HDRM_HDR_SUM BIT(30) + +#define DMA_C_BOFF 0x120 +#define DMA_C_BOFF_BOF_LEN GENMASK(7, 0) +#define DMA_C_BOFF_EN BIT(31) + +#define DMA_ORRC 0x190 +#define DMA_ORRC_ORRCNT GENMASK(8, 4) +#define DMA_ORRC_EN BIT(31) + +#define DMA_C_ENDIAN 0x200 +#define DMA_C_END_DATAENDI GENMASK(1, 0) +#define DMA_C_END_DE_EN BIT(7) +#define DMA_C_END_DESENDI GENMASK(9, 8) +#define DMA_C_END_DES_EN BIT(16) + +/* DMA controller capability */ +#define DMA_ADDR_36BIT BIT(0) +#define DMA_DATA_128BIT BIT(1) +#define DMA_CHAN_FLOW_CTL BIT(2) +#define DMA_DESC_FOD BIT(3) +#define DMA_DESC_IN_SRAM BIT(4) +#define DMA_EN_BYTE_EN BIT(5) +#define DMA_DBURST_WR BIT(6) +#define DMA_VALID_DESC_FETCH_ACK BIT(7) +#define DMA_DFT_DRB BIT(8) + +#define DMA_ORRC_MAX_CNT (SZ_32 - 1) +#define DMA_DFT_POLL_CNT SZ_4 +#define DMA_DFT_BURST_V22 SZ_2 +#define DMA_BURSTL_8DW SZ_8 +#define DMA_BURSTL_16DW SZ_16 +#define DMA_BURSTL_32DW SZ_32 +#define DMA_DFT_BURST DMA_BURSTL_16DW +#define DMA_MAX_DESC_NUM (SZ_8K - 1) +#define DMA_CHAN_BOFF_MAX (SZ_256 - 1) +#define DMA_DFT_ENDIAN 0 + +#define DMA_DFT_DESC_TCNT 50 +#define DMA_HDR_LEN_MAX (SZ_16K - 1) + +/* DMA flags */ +#define DMA_TX_CH BIT(0) +#define DMA_RX_CH BIT(1) +#define DEVICE_ALLOC_DESC BIT(2) +#define CHAN_IN_USE BIT(3) +#define DMA_HW_DESC BIT(4) + +/* Descriptor fields */ +#define DESC_DATA_LEN GENMASK(15, 0) +#define DESC_BYTE_OFF GENMASK(25, 23) +#define DESC_EOP BIT(28) +#define DESC_SOP BIT(29) +#define DESC_C BIT(30) +#define DESC_OWN BIT(31) + +#define DMA_CHAN_RST 1 +#define DMA_MAX_SIZE (BIT(16) - 1) +#define MAX_LOWER_CHANS 32 +#define MASK_LOWER_CHANS GENMASK(4, 0) +#define DMA_OWN 1 +#define HIGH_4_BITS GENMASK(3, 0) +#define DMA_DFT_DESC_NUM 1 +#define DMA_PKT_DROP_DIS 0 + +enum ldma_chan_on_off { + DMA_CH_OFF = 0, + DMA_CH_ON = 1, +}; + +enum { + DMA_TYPE_TX = 0, + DMA_TYPE_RX, + DMA_TYPE_MCPY, +}; + +struct ldma_dev; +struct ldma_port; + +struct ldma_chan { + struct virt_dma_chan vchan; + struct ldma_port *port; /* back pointer */ + char name[8]; /* Channel name */ + int nr; /* Channel id in hardware */ + u32 flags; /* central way or channel based way */ + enum ldma_chan_on_off onoff; + dma_addr_t desc_phys; + void *desc_base; /* Virtual address */ + u32 desc_cnt; /* Number of descriptors */ + int rst; + u32 hdrm_len; + bool hdrm_csum; + u32 boff_len; + u32 data_endian; + u32 desc_endian; + bool pden; + bool desc_rx_np; + bool data_endian_en; + bool desc_endian_en; + bool abc_en; + bool desc_init; + struct dma_pool *desc_pool; /* Descriptors pool */ + u32 desc_num; + struct dw2_desc_sw *ds; + struct work_struct work; + struct dma_slave_config config; +}; + +struct ldma_port { + struct ldma_dev *ldev; /* back pointer */ + u32 portid; + u32 rxbl; + u32 txbl; + u32 rxendi; + u32 txendi; + u32 pkt_drop; +}; + +/* Instance specific data */ +struct ldma_inst_data { + bool desc_in_sram; + bool chan_fc; + bool desc_fod; /* Fetch On Demand */ + bool valid_desc_fetch_ack; + u32 orrc; /* Outstanding read count */ + const char *name; + u32 type; +}; + +struct ldma_dev { + struct device *dev; + void __iomem *base; + struct reset_control *rst; + struct clk *core_clk; + struct dma_device dma_dev; + u32 ver; + int irq; + struct ldma_port *ports; + struct ldma_chan *chans; /* channel list on this DMA or port */ + spinlock_t dev_lock; /* Controller register exclusive */ + u32 chan_nrs; + u32 port_nrs; + u32 channels_mask; + u32 flags; + u32 pollcnt; + const struct ldma_inst_data *inst; + struct workqueue_struct *wq; +}; + +struct dw2_desc { + u32 field; + u32 addr; +} __packed __aligned(8); + +struct dw2_desc_sw { + struct virt_dma_desc vdesc; + struct ldma_chan *chan; + dma_addr_t desc_phys; + size_t desc_cnt; + size_t size; + struct dw2_desc *desc_hw; +}; + +static inline void +ldma_update_bits(struct ldma_dev *d, u32 mask, u32 val, u32 ofs) +{ + u32 old_val, new_val; + + old_val = readl(d->base + ofs); + new_val = (old_val & ~mask) | (val & mask); + + if (new_val != old_val) + writel(new_val, d->base + ofs); +} + +static inline struct ldma_chan *to_ldma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct ldma_chan, vchan.chan); +} + +static inline struct ldma_dev *to_ldma_dev(struct dma_device *dma_dev) +{ + return container_of(dma_dev, struct ldma_dev, dma_dev); +} + +static inline struct dw2_desc_sw *to_lgm_dma_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct dw2_desc_sw, vdesc); +} + +static inline bool ldma_chan_tx(struct ldma_chan *c) +{ + return !!(c->flags & DMA_TX_CH); +} + +static inline bool ldma_chan_is_hw_desc(struct ldma_chan *c) +{ + return !!(c->flags & DMA_HW_DESC); +} + +static void ldma_dev_reset(struct ldma_dev *d) + +{ + unsigned long flags; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, DMA_CTRL_RST, DMA_CTRL_RST, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_pkt_arb_cfg(struct ldma_dev *d, bool enable) +{ + unsigned long flags; + u32 mask = DMA_CTRL_PKTARB; + u32 val = enable ? DMA_CTRL_PKTARB : 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_sram_desc_cfg(struct ldma_dev *d, bool enable) +{ + unsigned long flags; + u32 mask = DMA_CTRL_DSRAM_PATH; + u32 val = enable ? DMA_CTRL_DSRAM_PATH : 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_chan_flow_ctl_cfg(struct ldma_dev *d, bool enable) +{ + unsigned long flags; + u32 mask, val; + + if (d->inst->type != DMA_TYPE_TX) + return; + + mask = DMA_CTRL_CH_FL; + val = enable ? DMA_CTRL_CH_FL : 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_global_polling_enable(struct ldma_dev *d) +{ + unsigned long flags; + u32 mask = DMA_CPOLL_EN | DMA_CPOLL_CNT; + u32 val = DMA_CPOLL_EN; + + val |= FIELD_PREP(DMA_CPOLL_CNT, d->pollcnt); + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CPOLL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_desc_fetch_on_demand_cfg(struct ldma_dev *d, bool enable) +{ + unsigned long flags; + u32 mask, val; + + if (d->inst->type == DMA_TYPE_MCPY) + return; + + mask = DMA_CTRL_DS_FOD; + val = enable ? DMA_CTRL_DS_FOD : 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_byte_enable_cfg(struct ldma_dev *d, bool enable) +{ + unsigned long flags; + u32 mask = DMA_CTRL_ENBE; + u32 val = enable ? DMA_CTRL_ENBE : 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_orrc_cfg(struct ldma_dev *d) +{ + unsigned long flags; + u32 val = 0; + u32 mask; + + if (d->inst->type == DMA_TYPE_RX) + return; + + mask = DMA_ORRC_EN | DMA_ORRC_ORRCNT; + if (d->inst->orrc > 0 && d->inst->orrc <= DMA_ORRC_MAX_CNT) + val = DMA_ORRC_EN | FIELD_PREP(DMA_ORRC_ORRCNT, d->inst->orrc); + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_ORRC); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_df_tout_cfg(struct ldma_dev *d, bool enable, int tcnt) +{ + u32 mask = DMA_CTRL_DESC_TMOUT_CNT_V31; + unsigned long flags; + u32 val; + + if (enable) + val = DMA_CTRL_DESC_TMOUT_EN_V31 | FIELD_PREP(DMA_CTRL_DESC_TMOUT_CNT_V31, tcnt); + else + val = 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_dburst_wr_cfg(struct ldma_dev *d, bool enable) +{ + unsigned long flags; + u32 mask, val; + + if (d->inst->type != DMA_TYPE_RX && d->inst->type != DMA_TYPE_MCPY) + return; + + mask = DMA_CTRL_DBURST_WR; + val = enable ? DMA_CTRL_DBURST_WR : 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_vld_fetch_ack_cfg(struct ldma_dev *d, bool enable) +{ + unsigned long flags; + u32 mask, val; + + if (d->inst->type != DMA_TYPE_TX) + return; + + mask = DMA_CTRL_VLD_DF_ACK; + val = enable ? DMA_CTRL_VLD_DF_ACK : 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_dev_drb_cfg(struct ldma_dev *d, int enable) +{ + unsigned long flags; + u32 mask = DMA_CTRL_DRB; + u32 val = enable ? DMA_CTRL_DRB : 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, mask, val, DMA_CTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static int ldma_dev_cfg(struct ldma_dev *d) +{ + bool enable; + + ldma_dev_pkt_arb_cfg(d, true); + ldma_dev_global_polling_enable(d); + + enable = !!(d->flags & DMA_DFT_DRB); + ldma_dev_drb_cfg(d, enable); + + enable = !!(d->flags & DMA_EN_BYTE_EN); + ldma_dev_byte_enable_cfg(d, enable); + + enable = !!(d->flags & DMA_CHAN_FLOW_CTL); + ldma_dev_chan_flow_ctl_cfg(d, enable); + + enable = !!(d->flags & DMA_DESC_FOD); + ldma_dev_desc_fetch_on_demand_cfg(d, enable); + + enable = !!(d->flags & DMA_DESC_IN_SRAM); + ldma_dev_sram_desc_cfg(d, enable); + + enable = !!(d->flags & DMA_DBURST_WR); + ldma_dev_dburst_wr_cfg(d, enable); + + enable = !!(d->flags & DMA_VALID_DESC_FETCH_ACK); + ldma_dev_vld_fetch_ack_cfg(d, enable); + + if (d->ver > DMA_VER22) { + ldma_dev_orrc_cfg(d); + ldma_dev_df_tout_cfg(d, true, DMA_DFT_DESC_TCNT); + } + + dev_dbg(d->dev, "%s Controller 0x%08x configuration done\n", + d->inst->name, readl(d->base + DMA_CTRL)); + + return 0; +} + +static int ldma_chan_cctrl_cfg(struct ldma_chan *c, u32 val) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 class_low, class_high; + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + reg = readl(d->base + DMA_CCTRL); + /* Read from hardware */ + if (reg & DMA_CCTRL_DIR_TX) + c->flags |= DMA_TX_CH; + else + c->flags |= DMA_RX_CH; + + /* Keep the class value unchanged */ + class_low = FIELD_GET(DMA_CCTRL_CLASS, reg); + class_high = FIELD_GET(DMA_CCTRL_CLASSH, reg); + val &= ~DMA_CCTRL_CLASS; + val |= FIELD_PREP(DMA_CCTRL_CLASS, class_low); + val &= ~DMA_CCTRL_CLASSH; + val |= FIELD_PREP(DMA_CCTRL_CLASSH, class_high); + writel(val, d->base + DMA_CCTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); + + return 0; +} + +static void ldma_chan_irq_init(struct ldma_chan *c) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + unsigned long flags; + u32 enofs, crofs; + u32 cn_bit; + + if (c->nr < MAX_LOWER_CHANS) { + enofs = DMA_IRNEN; + crofs = DMA_IRNCR; + } else { + enofs = DMA_IRNEN1; + crofs = DMA_IRNCR1; + } + + cn_bit = BIT(c->nr & MASK_LOWER_CHANS); + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + + /* Clear all interrupts and disabled it */ + writel(0, d->base + DMA_CIE); + writel(DMA_CI_ALL, d->base + DMA_CIS); + + ldma_update_bits(d, cn_bit, 0, enofs); + writel(cn_bit, d->base + crofs); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_chan_set_class(struct ldma_chan *c, u32 val) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 class_val; + + if (d->inst->type == DMA_TYPE_MCPY || val > DMA_MAX_CLASS) + return; + + /* 3 bits low */ + class_val = FIELD_PREP(DMA_CCTRL_CLASS, val & 0x7); + /* 2 bits high */ + class_val |= FIELD_PREP(DMA_CCTRL_CLASSH, (val >> 3) & 0x3); + + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, DMA_CCTRL_CLASS | DMA_CCTRL_CLASSH, class_val, + DMA_CCTRL); +} + +static int ldma_chan_on(struct ldma_chan *c) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + unsigned long flags; + + /* If descriptors not configured, not allow to turn on channel */ + if (WARN_ON(!c->desc_init)) + return -EINVAL; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, DMA_CCTRL_ON, DMA_CCTRL_ON, DMA_CCTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); + + c->onoff = DMA_CH_ON; + + return 0; +} + +static int ldma_chan_off(struct ldma_chan *c) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + unsigned long flags; + u32 val; + int ret; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, DMA_CCTRL_ON, 0, DMA_CCTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); + + ret = readl_poll_timeout_atomic(d->base + DMA_CCTRL, val, + !(val & DMA_CCTRL_ON), 0, 10000); + if (ret) + return ret; + + c->onoff = DMA_CH_OFF; + + return 0; +} + +static void ldma_chan_desc_hw_cfg(struct ldma_chan *c, dma_addr_t desc_base, + int desc_num) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + unsigned long flags; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + writel(lower_32_bits(desc_base), d->base + DMA_CDBA); + + /* Higher 4 bits of 36 bit addressing */ + if (IS_ENABLED(CONFIG_64BIT)) { + u32 hi = upper_32_bits(desc_base) & HIGH_4_BITS; + + ldma_update_bits(d, DMA_CDBA_MSB, + FIELD_PREP(DMA_CDBA_MSB, hi), DMA_CCTRL); + } + writel(desc_num, d->base + DMA_CDLEN); + spin_unlock_irqrestore(&d->dev_lock, flags); + + c->desc_init = true; +} + +static struct dma_async_tx_descriptor * +ldma_chan_desc_cfg(struct dma_chan *chan, dma_addr_t desc_base, int desc_num) +{ + struct ldma_chan *c = to_ldma_chan(chan); + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + struct dma_async_tx_descriptor *tx; + struct dw2_desc_sw *ds; + + if (!desc_num) { + dev_err(d->dev, "Channel %d must allocate descriptor first\n", + c->nr); + return NULL; + } + + if (desc_num > DMA_MAX_DESC_NUM) { + dev_err(d->dev, "Channel %d descriptor number out of range %d\n", + c->nr, desc_num); + return NULL; + } + + ldma_chan_desc_hw_cfg(c, desc_base, desc_num); + + c->flags |= DMA_HW_DESC; + c->desc_cnt = desc_num; + c->desc_phys = desc_base; + + ds = kzalloc(sizeof(*ds), GFP_NOWAIT); + if (!ds) + return NULL; + + tx = &ds->vdesc.tx; + dma_async_tx_descriptor_init(tx, chan); + + return tx; +} + +static int ldma_chan_reset(struct ldma_chan *c) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + unsigned long flags; + u32 val; + int ret; + + ret = ldma_chan_off(c); + if (ret) + return ret; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, DMA_CCTRL_RST, DMA_CCTRL_RST, DMA_CCTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); + + ret = readl_poll_timeout_atomic(d->base + DMA_CCTRL, val, + !(val & DMA_CCTRL_RST), 0, 10000); + if (ret) + return ret; + + c->rst = 1; + c->desc_init = false; + + return 0; +} + +static void ldma_chan_byte_offset_cfg(struct ldma_chan *c, u32 boff_len) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 mask = DMA_C_BOFF_EN | DMA_C_BOFF_BOF_LEN; + u32 val; + + if (boff_len > 0 && boff_len <= DMA_CHAN_BOFF_MAX) + val = FIELD_PREP(DMA_C_BOFF_BOF_LEN, boff_len) | DMA_C_BOFF_EN; + else + val = 0; + + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, mask, val, DMA_C_BOFF); +} + +static void ldma_chan_data_endian_cfg(struct ldma_chan *c, bool enable, + u32 endian_type) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 mask = DMA_C_END_DE_EN | DMA_C_END_DATAENDI; + u32 val; + + if (enable) + val = DMA_C_END_DE_EN | FIELD_PREP(DMA_C_END_DATAENDI, endian_type); + else + val = 0; + + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, mask, val, DMA_C_ENDIAN); +} + +static void ldma_chan_desc_endian_cfg(struct ldma_chan *c, bool enable, + u32 endian_type) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 mask = DMA_C_END_DES_EN | DMA_C_END_DESENDI; + u32 val; + + if (enable) + val = DMA_C_END_DES_EN | FIELD_PREP(DMA_C_END_DESENDI, endian_type); + else + val = 0; + + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, mask, val, DMA_C_ENDIAN); +} + +static void ldma_chan_hdr_mode_cfg(struct ldma_chan *c, u32 hdr_len, bool csum) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 mask, val; + + /* NB, csum disabled, hdr length must be provided */ + if (!csum && (!hdr_len || hdr_len > DMA_HDR_LEN_MAX)) + return; + + mask = DMA_C_HDRM_HDR_SUM; + val = DMA_C_HDRM_HDR_SUM; + + if (!csum && hdr_len) + val = hdr_len; + + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, mask, val, DMA_C_HDRM); +} + +static void ldma_chan_rxwr_np_cfg(struct ldma_chan *c, bool enable) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 mask, val; + + /* Only valid for RX channel */ + if (ldma_chan_tx(c)) + return; + + mask = DMA_CCTRL_WR_NP_EN; + val = enable ? DMA_CCTRL_WR_NP_EN : 0; + + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, mask, val, DMA_CCTRL); +} + +static void ldma_chan_abc_cfg(struct ldma_chan *c, bool enable) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 mask, val; + + if (d->ver < DMA_VER32 || ldma_chan_tx(c)) + return; + + mask = DMA_CCTRL_CH_ABC; + val = enable ? DMA_CCTRL_CH_ABC : 0; + + ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS); + ldma_update_bits(d, mask, val, DMA_CCTRL); +} + +static int ldma_port_cfg(struct ldma_port *p) +{ + unsigned long flags; + struct ldma_dev *d; + u32 reg; + + d = p->ldev; + reg = FIELD_PREP(DMA_PCTRL_TXENDI, p->txendi); + reg |= FIELD_PREP(DMA_PCTRL_RXENDI, p->rxendi); + + if (d->ver == DMA_VER22) { + reg |= FIELD_PREP(DMA_PCTRL_TXBL, p->txbl); + reg |= FIELD_PREP(DMA_PCTRL_RXBL, p->rxbl); + } else { + reg |= FIELD_PREP(DMA_PCTRL_PDEN, p->pkt_drop); + + if (p->txbl == DMA_BURSTL_32DW) + reg |= DMA_PCTRL_TXBL32; + else if (p->txbl == DMA_BURSTL_16DW) + reg |= DMA_PCTRL_TXBL16; + else + reg |= FIELD_PREP(DMA_PCTRL_TXBL, DMA_PCTRL_TXBL_8); + + if (p->rxbl == DMA_BURSTL_32DW) + reg |= DMA_PCTRL_RXBL32; + else if (p->rxbl == DMA_BURSTL_16DW) + reg |= DMA_PCTRL_RXBL16; + else + reg |= FIELD_PREP(DMA_PCTRL_RXBL, DMA_PCTRL_RXBL_8); + } + + spin_lock_irqsave(&d->dev_lock, flags); + writel(p->portid, d->base + DMA_PS); + writel(reg, d->base + DMA_PCTRL); + spin_unlock_irqrestore(&d->dev_lock, flags); + + reg = readl(d->base + DMA_PCTRL); /* read back */ + dev_dbg(d->dev, "Port Control 0x%08x configuration done\n", reg); + + return 0; +} + +static int ldma_chan_cfg(struct ldma_chan *c) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + unsigned long flags; + u32 reg; + + reg = c->pden ? DMA_CCTRL_PDEN : 0; + reg |= c->onoff ? DMA_CCTRL_ON : 0; + reg |= c->rst ? DMA_CCTRL_RST : 0; + + ldma_chan_cctrl_cfg(c, reg); + ldma_chan_irq_init(c); + + if (d->ver <= DMA_VER22) + return 0; + + spin_lock_irqsave(&d->dev_lock, flags); + ldma_chan_set_class(c, c->nr); + ldma_chan_byte_offset_cfg(c, c->boff_len); + ldma_chan_data_endian_cfg(c, c->data_endian_en, c->data_endian); + ldma_chan_desc_endian_cfg(c, c->desc_endian_en, c->desc_endian); + ldma_chan_hdr_mode_cfg(c, c->hdrm_len, c->hdrm_csum); + ldma_chan_rxwr_np_cfg(c, c->desc_rx_np); + ldma_chan_abc_cfg(c, c->abc_en); + spin_unlock_irqrestore(&d->dev_lock, flags); + + if (ldma_chan_is_hw_desc(c)) + ldma_chan_desc_hw_cfg(c, c->desc_phys, c->desc_cnt); + + return 0; +} + +static void ldma_dev_init(struct ldma_dev *d) +{ + unsigned long ch_mask = (unsigned long)d->channels_mask; + struct ldma_port *p; + struct ldma_chan *c; + int i; + u32 j; + + spin_lock_init(&d->dev_lock); + ldma_dev_reset(d); + ldma_dev_cfg(d); + + /* DMA port initialization */ + for (i = 0; i < d->port_nrs; i++) { + p = &d->ports[i]; + ldma_port_cfg(p); + } + + /* DMA channel initialization */ + for_each_set_bit(j, &ch_mask, d->chan_nrs) { + c = &d->chans[j]; + ldma_chan_cfg(c); + } +} + +static int ldma_parse_dt(struct ldma_dev *d) +{ + struct fwnode_handle *fwnode = dev_fwnode(d->dev); + struct ldma_port *p; + int i; + + if (fwnode_property_read_bool(fwnode, "intel,dma-byte-en")) + d->flags |= DMA_EN_BYTE_EN; + + if (fwnode_property_read_bool(fwnode, "intel,dma-dburst-wr")) + d->flags |= DMA_DBURST_WR; + + if (fwnode_property_read_bool(fwnode, "intel,dma-drb")) + d->flags |= DMA_DFT_DRB; + + if (fwnode_property_read_u32(fwnode, "intel,dma-poll-cnt", + &d->pollcnt)) + d->pollcnt = DMA_DFT_POLL_CNT; + + if (d->inst->chan_fc) + d->flags |= DMA_CHAN_FLOW_CTL; + + if (d->inst->desc_fod) + d->flags |= DMA_DESC_FOD; + + if (d->inst->desc_in_sram) + d->flags |= DMA_DESC_IN_SRAM; + + if (d->inst->valid_desc_fetch_ack) + d->flags |= DMA_VALID_DESC_FETCH_ACK; + + if (d->ver > DMA_VER22) { + if (!d->port_nrs) + return -EINVAL; + + for (i = 0; i < d->port_nrs; i++) { + p = &d->ports[i]; + p->rxendi = DMA_DFT_ENDIAN; + p->txendi = DMA_DFT_ENDIAN; + p->rxbl = DMA_DFT_BURST; + p->txbl = DMA_DFT_BURST; + p->pkt_drop = DMA_PKT_DROP_DIS; + } + } + + return 0; +} + +static void dma_free_desc_resource(struct virt_dma_desc *vdesc) +{ + struct dw2_desc_sw *ds = to_lgm_dma_desc(vdesc); + struct ldma_chan *c = ds->chan; + + dma_pool_free(c->desc_pool, ds->desc_hw, ds->desc_phys); + kfree(ds); +} + +static struct dw2_desc_sw * +dma_alloc_desc_resource(int num, struct ldma_chan *c) +{ + struct device *dev = c->vchan.chan.device->dev; + struct dw2_desc_sw *ds; + + if (num > c->desc_num) { + dev_err(dev, "sg num %d exceed max %d\n", num, c->desc_num); + return NULL; + } + + ds = kzalloc(sizeof(*ds), GFP_NOWAIT); + if (!ds) + return NULL; + + ds->chan = c; + ds->desc_hw = dma_pool_zalloc(c->desc_pool, GFP_ATOMIC, + &ds->desc_phys); + if (!ds->desc_hw) { + dev_dbg(dev, "out of memory for link descriptor\n"); + kfree(ds); + return NULL; + } + ds->desc_cnt = num; + + return ds; +} + +static void ldma_chan_irq_en(struct ldma_chan *c) +{ + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + unsigned long flags; + + spin_lock_irqsave(&d->dev_lock, flags); + writel(c->nr, d->base + DMA_CS); + writel(DMA_CI_EOP, d->base + DMA_CIE); + writel(BIT(c->nr), d->base + DMA_IRNEN); + spin_unlock_irqrestore(&d->dev_lock, flags); +} + +static void ldma_issue_pending(struct dma_chan *chan) +{ + struct ldma_chan *c = to_ldma_chan(chan); + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + unsigned long flags; + + if (d->ver == DMA_VER22) { + spin_lock_irqsave(&c->vchan.lock, flags); + if (vchan_issue_pending(&c->vchan)) { + struct virt_dma_desc *vdesc; + + /* Get the next descriptor */ + vdesc = vchan_next_desc(&c->vchan); + if (!vdesc) { + c->ds = NULL; + spin_unlock_irqrestore(&c->vchan.lock, flags); + return; + } + list_del(&vdesc->node); + c->ds = to_lgm_dma_desc(vdesc); + ldma_chan_desc_hw_cfg(c, c->ds->desc_phys, c->ds->desc_cnt); + ldma_chan_irq_en(c); + } + spin_unlock_irqrestore(&c->vchan.lock, flags); + } + ldma_chan_on(c); +} + +static void ldma_synchronize(struct dma_chan *chan) +{ + struct ldma_chan *c = to_ldma_chan(chan); + + /* + * clear any pending work if any. In that + * case the resource needs to be free here. + */ + cancel_work_sync(&c->work); + vchan_synchronize(&c->vchan); + if (c->ds) + dma_free_desc_resource(&c->ds->vdesc); +} + +static int ldma_terminate_all(struct dma_chan *chan) +{ + struct ldma_chan *c = to_ldma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&c->vchan.lock, flags); + vchan_get_all_descriptors(&c->vchan, &head); + spin_unlock_irqrestore(&c->vchan.lock, flags); + vchan_dma_desc_free_list(&c->vchan, &head); + + return ldma_chan_reset(c); +} + +static int ldma_resume_chan(struct dma_chan *chan) +{ + struct ldma_chan *c = to_ldma_chan(chan); + + ldma_chan_on(c); + + return 0; +} + +static int ldma_pause_chan(struct dma_chan *chan) +{ + struct ldma_chan *c = to_ldma_chan(chan); + + return ldma_chan_off(c); +} + +static enum dma_status +ldma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct ldma_chan *c = to_ldma_chan(chan); + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + enum dma_status status = DMA_COMPLETE; + + if (d->ver == DMA_VER22) + status = dma_cookie_status(chan, cookie, txstate); + + return status; +} + +static void dma_chan_irq(int irq, void *data) +{ + struct ldma_chan *c = data; + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + u32 stat; + + /* Disable channel interrupts */ + writel(c->nr, d->base + DMA_CS); + stat = readl(d->base + DMA_CIS); + if (!stat) + return; + + writel(readl(d->base + DMA_CIE) & ~DMA_CI_ALL, d->base + DMA_CIE); + writel(stat, d->base + DMA_CIS); + queue_work(d->wq, &c->work); +} + +static irqreturn_t dma_interrupt(int irq, void *dev_id) +{ + struct ldma_dev *d = dev_id; + struct ldma_chan *c; + unsigned long irncr; + u32 cid; + + irncr = readl(d->base + DMA_IRNCR); + if (!irncr) { + dev_err(d->dev, "dummy interrupt\n"); + return IRQ_NONE; + } + + for_each_set_bit(cid, &irncr, d->chan_nrs) { + /* Mask */ + writel(readl(d->base + DMA_IRNEN) & ~BIT(cid), d->base + DMA_IRNEN); + /* Ack */ + writel(readl(d->base + DMA_IRNCR) | BIT(cid), d->base + DMA_IRNCR); + + c = &d->chans[cid]; + dma_chan_irq(irq, c); + } + + return IRQ_HANDLED; +} + +static void prep_slave_burst_len(struct ldma_chan *c) +{ + struct ldma_port *p = c->port; + struct dma_slave_config *cfg = &c->config; + + if (cfg->dst_maxburst) + cfg->src_maxburst = cfg->dst_maxburst; + + /* TX and RX has the same burst length */ + p->txbl = ilog2(cfg->src_maxburst); + p->rxbl = p->txbl; +} + +static struct dma_async_tx_descriptor * +ldma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sglen, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct ldma_chan *c = to_ldma_chan(chan); + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + size_t len, avail, total = 0; + struct dw2_desc *hw_ds; + struct dw2_desc_sw *ds; + struct scatterlist *sg; + int num = sglen, i; + dma_addr_t addr; + + if (!sgl) + return NULL; + + if (d->ver > DMA_VER22) + return ldma_chan_desc_cfg(chan, sgl->dma_address, sglen); + + for_each_sg(sgl, sg, sglen, i) { + avail = sg_dma_len(sg); + if (avail > DMA_MAX_SIZE) + num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1; + } + + ds = dma_alloc_desc_resource(num, c); + if (!ds) + return NULL; + + c->ds = ds; + + num = 0; + /* sop and eop has to be handled nicely */ + for_each_sg(sgl, sg, sglen, i) { + addr = sg_dma_address(sg); + avail = sg_dma_len(sg); + total += avail; + + do { + len = min_t(size_t, avail, DMA_MAX_SIZE); + + hw_ds = &ds->desc_hw[num]; + switch (sglen) { + case 1: + hw_ds->field &= ~DESC_SOP; + hw_ds->field |= FIELD_PREP(DESC_SOP, 1); + + hw_ds->field &= ~DESC_EOP; + hw_ds->field |= FIELD_PREP(DESC_EOP, 1); + break; + default: + if (num == 0) { + hw_ds->field &= ~DESC_SOP; + hw_ds->field |= FIELD_PREP(DESC_SOP, 1); + + hw_ds->field &= ~DESC_EOP; + hw_ds->field |= FIELD_PREP(DESC_EOP, 0); + } else if (num == (sglen - 1)) { + hw_ds->field &= ~DESC_SOP; + hw_ds->field |= FIELD_PREP(DESC_SOP, 0); + hw_ds->field &= ~DESC_EOP; + hw_ds->field |= FIELD_PREP(DESC_EOP, 1); + } else { + hw_ds->field &= ~DESC_SOP; + hw_ds->field |= FIELD_PREP(DESC_SOP, 0); + + hw_ds->field &= ~DESC_EOP; + hw_ds->field |= FIELD_PREP(DESC_EOP, 0); + } + break; + } + /* Only 32 bit address supported */ + hw_ds->addr = (u32)addr; + + hw_ds->field &= ~DESC_DATA_LEN; + hw_ds->field |= FIELD_PREP(DESC_DATA_LEN, len); + + hw_ds->field &= ~DESC_C; + hw_ds->field |= FIELD_PREP(DESC_C, 0); + + hw_ds->field &= ~DESC_BYTE_OFF; + hw_ds->field |= FIELD_PREP(DESC_BYTE_OFF, addr & 0x3); + + /* Ensure data ready before ownership change */ + wmb(); + hw_ds->field &= ~DESC_OWN; + hw_ds->field |= FIELD_PREP(DESC_OWN, DMA_OWN); + + /* Ensure ownership changed before moving forward */ + wmb(); + num++; + addr += len; + avail -= len; + } while (avail); + } + + ds->size = total; + prep_slave_burst_len(c); + + return vchan_tx_prep(&c->vchan, &ds->vdesc, DMA_CTRL_ACK); +} + +static int +ldma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) +{ + struct ldma_chan *c = to_ldma_chan(chan); + + memcpy(&c->config, cfg, sizeof(c->config)); + + return 0; +} + +static int ldma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ldma_chan *c = to_ldma_chan(chan); + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + struct device *dev = c->vchan.chan.device->dev; + size_t desc_sz; + + if (d->ver > DMA_VER22) { + c->flags |= CHAN_IN_USE; + return 0; + } + + if (c->desc_pool) + return c->desc_num; + + desc_sz = c->desc_num * sizeof(struct dw2_desc); + c->desc_pool = dma_pool_create(c->name, dev, desc_sz, + __alignof__(struct dw2_desc), 0); + + if (!c->desc_pool) { + dev_err(dev, "unable to allocate descriptor pool\n"); + return -ENOMEM; + } + + return c->desc_num; +} + +static void ldma_free_chan_resources(struct dma_chan *chan) +{ + struct ldma_chan *c = to_ldma_chan(chan); + struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device); + + if (d->ver == DMA_VER22) { + dma_pool_destroy(c->desc_pool); + c->desc_pool = NULL; + vchan_free_chan_resources(to_virt_chan(chan)); + ldma_chan_reset(c); + } else { + c->flags &= ~CHAN_IN_USE; + } +} + +static void dma_work(struct work_struct *work) +{ + struct ldma_chan *c = container_of(work, struct ldma_chan, work); + struct dma_async_tx_descriptor *tx = &c->ds->vdesc.tx; + struct virt_dma_chan *vc = &c->vchan; + struct dmaengine_desc_callback cb; + struct virt_dma_desc *vd, *_vd; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&c->vchan.lock, flags); + list_splice_tail_init(&vc->desc_completed, &head); + spin_unlock_irqrestore(&c->vchan.lock, flags); + dmaengine_desc_get_callback(tx, &cb); + dma_cookie_complete(tx); + dmaengine_desc_callback_invoke(&cb, NULL); + + list_for_each_entry_safe(vd, _vd, &head, node) { + dmaengine_desc_get_callback(tx, &cb); + dma_cookie_complete(tx); + list_del(&vd->node); + dmaengine_desc_callback_invoke(&cb, NULL); + + vchan_vdesc_fini(vd); + } + c->ds = NULL; +} + +static void +update_burst_len_v22(struct ldma_chan *c, struct ldma_port *p, u32 burst) +{ + if (ldma_chan_tx(c)) + p->txbl = ilog2(burst); + else + p->rxbl = ilog2(burst); +} + +static void +update_burst_len_v3X(struct ldma_chan *c, struct ldma_port *p, u32 burst) +{ + if (ldma_chan_tx(c)) + p->txbl = burst; + else + p->rxbl = burst; +} + +static int +update_client_configs(struct of_dma *ofdma, struct of_phandle_args *spec) +{ + struct ldma_dev *d = ofdma->of_dma_data; + u32 chan_id = spec->args[0]; + u32 port_id = spec->args[1]; + u32 burst = spec->args[2]; + struct ldma_port *p; + struct ldma_chan *c; + + if (chan_id >= d->chan_nrs || port_id >= d->port_nrs) + return 0; + + p = &d->ports[port_id]; + c = &d->chans[chan_id]; + c->port = p; + + if (d->ver == DMA_VER22) + update_burst_len_v22(c, p, burst); + else + update_burst_len_v3X(c, p, burst); + + ldma_port_cfg(p); + + return 1; +} + +static struct dma_chan *ldma_xlate(struct of_phandle_args *spec, + struct of_dma *ofdma) +{ + struct ldma_dev *d = ofdma->of_dma_data; + u32 chan_id = spec->args[0]; + int ret; + + if (!spec->args_count) + return NULL; + + /* if args_count is 1 driver use default settings */ + if (spec->args_count > 1) { + ret = update_client_configs(ofdma, spec); + if (!ret) + return NULL; + } + + return dma_get_slave_channel(&d->chans[chan_id].vchan.chan); +} + +static void ldma_dma_init_v22(int i, struct ldma_dev *d) +{ + struct ldma_chan *c; + + c = &d->chans[i]; + c->nr = i; /* Real channel number */ + c->rst = DMA_CHAN_RST; + c->desc_num = DMA_DFT_DESC_NUM; + snprintf(c->name, sizeof(c->name), "chan%d", c->nr); + INIT_WORK(&c->work, dma_work); + c->vchan.desc_free = dma_free_desc_resource; + vchan_init(&c->vchan, &d->dma_dev); +} + +static void ldma_dma_init_v3X(int i, struct ldma_dev *d) +{ + struct ldma_chan *c; + + c = &d->chans[i]; + c->data_endian = DMA_DFT_ENDIAN; + c->desc_endian = DMA_DFT_ENDIAN; + c->data_endian_en = false; + c->desc_endian_en = false; + c->desc_rx_np = false; + c->flags |= DEVICE_ALLOC_DESC; + c->onoff = DMA_CH_OFF; + c->rst = DMA_CHAN_RST; + c->abc_en = true; + c->hdrm_csum = false; + c->boff_len = 0; + c->nr = i; + c->vchan.desc_free = dma_free_desc_resource; + vchan_init(&c->vchan, &d->dma_dev); +} + +static int ldma_init_v22(struct ldma_dev *d, struct platform_device *pdev) +{ + int ret; + + ret = device_property_read_u32(d->dev, "dma-channels", &d->chan_nrs); + if (ret < 0) { + dev_err(d->dev, "unable to read dma-channels property\n"); + return ret; + } + + d->irq = platform_get_irq(pdev, 0); + if (d->irq < 0) + return d->irq; + + ret = devm_request_irq(&pdev->dev, d->irq, dma_interrupt, 0, + DRIVER_NAME, d); + if (ret) + return ret; + + d->wq = alloc_ordered_workqueue("dma_wq", WQ_MEM_RECLAIM | + WQ_HIGHPRI); + if (!d->wq) + return -ENOMEM; + + return 0; +} + +static void ldma_clk_disable(void *data) +{ + struct ldma_dev *d = data; + + clk_disable_unprepare(d->core_clk); + reset_control_assert(d->rst); +} + +static const struct ldma_inst_data dma0 = { + .name = "dma0", + .chan_fc = false, + .desc_fod = false, + .desc_in_sram = false, + .valid_desc_fetch_ack = false, +}; + +static const struct ldma_inst_data dma2tx = { + .name = "dma2tx", + .type = DMA_TYPE_TX, + .orrc = 16, + .chan_fc = true, + .desc_fod = true, + .desc_in_sram = true, + .valid_desc_fetch_ack = true, +}; + +static const struct ldma_inst_data dma1rx = { + .name = "dma1rx", + .type = DMA_TYPE_RX, + .orrc = 16, + .chan_fc = false, + .desc_fod = true, + .desc_in_sram = true, + .valid_desc_fetch_ack = false, +}; + +static const struct ldma_inst_data dma1tx = { + .name = "dma1tx", + .type = DMA_TYPE_TX, + .orrc = 16, + .chan_fc = true, + .desc_fod = true, + .desc_in_sram = true, + .valid_desc_fetch_ack = true, +}; + +static const struct ldma_inst_data dma0tx = { + .name = "dma0tx", + .type = DMA_TYPE_TX, + .orrc = 16, + .chan_fc = true, + .desc_fod = true, + .desc_in_sram = true, + .valid_desc_fetch_ack = true, +}; + +static const struct ldma_inst_data dma3 = { + .name = "dma3", + .type = DMA_TYPE_MCPY, + .orrc = 16, + .chan_fc = false, + .desc_fod = false, + .desc_in_sram = true, + .valid_desc_fetch_ack = false, +}; + +static const struct ldma_inst_data toe_dma30 = { + .name = "toe_dma30", + .type = DMA_TYPE_MCPY, + .orrc = 16, + .chan_fc = false, + .desc_fod = false, + .desc_in_sram = true, + .valid_desc_fetch_ack = true, +}; + +static const struct ldma_inst_data toe_dma31 = { + .name = "toe_dma31", + .type = DMA_TYPE_MCPY, + .orrc = 16, + .chan_fc = false, + .desc_fod = false, + .desc_in_sram = true, + .valid_desc_fetch_ack = true, +}; + +static const struct of_device_id intel_ldma_match[] = { + { .compatible = "intel,lgm-cdma", .data = &dma0}, + { .compatible = "intel,lgm-dma2tx", .data = &dma2tx}, + { .compatible = "intel,lgm-dma1rx", .data = &dma1rx}, + { .compatible = "intel,lgm-dma1tx", .data = &dma1tx}, + { .compatible = "intel,lgm-dma0tx", .data = &dma0tx}, + { .compatible = "intel,lgm-dma3", .data = &dma3}, + { .compatible = "intel,lgm-toe-dma30", .data = &toe_dma30}, + { .compatible = "intel,lgm-toe-dma31", .data = &toe_dma31}, + {} +}; + +static int intel_ldma_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dma_device *dma_dev; + unsigned long ch_mask; + struct ldma_chan *c; + struct ldma_port *p; + struct ldma_dev *d; + u32 id, bitn = 32, j; + int i, ret; + + d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + /* Link controller to platform device */ + d->dev = &pdev->dev; + + d->inst = device_get_match_data(dev); + if (!d->inst) { + dev_err(dev, "No device match found\n"); + return -ENODEV; + } + + d->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(d->base)) + return PTR_ERR(d->base); + + /* Power up and reset the dma engine, some DMAs always on?? */ + d->core_clk = devm_clk_get_optional(dev, NULL); + if (IS_ERR(d->core_clk)) + return PTR_ERR(d->core_clk); + + d->rst = devm_reset_control_get_optional(dev, NULL); + if (IS_ERR(d->rst)) + return PTR_ERR(d->rst); + + clk_prepare_enable(d->core_clk); + reset_control_deassert(d->rst); + + ret = devm_add_action_or_reset(dev, ldma_clk_disable, d); + if (ret) { + dev_err(dev, "Failed to devm_add_action_or_reset, %d\n", ret); + return ret; + } + + id = readl(d->base + DMA_ID); + d->chan_nrs = FIELD_GET(DMA_ID_CHNR, id); + d->port_nrs = FIELD_GET(DMA_ID_PNR, id); + d->ver = FIELD_GET(DMA_ID_REV, id); + + if (id & DMA_ID_AW_36B) + d->flags |= DMA_ADDR_36BIT; + + if (IS_ENABLED(CONFIG_64BIT) && (id & DMA_ID_AW_36B)) + bitn = 36; + + if (id & DMA_ID_DW_128B) + d->flags |= DMA_DATA_128BIT; + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(bitn)); + if (ret) { + dev_err(dev, "No usable DMA configuration\n"); + return ret; + } + + if (d->ver == DMA_VER22) { + ret = ldma_init_v22(d, pdev); + if (ret) + return ret; + } + + ret = device_property_read_u32(dev, "dma-channel-mask", &d->channels_mask); + if (ret < 0) + d->channels_mask = GENMASK(d->chan_nrs - 1, 0); + + dma_dev = &d->dma_dev; + + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + + /* Channel initializations */ + INIT_LIST_HEAD(&dma_dev->channels); + + /* Port Initializations */ + d->ports = devm_kcalloc(dev, d->port_nrs, sizeof(*p), GFP_KERNEL); + if (!d->ports) + return -ENOMEM; + + /* Channels Initializations */ + d->chans = devm_kcalloc(d->dev, d->chan_nrs, sizeof(*c), GFP_KERNEL); + if (!d->chans) + return -ENOMEM; + + for (i = 0; i < d->port_nrs; i++) { + p = &d->ports[i]; + p->portid = i; + p->ldev = d; + } + + dma_dev->dev = &pdev->dev; + + ch_mask = (unsigned long)d->channels_mask; + for_each_set_bit(j, &ch_mask, d->chan_nrs) { + if (d->ver == DMA_VER22) + ldma_dma_init_v22(j, d); + else + ldma_dma_init_v3X(j, d); + } + + ret = ldma_parse_dt(d); + if (ret) + return ret; + + dma_dev->device_alloc_chan_resources = ldma_alloc_chan_resources; + dma_dev->device_free_chan_resources = ldma_free_chan_resources; + dma_dev->device_terminate_all = ldma_terminate_all; + dma_dev->device_issue_pending = ldma_issue_pending; + dma_dev->device_tx_status = ldma_tx_status; + dma_dev->device_resume = ldma_resume_chan; + dma_dev->device_pause = ldma_pause_chan; + dma_dev->device_prep_slave_sg = ldma_prep_slave_sg; + + if (d->ver == DMA_VER22) { + dma_dev->device_config = ldma_slave_config; + dma_dev->device_synchronize = ldma_synchronize; + dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma_dev->directions = BIT(DMA_MEM_TO_DEV) | + BIT(DMA_DEV_TO_MEM); + dma_dev->residue_granularity = + DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + } + + platform_set_drvdata(pdev, d); + + ldma_dev_init(d); + + ret = dma_async_device_register(dma_dev); + if (ret) { + dev_err(dev, "Failed to register slave DMA engine device\n"); + return ret; + } + + ret = of_dma_controller_register(pdev->dev.of_node, ldma_xlate, d); + if (ret) { + dev_err(dev, "Failed to register of DMA controller\n"); + dma_async_device_unregister(dma_dev); + return ret; + } + + dev_info(dev, "Init done - rev: %x, ports: %d channels: %d\n", d->ver, + d->port_nrs, d->chan_nrs); + + return 0; +} + +static struct platform_driver intel_ldma_driver = { + .probe = intel_ldma_probe, + .driver = { + .name = DRIVER_NAME, + .of_match_table = intel_ldma_match, + }, +}; + +/* + * Perform this driver as device_initcall to make sure initialization happens + * before its DMA clients of some are platform specific and also to provide + * registered DMA channels and DMA capabilities to clients before their + * initialization. + */ +builtin_platform_driver(intel_ldma_driver); diff --git a/drivers/dma/lpc18xx-dmamux.c b/drivers/dma/lpc18xx-dmamux.c new file mode 100644 index 0000000000..2b6436f4b1 --- /dev/null +++ b/drivers/dma/lpc18xx-dmamux.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * DMA Router driver for LPC18xx/43xx DMA MUX + * + * Copyright (C) 2015 Joachim Eastwood + * + * Based on TI DMA Crossbar driver by: + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* CREG register offset and macros for mux manipulation */ +#define LPC18XX_CREG_DMAMUX 0x11c +#define LPC18XX_DMAMUX_VAL(v, n) ((v) << (n * 2)) +#define LPC18XX_DMAMUX_MASK(n) (0x3 << (n * 2)) +#define LPC18XX_DMAMUX_MAX_VAL 0x3 + +struct lpc18xx_dmamux { + u32 value; + bool busy; +}; + +struct lpc18xx_dmamux_data { + struct dma_router dmarouter; + struct lpc18xx_dmamux *muxes; + u32 dma_master_requests; + u32 dma_mux_requests; + struct regmap *reg; + spinlock_t lock; +}; + +static void lpc18xx_dmamux_free(struct device *dev, void *route_data) +{ + struct lpc18xx_dmamux_data *dmamux = dev_get_drvdata(dev); + struct lpc18xx_dmamux *mux = route_data; + unsigned long flags; + + spin_lock_irqsave(&dmamux->lock, flags); + mux->busy = false; + spin_unlock_irqrestore(&dmamux->lock, flags); +} + +static void *lpc18xx_dmamux_reserve(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct lpc18xx_dmamux_data *dmamux = platform_get_drvdata(pdev); + unsigned long flags; + unsigned mux; + + if (dma_spec->args_count != 3) { + dev_err(&pdev->dev, "invalid number of dma mux args\n"); + return ERR_PTR(-EINVAL); + } + + mux = dma_spec->args[0]; + if (mux >= dmamux->dma_master_requests) { + dev_err(&pdev->dev, "invalid mux number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[1] > LPC18XX_DMAMUX_MAX_VAL) { + dev_err(&pdev->dev, "invalid dma mux value: %d\n", + dma_spec->args[1]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return ERR_PTR(-EINVAL); + } + + spin_lock_irqsave(&dmamux->lock, flags); + if (dmamux->muxes[mux].busy) { + spin_unlock_irqrestore(&dmamux->lock, flags); + dev_err(&pdev->dev, "dma request %u busy with %u.%u\n", + mux, mux, dmamux->muxes[mux].value); + of_node_put(dma_spec->np); + return ERR_PTR(-EBUSY); + } + + dmamux->muxes[mux].busy = true; + dmamux->muxes[mux].value = dma_spec->args[1]; + + regmap_update_bits(dmamux->reg, LPC18XX_CREG_DMAMUX, + LPC18XX_DMAMUX_MASK(mux), + LPC18XX_DMAMUX_VAL(dmamux->muxes[mux].value, mux)); + spin_unlock_irqrestore(&dmamux->lock, flags); + + dma_spec->args[1] = dma_spec->args[2]; + dma_spec->args_count = 2; + + dev_dbg(&pdev->dev, "mapping dmamux %u.%u to dma request %u\n", mux, + dmamux->muxes[mux].value, mux); + + return &dmamux->muxes[mux]; +} + +static int lpc18xx_dmamux_probe(struct platform_device *pdev) +{ + struct device_node *dma_np, *np = pdev->dev.of_node; + struct lpc18xx_dmamux_data *dmamux; + int ret; + + dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL); + if (!dmamux) + return -ENOMEM; + + dmamux->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); + if (IS_ERR(dmamux->reg)) { + dev_err(&pdev->dev, "syscon lookup failed\n"); + return PTR_ERR(dmamux->reg); + } + + ret = of_property_read_u32(np, "dma-requests", + &dmamux->dma_mux_requests); + if (ret) { + dev_err(&pdev->dev, "missing dma-requests property\n"); + return ret; + } + + dma_np = of_parse_phandle(np, "dma-masters", 0); + if (!dma_np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return -ENODEV; + } + + ret = of_property_read_u32(dma_np, "dma-requests", + &dmamux->dma_master_requests); + of_node_put(dma_np); + if (ret) { + dev_err(&pdev->dev, "missing master dma-requests property\n"); + return ret; + } + + dmamux->muxes = devm_kcalloc(&pdev->dev, dmamux->dma_master_requests, + sizeof(struct lpc18xx_dmamux), + GFP_KERNEL); + if (!dmamux->muxes) + return -ENOMEM; + + spin_lock_init(&dmamux->lock); + platform_set_drvdata(pdev, dmamux); + dmamux->dmarouter.dev = &pdev->dev; + dmamux->dmarouter.route_free = lpc18xx_dmamux_free; + + return of_dma_router_register(np, lpc18xx_dmamux_reserve, + &dmamux->dmarouter); +} + +static const struct of_device_id lpc18xx_dmamux_match[] = { + { .compatible = "nxp,lpc1850-dmamux" }, + {}, +}; + +static struct platform_driver lpc18xx_dmamux_driver = { + .probe = lpc18xx_dmamux_probe, + .driver = { + .name = "lpc18xx-dmamux", + .of_match_table = lpc18xx_dmamux_match, + }, +}; + +static int __init lpc18xx_dmamux_init(void) +{ + return platform_driver_register(&lpc18xx_dmamux_driver); +} +arch_initcall(lpc18xx_dmamux_init); diff --git a/drivers/dma/mcf-edma-main.c b/drivers/dma/mcf-edma-main.c new file mode 100644 index 0000000000..b359421ee9 --- /dev/null +++ b/drivers/dma/mcf-edma-main.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright (c) 2013-2014 Freescale Semiconductor, Inc +// Copyright (c) 2017 Sysam, Angelo Dureghello + +#include +#include +#include +#include +#include + +#include "fsl-edma-common.h" + +#define EDMA_CHANNELS 64 +#define EDMA_MASK_CH(x) ((x) & GENMASK(5, 0)) + +static irqreturn_t mcf_edma_tx_handler(int irq, void *dev_id) +{ + struct fsl_edma_engine *mcf_edma = dev_id; + struct edma_regs *regs = &mcf_edma->regs; + unsigned int ch; + u64 intmap; + + intmap = ioread32(regs->inth); + intmap <<= 32; + intmap |= ioread32(regs->intl); + if (!intmap) + return IRQ_NONE; + + for (ch = 0; ch < mcf_edma->n_chans; ch++) { + if (intmap & BIT(ch)) { + iowrite8(EDMA_MASK_CH(ch), regs->cint); + fsl_edma_tx_chan_handler(&mcf_edma->chans[ch]); + } + } + + return IRQ_HANDLED; +} + +static irqreturn_t mcf_edma_err_handler(int irq, void *dev_id) +{ + struct fsl_edma_engine *mcf_edma = dev_id; + struct edma_regs *regs = &mcf_edma->regs; + unsigned int err, ch; + + err = ioread32(regs->errl); + if (!err) + return IRQ_NONE; + + for (ch = 0; ch < (EDMA_CHANNELS / 2); ch++) { + if (err & BIT(ch)) { + fsl_edma_disable_request(&mcf_edma->chans[ch]); + iowrite8(EDMA_CERR_CERR(ch), regs->cerr); + fsl_edma_err_chan_handler(&mcf_edma->chans[ch]); + } + } + + err = ioread32(regs->errh); + if (!err) + return IRQ_NONE; + + for (ch = (EDMA_CHANNELS / 2); ch < EDMA_CHANNELS; ch++) { + if (err & (BIT(ch - (EDMA_CHANNELS / 2)))) { + fsl_edma_disable_request(&mcf_edma->chans[ch]); + iowrite8(EDMA_CERR_CERR(ch), regs->cerr); + mcf_edma->chans[ch].status = DMA_ERROR; + mcf_edma->chans[ch].idle = true; + } + } + + return IRQ_HANDLED; +} + +static int mcf_edma_irq_init(struct platform_device *pdev, + struct fsl_edma_engine *mcf_edma) +{ + int ret = 0, i; + struct resource *res; + + res = platform_get_resource_byname(pdev, + IORESOURCE_IRQ, "edma-tx-00-15"); + if (!res) + return -1; + + for (ret = 0, i = res->start; i <= res->end; ++i) + ret |= request_irq(i, mcf_edma_tx_handler, 0, "eDMA", mcf_edma); + if (ret) + return ret; + + res = platform_get_resource_byname(pdev, + IORESOURCE_IRQ, "edma-tx-16-55"); + if (!res) + return -1; + + for (ret = 0, i = res->start; i <= res->end; ++i) + ret |= request_irq(i, mcf_edma_tx_handler, 0, "eDMA", mcf_edma); + if (ret) + return ret; + + ret = platform_get_irq_byname(pdev, "edma-tx-56-63"); + if (ret != -ENXIO) { + ret = request_irq(ret, mcf_edma_tx_handler, + 0, "eDMA", mcf_edma); + if (ret) + return ret; + } + + ret = platform_get_irq_byname(pdev, "edma-err"); + if (ret != -ENXIO) { + ret = request_irq(ret, mcf_edma_err_handler, + 0, "eDMA", mcf_edma); + if (ret) + return ret; + } + + return 0; +} + +static void mcf_edma_irq_free(struct platform_device *pdev, + struct fsl_edma_engine *mcf_edma) +{ + int irq; + struct resource *res; + + res = platform_get_resource_byname(pdev, + IORESOURCE_IRQ, "edma-tx-00-15"); + if (res) { + for (irq = res->start; irq <= res->end; irq++) + free_irq(irq, mcf_edma); + } + + res = platform_get_resource_byname(pdev, + IORESOURCE_IRQ, "edma-tx-16-55"); + if (res) { + for (irq = res->start; irq <= res->end; irq++) + free_irq(irq, mcf_edma); + } + + irq = platform_get_irq_byname(pdev, "edma-tx-56-63"); + if (irq != -ENXIO) + free_irq(irq, mcf_edma); + + irq = platform_get_irq_byname(pdev, "edma-err"); + if (irq != -ENXIO) + free_irq(irq, mcf_edma); +} + +static struct fsl_edma_drvdata mcf_data = { + .flags = FSL_EDMA_DRV_EDMA64, + .setup_irq = mcf_edma_irq_init, +}; + +static int mcf_edma_probe(struct platform_device *pdev) +{ + struct mcf_edma_platform_data *pdata; + struct fsl_edma_engine *mcf_edma; + struct edma_regs *regs; + int ret, i, chans; + + pdata = dev_get_platdata(&pdev->dev); + if (!pdata) { + dev_err(&pdev->dev, "no platform data supplied\n"); + return -EINVAL; + } + + if (!pdata->dma_channels) { + dev_info(&pdev->dev, "setting default channel number to 64"); + chans = 64; + } else { + chans = pdata->dma_channels; + } + + mcf_edma = devm_kzalloc(&pdev->dev, struct_size(mcf_edma, chans, chans), + GFP_KERNEL); + if (!mcf_edma) + return -ENOMEM; + + mcf_edma->n_chans = chans; + + /* Set up drvdata for ColdFire edma */ + mcf_edma->drvdata = &mcf_data; + mcf_edma->big_endian = 1; + + mutex_init(&mcf_edma->fsl_edma_mutex); + + mcf_edma->membase = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mcf_edma->membase)) + return PTR_ERR(mcf_edma->membase); + + fsl_edma_setup_regs(mcf_edma); + regs = &mcf_edma->regs; + + INIT_LIST_HEAD(&mcf_edma->dma_dev.channels); + for (i = 0; i < mcf_edma->n_chans; i++) { + struct fsl_edma_chan *mcf_chan = &mcf_edma->chans[i]; + + mcf_chan->edma = mcf_edma; + mcf_chan->slave_id = i; + mcf_chan->idle = true; + mcf_chan->dma_dir = DMA_NONE; + mcf_chan->vchan.desc_free = fsl_edma_free_desc; + vchan_init(&mcf_chan->vchan, &mcf_edma->dma_dev); + mcf_chan->tcd = mcf_edma->membase + EDMA_TCD + + i * sizeof(struct fsl_edma_hw_tcd); + iowrite32(0x0, &mcf_chan->tcd->csr); + } + + iowrite32(~0, regs->inth); + iowrite32(~0, regs->intl); + + ret = mcf_edma->drvdata->setup_irq(pdev, mcf_edma); + if (ret) + return ret; + + dma_cap_set(DMA_PRIVATE, mcf_edma->dma_dev.cap_mask); + dma_cap_set(DMA_SLAVE, mcf_edma->dma_dev.cap_mask); + dma_cap_set(DMA_CYCLIC, mcf_edma->dma_dev.cap_mask); + + mcf_edma->dma_dev.dev = &pdev->dev; + mcf_edma->dma_dev.device_alloc_chan_resources = + fsl_edma_alloc_chan_resources; + mcf_edma->dma_dev.device_free_chan_resources = + fsl_edma_free_chan_resources; + mcf_edma->dma_dev.device_config = fsl_edma_slave_config; + mcf_edma->dma_dev.device_prep_dma_cyclic = + fsl_edma_prep_dma_cyclic; + mcf_edma->dma_dev.device_prep_slave_sg = fsl_edma_prep_slave_sg; + mcf_edma->dma_dev.device_tx_status = fsl_edma_tx_status; + mcf_edma->dma_dev.device_pause = fsl_edma_pause; + mcf_edma->dma_dev.device_resume = fsl_edma_resume; + mcf_edma->dma_dev.device_terminate_all = fsl_edma_terminate_all; + mcf_edma->dma_dev.device_issue_pending = fsl_edma_issue_pending; + + mcf_edma->dma_dev.src_addr_widths = FSL_EDMA_BUSWIDTHS; + mcf_edma->dma_dev.dst_addr_widths = FSL_EDMA_BUSWIDTHS; + mcf_edma->dma_dev.directions = + BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + + mcf_edma->dma_dev.filter.fn = mcf_edma_filter_fn; + mcf_edma->dma_dev.filter.map = pdata->slave_map; + mcf_edma->dma_dev.filter.mapcnt = pdata->slavecnt; + + platform_set_drvdata(pdev, mcf_edma); + + ret = dma_async_device_register(&mcf_edma->dma_dev); + if (ret) { + dev_err(&pdev->dev, + "Can't register Freescale eDMA engine. (%d)\n", ret); + return ret; + } + + /* Enable round robin arbitration */ + iowrite32(EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr); + + return 0; +} + +static int mcf_edma_remove(struct platform_device *pdev) +{ + struct fsl_edma_engine *mcf_edma = platform_get_drvdata(pdev); + + mcf_edma_irq_free(pdev, mcf_edma); + fsl_edma_cleanup_vchan(&mcf_edma->dma_dev); + dma_async_device_unregister(&mcf_edma->dma_dev); + + return 0; +} + +static struct platform_driver mcf_edma_driver = { + .driver = { + .name = "mcf-edma", + }, + .probe = mcf_edma_probe, + .remove = mcf_edma_remove, +}; + +bool mcf_edma_filter_fn(struct dma_chan *chan, void *param) +{ + if (chan->device->dev->driver == &mcf_edma_driver.driver) { + struct fsl_edma_chan *mcf_chan = to_fsl_edma_chan(chan); + + return (mcf_chan->slave_id == (uintptr_t)param); + } + + return false; +} +EXPORT_SYMBOL(mcf_edma_filter_fn); + +static int __init mcf_edma_init(void) +{ + return platform_driver_register(&mcf_edma_driver); +} +subsys_initcall(mcf_edma_init); + +static void __exit mcf_edma_exit(void) +{ + platform_driver_unregister(&mcf_edma_driver); +} +module_exit(mcf_edma_exit); + +MODULE_ALIAS("platform:mcf-edma"); +MODULE_DESCRIPTION("Freescale eDMA engine driver, ColdFire family"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/mediatek/Kconfig b/drivers/dma/mediatek/Kconfig new file mode 100644 index 0000000000..7a46a54559 --- /dev/null +++ b/drivers/dma/mediatek/Kconfig @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config MTK_HSDMA + tristate "MediaTek High-Speed DMA controller support" + depends on ARCH_MEDIATEK || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for High-Speed DMA controller on MediaTek + SoCs. + + This controller provides the channels which is dedicated to + memory-to-memory transfer to offload from CPU through ring- + based descriptor management. + +config MTK_CQDMA + tristate "MediaTek Command-Queue DMA controller support" + depends on ARCH_MEDIATEK || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + help + Enable support for Command-Queue DMA controller on MediaTek + SoCs. + + This controller provides the channels which is dedicated to + memory-to-memory transfer to offload from CPU. + +config MTK_UART_APDMA + tristate "MediaTek SoCs APDMA support for UART" + depends on OF && SERIAL_8250_MT6577 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the UART DMA engine found on MediaTek MTK SoCs. + When SERIAL_8250_MT6577 is enabled, and if you want to use DMA, + you can enable the config. The DMA engine can only be used + with MediaTek SoCs. diff --git a/drivers/dma/mediatek/Makefile b/drivers/dma/mediatek/Makefile new file mode 100644 index 0000000000..5ba39a5edc --- /dev/null +++ b/drivers/dma/mediatek/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_MTK_UART_APDMA) += mtk-uart-apdma.o +obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o +obj-$(CONFIG_MTK_CQDMA) += mtk-cqdma.o diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c new file mode 100644 index 0000000000..324b7387b1 --- /dev/null +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -0,0 +1,937 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018-2019 MediaTek Inc. + +/* + * Driver for MediaTek Command-Queue DMA Controller + * + * Author: Shun-Chih Yu + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../virt-dma.h" + +#define MTK_CQDMA_USEC_POLL 10 +#define MTK_CQDMA_TIMEOUT_POLL 1000 +#define MTK_CQDMA_DMA_BUSWIDTHS BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) +#define MTK_CQDMA_ALIGN_SIZE 1 + +/* The default number of virtual channel */ +#define MTK_CQDMA_NR_VCHANS 32 + +/* The default number of physical channel */ +#define MTK_CQDMA_NR_PCHANS 3 + +/* Registers for underlying dma manipulation */ +#define MTK_CQDMA_INT_FLAG 0x0 +#define MTK_CQDMA_INT_EN 0x4 +#define MTK_CQDMA_EN 0x8 +#define MTK_CQDMA_RESET 0xc +#define MTK_CQDMA_FLUSH 0x14 +#define MTK_CQDMA_SRC 0x1c +#define MTK_CQDMA_DST 0x20 +#define MTK_CQDMA_LEN1 0x24 +#define MTK_CQDMA_LEN2 0x28 +#define MTK_CQDMA_SRC2 0x60 +#define MTK_CQDMA_DST2 0x64 + +/* Registers setting */ +#define MTK_CQDMA_EN_BIT BIT(0) +#define MTK_CQDMA_INT_FLAG_BIT BIT(0) +#define MTK_CQDMA_INT_EN_BIT BIT(0) +#define MTK_CQDMA_FLUSH_BIT BIT(0) + +#define MTK_CQDMA_WARM_RST_BIT BIT(0) +#define MTK_CQDMA_HARD_RST_BIT BIT(1) + +#define MTK_CQDMA_MAX_LEN GENMASK(27, 0) +#define MTK_CQDMA_ADDR_LIMIT GENMASK(31, 0) +#define MTK_CQDMA_ADDR2_SHFIT (32) + +/** + * struct mtk_cqdma_vdesc - The struct holding info describing virtual + * descriptor (CVD) + * @vd: An instance for struct virt_dma_desc + * @len: The total data size device wants to move + * @residue: The remaining data size device will move + * @dest: The destination address device wants to move to + * @src: The source address device wants to move from + * @ch: The pointer to the corresponding dma channel + * @node: The lise_head struct to build link-list for VDs + * @parent: The pointer to the parent CVD + */ +struct mtk_cqdma_vdesc { + struct virt_dma_desc vd; + size_t len; + size_t residue; + dma_addr_t dest; + dma_addr_t src; + struct dma_chan *ch; + + struct list_head node; + struct mtk_cqdma_vdesc *parent; +}; + +/** + * struct mtk_cqdma_pchan - The struct holding info describing physical + * channel (PC) + * @queue: Queue for the PDs issued to this PC + * @base: The mapped register I/O base of this PC + * @irq: The IRQ that this PC are using + * @refcnt: Track how many VCs are using this PC + * @tasklet: Tasklet for this PC + * @lock: Lock protect agaisting multiple VCs access PC + */ +struct mtk_cqdma_pchan { + struct list_head queue; + void __iomem *base; + u32 irq; + + refcount_t refcnt; + + struct tasklet_struct tasklet; + + /* lock to protect PC */ + spinlock_t lock; +}; + +/** + * struct mtk_cqdma_vchan - The struct holding info describing virtual + * channel (VC) + * @vc: An instance for struct virt_dma_chan + * @pc: The pointer to the underlying PC + * @issue_completion: The wait for all issued descriptors completited + * @issue_synchronize: Bool indicating channel synchronization starts + */ +struct mtk_cqdma_vchan { + struct virt_dma_chan vc; + struct mtk_cqdma_pchan *pc; + struct completion issue_completion; + bool issue_synchronize; +}; + +/** + * struct mtk_cqdma_device - The struct holding info describing CQDMA + * device + * @ddev: An instance for struct dma_device + * @clk: The clock that device internal is using + * @dma_requests: The number of VCs the device supports to + * @dma_channels: The number of PCs the device supports to + * @vc: The pointer to all available VCs + * @pc: The pointer to all the underlying PCs + */ +struct mtk_cqdma_device { + struct dma_device ddev; + struct clk *clk; + + u32 dma_requests; + u32 dma_channels; + struct mtk_cqdma_vchan *vc; + struct mtk_cqdma_pchan **pc; +}; + +static struct mtk_cqdma_device *to_cqdma_dev(struct dma_chan *chan) +{ + return container_of(chan->device, struct mtk_cqdma_device, ddev); +} + +static struct mtk_cqdma_vchan *to_cqdma_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct mtk_cqdma_vchan, vc.chan); +} + +static struct mtk_cqdma_vdesc *to_cqdma_vdesc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct mtk_cqdma_vdesc, vd); +} + +static struct device *cqdma2dev(struct mtk_cqdma_device *cqdma) +{ + return cqdma->ddev.dev; +} + +static u32 mtk_dma_read(struct mtk_cqdma_pchan *pc, u32 reg) +{ + return readl(pc->base + reg); +} + +static void mtk_dma_write(struct mtk_cqdma_pchan *pc, u32 reg, u32 val) +{ + writel_relaxed(val, pc->base + reg); +} + +static void mtk_dma_rmw(struct mtk_cqdma_pchan *pc, u32 reg, + u32 mask, u32 set) +{ + u32 val; + + val = mtk_dma_read(pc, reg); + val &= ~mask; + val |= set; + mtk_dma_write(pc, reg, val); +} + +static void mtk_dma_set(struct mtk_cqdma_pchan *pc, u32 reg, u32 val) +{ + mtk_dma_rmw(pc, reg, 0, val); +} + +static void mtk_dma_clr(struct mtk_cqdma_pchan *pc, u32 reg, u32 val) +{ + mtk_dma_rmw(pc, reg, val, 0); +} + +static void mtk_cqdma_vdesc_free(struct virt_dma_desc *vd) +{ + kfree(to_cqdma_vdesc(vd)); +} + +static int mtk_cqdma_poll_engine_done(struct mtk_cqdma_pchan *pc, bool atomic) +{ + u32 status = 0; + + if (!atomic) + return readl_poll_timeout(pc->base + MTK_CQDMA_EN, + status, + !(status & MTK_CQDMA_EN_BIT), + MTK_CQDMA_USEC_POLL, + MTK_CQDMA_TIMEOUT_POLL); + + return readl_poll_timeout_atomic(pc->base + MTK_CQDMA_EN, + status, + !(status & MTK_CQDMA_EN_BIT), + MTK_CQDMA_USEC_POLL, + MTK_CQDMA_TIMEOUT_POLL); +} + +static int mtk_cqdma_hard_reset(struct mtk_cqdma_pchan *pc) +{ + mtk_dma_set(pc, MTK_CQDMA_RESET, MTK_CQDMA_HARD_RST_BIT); + mtk_dma_clr(pc, MTK_CQDMA_RESET, MTK_CQDMA_HARD_RST_BIT); + + return mtk_cqdma_poll_engine_done(pc, true); +} + +static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc, + struct mtk_cqdma_vdesc *cvd) +{ + /* wait for the previous transaction done */ + if (mtk_cqdma_poll_engine_done(pc, true) < 0) + dev_err(cqdma2dev(to_cqdma_dev(cvd->ch)), "cqdma wait transaction timeout\n"); + + /* warm reset the dma engine for the new transaction */ + mtk_dma_set(pc, MTK_CQDMA_RESET, MTK_CQDMA_WARM_RST_BIT); + if (mtk_cqdma_poll_engine_done(pc, true) < 0) + dev_err(cqdma2dev(to_cqdma_dev(cvd->ch)), "cqdma warm reset timeout\n"); + + /* setup the source */ + mtk_dma_set(pc, MTK_CQDMA_SRC, cvd->src & MTK_CQDMA_ADDR_LIMIT); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + mtk_dma_set(pc, MTK_CQDMA_SRC2, cvd->src >> MTK_CQDMA_ADDR2_SHFIT); +#else + mtk_dma_set(pc, MTK_CQDMA_SRC2, 0); +#endif + + /* setup the destination */ + mtk_dma_set(pc, MTK_CQDMA_DST, cvd->dest & MTK_CQDMA_ADDR_LIMIT); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + mtk_dma_set(pc, MTK_CQDMA_DST2, cvd->dest >> MTK_CQDMA_ADDR2_SHFIT); +#else + mtk_dma_set(pc, MTK_CQDMA_DST2, 0); +#endif + + /* setup the length */ + mtk_dma_set(pc, MTK_CQDMA_LEN1, cvd->len); + + /* start dma engine */ + mtk_dma_set(pc, MTK_CQDMA_EN, MTK_CQDMA_EN_BIT); +} + +static void mtk_cqdma_issue_vchan_pending(struct mtk_cqdma_vchan *cvc) +{ + struct virt_dma_desc *vd, *vd2; + struct mtk_cqdma_pchan *pc = cvc->pc; + struct mtk_cqdma_vdesc *cvd; + bool trigger_engine = false; + + lockdep_assert_held(&cvc->vc.lock); + lockdep_assert_held(&pc->lock); + + list_for_each_entry_safe(vd, vd2, &cvc->vc.desc_issued, node) { + /* need to trigger dma engine if PC's queue is empty */ + if (list_empty(&pc->queue)) + trigger_engine = true; + + cvd = to_cqdma_vdesc(vd); + + /* add VD into PC's queue */ + list_add_tail(&cvd->node, &pc->queue); + + /* start the dma engine */ + if (trigger_engine) + mtk_cqdma_start(pc, cvd); + + /* remove VD from list desc_issued */ + list_del(&vd->node); + } +} + +/* + * return true if this VC is active, + * meaning that there are VDs under processing by the PC + */ +static bool mtk_cqdma_is_vchan_active(struct mtk_cqdma_vchan *cvc) +{ + struct mtk_cqdma_vdesc *cvd; + + list_for_each_entry(cvd, &cvc->pc->queue, node) + if (cvc == to_cqdma_vchan(cvd->ch)) + return true; + + return false; +} + +/* + * return the pointer of the CVD that is just consumed by the PC + */ +static struct mtk_cqdma_vdesc +*mtk_cqdma_consume_work_queue(struct mtk_cqdma_pchan *pc) +{ + struct mtk_cqdma_vchan *cvc; + struct mtk_cqdma_vdesc *cvd, *ret = NULL; + + /* consume a CVD from PC's queue */ + cvd = list_first_entry_or_null(&pc->queue, + struct mtk_cqdma_vdesc, node); + if (unlikely(!cvd || !cvd->parent)) + return NULL; + + cvc = to_cqdma_vchan(cvd->ch); + ret = cvd; + + /* update residue of the parent CVD */ + cvd->parent->residue -= cvd->len; + + /* delete CVD from PC's queue */ + list_del(&cvd->node); + + spin_lock(&cvc->vc.lock); + + /* check whether all the child CVDs completed */ + if (!cvd->parent->residue) { + /* add the parent VD into list desc_completed */ + vchan_cookie_complete(&cvd->parent->vd); + + /* setup completion if this VC is under synchronization */ + if (cvc->issue_synchronize && !mtk_cqdma_is_vchan_active(cvc)) { + complete(&cvc->issue_completion); + cvc->issue_synchronize = false; + } + } + + spin_unlock(&cvc->vc.lock); + + /* start transaction for next CVD in the queue */ + cvd = list_first_entry_or_null(&pc->queue, + struct mtk_cqdma_vdesc, node); + if (cvd) + mtk_cqdma_start(pc, cvd); + + return ret; +} + +static void mtk_cqdma_tasklet_cb(struct tasklet_struct *t) +{ + struct mtk_cqdma_pchan *pc = from_tasklet(pc, t, tasklet); + struct mtk_cqdma_vdesc *cvd = NULL; + unsigned long flags; + + spin_lock_irqsave(&pc->lock, flags); + /* consume the queue */ + cvd = mtk_cqdma_consume_work_queue(pc); + spin_unlock_irqrestore(&pc->lock, flags); + + /* submit the next CVD */ + if (cvd) { + dma_run_dependencies(&cvd->vd.tx); + + /* + * free child CVD after completion. + * the parent CVD would be freed with desc_free by user. + */ + if (cvd->parent != cvd) + kfree(cvd); + } + + /* re-enable interrupt before leaving tasklet */ + enable_irq(pc->irq); +} + +static irqreturn_t mtk_cqdma_irq(int irq, void *devid) +{ + struct mtk_cqdma_device *cqdma = devid; + irqreturn_t ret = IRQ_NONE; + bool schedule_tasklet = false; + u32 i; + + /* clear interrupt flags for each PC */ + for (i = 0; i < cqdma->dma_channels; ++i, schedule_tasklet = false) { + spin_lock(&cqdma->pc[i]->lock); + if (mtk_dma_read(cqdma->pc[i], + MTK_CQDMA_INT_FLAG) & MTK_CQDMA_INT_FLAG_BIT) { + /* clear interrupt */ + mtk_dma_clr(cqdma->pc[i], MTK_CQDMA_INT_FLAG, + MTK_CQDMA_INT_FLAG_BIT); + + schedule_tasklet = true; + ret = IRQ_HANDLED; + } + spin_unlock(&cqdma->pc[i]->lock); + + if (schedule_tasklet) { + /* disable interrupt */ + disable_irq_nosync(cqdma->pc[i]->irq); + + /* schedule the tasklet to handle the transactions */ + tasklet_schedule(&cqdma->pc[i]->tasklet); + } + } + + return ret; +} + +static struct virt_dma_desc *mtk_cqdma_find_active_desc(struct dma_chan *c, + dma_cookie_t cookie) +{ + struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); + struct virt_dma_desc *vd; + unsigned long flags; + + spin_lock_irqsave(&cvc->pc->lock, flags); + list_for_each_entry(vd, &cvc->pc->queue, node) + if (vd->tx.cookie == cookie) { + spin_unlock_irqrestore(&cvc->pc->lock, flags); + return vd; + } + spin_unlock_irqrestore(&cvc->pc->lock, flags); + + list_for_each_entry(vd, &cvc->vc.desc_issued, node) + if (vd->tx.cookie == cookie) + return vd; + + return NULL; +} + +static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); + struct mtk_cqdma_vdesc *cvd; + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + size_t bytes = 0; + + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&cvc->vc.lock, flags); + vd = mtk_cqdma_find_active_desc(c, cookie); + spin_unlock_irqrestore(&cvc->vc.lock, flags); + + if (vd) { + cvd = to_cqdma_vdesc(vd); + bytes = cvd->residue; + } + + dma_set_residue(txstate, bytes); + + return ret; +} + +static void mtk_cqdma_issue_pending(struct dma_chan *c) +{ + struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); + unsigned long pc_flags; + unsigned long vc_flags; + + /* acquire PC's lock before VS's lock for lock dependency in tasklet */ + spin_lock_irqsave(&cvc->pc->lock, pc_flags); + spin_lock_irqsave(&cvc->vc.lock, vc_flags); + + if (vchan_issue_pending(&cvc->vc)) + mtk_cqdma_issue_vchan_pending(cvc); + + spin_unlock_irqrestore(&cvc->vc.lock, vc_flags); + spin_unlock_irqrestore(&cvc->pc->lock, pc_flags); +} + +static struct dma_async_tx_descriptor * +mtk_cqdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct mtk_cqdma_vdesc **cvd; + struct dma_async_tx_descriptor *tx = NULL, *prev_tx = NULL; + size_t i, tlen, nr_vd; + + /* + * In the case that trsanction length is larger than the + * DMA engine supports, a single memcpy transaction needs + * to be separated into several DMA transactions. + * Each DMA transaction would be described by a CVD, + * and the first one is referred as the parent CVD, + * while the others are child CVDs. + * The parent CVD's tx descriptor is the only tx descriptor + * returned to the DMA user, and it should not be completed + * until all the child CVDs completed. + */ + nr_vd = DIV_ROUND_UP(len, MTK_CQDMA_MAX_LEN); + cvd = kcalloc(nr_vd, sizeof(*cvd), GFP_NOWAIT); + if (!cvd) + return NULL; + + for (i = 0; i < nr_vd; ++i) { + cvd[i] = kzalloc(sizeof(*cvd[i]), GFP_NOWAIT); + if (!cvd[i]) { + for (; i > 0; --i) + kfree(cvd[i - 1]); + return NULL; + } + + /* setup dma channel */ + cvd[i]->ch = c; + + /* setup sourece, destination, and length */ + tlen = (len > MTK_CQDMA_MAX_LEN) ? MTK_CQDMA_MAX_LEN : len; + cvd[i]->len = tlen; + cvd[i]->src = src; + cvd[i]->dest = dest; + + /* setup tx descriptor */ + tx = vchan_tx_prep(to_virt_chan(c), &cvd[i]->vd, flags); + tx->next = NULL; + + if (!i) { + cvd[0]->residue = len; + } else { + prev_tx->next = tx; + cvd[i]->residue = tlen; + } + + cvd[i]->parent = cvd[0]; + + /* update the src, dest, len, prev_tx for the next CVD */ + src += tlen; + dest += tlen; + len -= tlen; + prev_tx = tx; + } + + return &cvd[0]->vd.tx; +} + +static void mtk_cqdma_free_inactive_desc(struct dma_chan *c) +{ + struct virt_dma_chan *vc = to_virt_chan(c); + unsigned long flags; + LIST_HEAD(head); + + /* + * set desc_allocated, desc_submitted, + * and desc_issued as the candicates to be freed + */ + spin_lock_irqsave(&vc->lock, flags); + list_splice_tail_init(&vc->desc_allocated, &head); + list_splice_tail_init(&vc->desc_submitted, &head); + list_splice_tail_init(&vc->desc_issued, &head); + spin_unlock_irqrestore(&vc->lock, flags); + + /* free descriptor lists */ + vchan_dma_desc_free_list(vc, &head); +} + +static void mtk_cqdma_free_active_desc(struct dma_chan *c) +{ + struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); + bool sync_needed = false; + unsigned long pc_flags; + unsigned long vc_flags; + + /* acquire PC's lock first due to lock dependency in dma ISR */ + spin_lock_irqsave(&cvc->pc->lock, pc_flags); + spin_lock_irqsave(&cvc->vc.lock, vc_flags); + + /* synchronization is required if this VC is active */ + if (mtk_cqdma_is_vchan_active(cvc)) { + cvc->issue_synchronize = true; + sync_needed = true; + } + + spin_unlock_irqrestore(&cvc->vc.lock, vc_flags); + spin_unlock_irqrestore(&cvc->pc->lock, pc_flags); + + /* waiting for the completion of this VC */ + if (sync_needed) + wait_for_completion(&cvc->issue_completion); + + /* free all descriptors in list desc_completed */ + vchan_synchronize(&cvc->vc); + + WARN_ONCE(!list_empty(&cvc->vc.desc_completed), + "Desc pending still in list desc_completed\n"); +} + +static int mtk_cqdma_terminate_all(struct dma_chan *c) +{ + /* free descriptors not processed yet by hardware */ + mtk_cqdma_free_inactive_desc(c); + + /* free descriptors being processed by hardware */ + mtk_cqdma_free_active_desc(c); + + return 0; +} + +static int mtk_cqdma_alloc_chan_resources(struct dma_chan *c) +{ + struct mtk_cqdma_device *cqdma = to_cqdma_dev(c); + struct mtk_cqdma_vchan *vc = to_cqdma_vchan(c); + struct mtk_cqdma_pchan *pc = NULL; + u32 i, min_refcnt = U32_MAX, refcnt; + unsigned long flags; + + /* allocate PC with the minimun refcount */ + for (i = 0; i < cqdma->dma_channels; ++i) { + refcnt = refcount_read(&cqdma->pc[i]->refcnt); + if (refcnt < min_refcnt) { + pc = cqdma->pc[i]; + min_refcnt = refcnt; + } + } + + if (!pc) + return -ENOSPC; + + spin_lock_irqsave(&pc->lock, flags); + + if (!refcount_read(&pc->refcnt)) { + /* allocate PC when the refcount is zero */ + mtk_cqdma_hard_reset(pc); + + /* enable interrupt for this PC */ + mtk_dma_set(pc, MTK_CQDMA_INT_EN, MTK_CQDMA_INT_EN_BIT); + + /* + * refcount_inc would complain increment on 0; use-after-free. + * Thus, we need to explicitly set it as 1 initially. + */ + refcount_set(&pc->refcnt, 1); + } else { + refcount_inc(&pc->refcnt); + } + + spin_unlock_irqrestore(&pc->lock, flags); + + vc->pc = pc; + + return 0; +} + +static void mtk_cqdma_free_chan_resources(struct dma_chan *c) +{ + struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); + unsigned long flags; + + /* free all descriptors in all lists on the VC */ + mtk_cqdma_terminate_all(c); + + spin_lock_irqsave(&cvc->pc->lock, flags); + + /* PC is not freed until there is no VC mapped to it */ + if (refcount_dec_and_test(&cvc->pc->refcnt)) { + /* start the flush operation and stop the engine */ + mtk_dma_set(cvc->pc, MTK_CQDMA_FLUSH, MTK_CQDMA_FLUSH_BIT); + + /* wait for the completion of flush operation */ + if (mtk_cqdma_poll_engine_done(cvc->pc, true) < 0) + dev_err(cqdma2dev(to_cqdma_dev(c)), "cqdma flush timeout\n"); + + /* clear the flush bit and interrupt flag */ + mtk_dma_clr(cvc->pc, MTK_CQDMA_FLUSH, MTK_CQDMA_FLUSH_BIT); + mtk_dma_clr(cvc->pc, MTK_CQDMA_INT_FLAG, + MTK_CQDMA_INT_FLAG_BIT); + + /* disable interrupt for this PC */ + mtk_dma_clr(cvc->pc, MTK_CQDMA_INT_EN, MTK_CQDMA_INT_EN_BIT); + } + + spin_unlock_irqrestore(&cvc->pc->lock, flags); +} + +static int mtk_cqdma_hw_init(struct mtk_cqdma_device *cqdma) +{ + unsigned long flags; + int err; + u32 i; + + pm_runtime_enable(cqdma2dev(cqdma)); + pm_runtime_get_sync(cqdma2dev(cqdma)); + + err = clk_prepare_enable(cqdma->clk); + + if (err) { + pm_runtime_put_sync(cqdma2dev(cqdma)); + pm_runtime_disable(cqdma2dev(cqdma)); + return err; + } + + /* reset all PCs */ + for (i = 0; i < cqdma->dma_channels; ++i) { + spin_lock_irqsave(&cqdma->pc[i]->lock, flags); + if (mtk_cqdma_hard_reset(cqdma->pc[i]) < 0) { + dev_err(cqdma2dev(cqdma), "cqdma hard reset timeout\n"); + spin_unlock_irqrestore(&cqdma->pc[i]->lock, flags); + + clk_disable_unprepare(cqdma->clk); + pm_runtime_put_sync(cqdma2dev(cqdma)); + pm_runtime_disable(cqdma2dev(cqdma)); + return -EINVAL; + } + spin_unlock_irqrestore(&cqdma->pc[i]->lock, flags); + } + + return 0; +} + +static void mtk_cqdma_hw_deinit(struct mtk_cqdma_device *cqdma) +{ + unsigned long flags; + u32 i; + + /* reset all PCs */ + for (i = 0; i < cqdma->dma_channels; ++i) { + spin_lock_irqsave(&cqdma->pc[i]->lock, flags); + if (mtk_cqdma_hard_reset(cqdma->pc[i]) < 0) + dev_err(cqdma2dev(cqdma), "cqdma hard reset timeout\n"); + spin_unlock_irqrestore(&cqdma->pc[i]->lock, flags); + } + + clk_disable_unprepare(cqdma->clk); + + pm_runtime_put_sync(cqdma2dev(cqdma)); + pm_runtime_disable(cqdma2dev(cqdma)); +} + +static const struct of_device_id mtk_cqdma_match[] = { + { .compatible = "mediatek,mt6765-cqdma" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mtk_cqdma_match); + +static int mtk_cqdma_probe(struct platform_device *pdev) +{ + struct mtk_cqdma_device *cqdma; + struct mtk_cqdma_vchan *vc; + struct dma_device *dd; + int err; + u32 i; + + cqdma = devm_kzalloc(&pdev->dev, sizeof(*cqdma), GFP_KERNEL); + if (!cqdma) + return -ENOMEM; + + dd = &cqdma->ddev; + + cqdma->clk = devm_clk_get(&pdev->dev, "cqdma"); + if (IS_ERR(cqdma->clk)) { + dev_err(&pdev->dev, "No clock for %s\n", + dev_name(&pdev->dev)); + return PTR_ERR(cqdma->clk); + } + + dma_cap_set(DMA_MEMCPY, dd->cap_mask); + + dd->copy_align = MTK_CQDMA_ALIGN_SIZE; + dd->device_alloc_chan_resources = mtk_cqdma_alloc_chan_resources; + dd->device_free_chan_resources = mtk_cqdma_free_chan_resources; + dd->device_tx_status = mtk_cqdma_tx_status; + dd->device_issue_pending = mtk_cqdma_issue_pending; + dd->device_prep_dma_memcpy = mtk_cqdma_prep_dma_memcpy; + dd->device_terminate_all = mtk_cqdma_terminate_all; + dd->src_addr_widths = MTK_CQDMA_DMA_BUSWIDTHS; + dd->dst_addr_widths = MTK_CQDMA_DMA_BUSWIDTHS; + dd->directions = BIT(DMA_MEM_TO_MEM); + dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + dd->dev = &pdev->dev; + INIT_LIST_HEAD(&dd->channels); + + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &cqdma->dma_requests)) { + dev_info(&pdev->dev, + "Using %u as missing dma-requests property\n", + MTK_CQDMA_NR_VCHANS); + + cqdma->dma_requests = MTK_CQDMA_NR_VCHANS; + } + + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-channels", + &cqdma->dma_channels)) { + dev_info(&pdev->dev, + "Using %u as missing dma-channels property\n", + MTK_CQDMA_NR_PCHANS); + + cqdma->dma_channels = MTK_CQDMA_NR_PCHANS; + } + + cqdma->pc = devm_kcalloc(&pdev->dev, cqdma->dma_channels, + sizeof(*cqdma->pc), GFP_KERNEL); + if (!cqdma->pc) + return -ENOMEM; + + /* initialization for PCs */ + for (i = 0; i < cqdma->dma_channels; ++i) { + cqdma->pc[i] = devm_kcalloc(&pdev->dev, 1, + sizeof(**cqdma->pc), GFP_KERNEL); + if (!cqdma->pc[i]) + return -ENOMEM; + + INIT_LIST_HEAD(&cqdma->pc[i]->queue); + spin_lock_init(&cqdma->pc[i]->lock); + refcount_set(&cqdma->pc[i]->refcnt, 0); + cqdma->pc[i]->base = devm_platform_ioremap_resource(pdev, i); + if (IS_ERR(cqdma->pc[i]->base)) + return PTR_ERR(cqdma->pc[i]->base); + + /* allocate IRQ resource */ + err = platform_get_irq(pdev, i); + if (err < 0) + return err; + cqdma->pc[i]->irq = err; + + err = devm_request_irq(&pdev->dev, cqdma->pc[i]->irq, + mtk_cqdma_irq, 0, dev_name(&pdev->dev), + cqdma); + if (err) { + dev_err(&pdev->dev, + "request_irq failed with err %d\n", err); + return -EINVAL; + } + } + + /* allocate resource for VCs */ + cqdma->vc = devm_kcalloc(&pdev->dev, cqdma->dma_requests, + sizeof(*cqdma->vc), GFP_KERNEL); + if (!cqdma->vc) + return -ENOMEM; + + for (i = 0; i < cqdma->dma_requests; i++) { + vc = &cqdma->vc[i]; + vc->vc.desc_free = mtk_cqdma_vdesc_free; + vchan_init(&vc->vc, dd); + init_completion(&vc->issue_completion); + } + + err = dma_async_device_register(dd); + if (err) + return err; + + err = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, cqdma); + if (err) { + dev_err(&pdev->dev, + "MediaTek CQDMA OF registration failed %d\n", err); + goto err_unregister; + } + + err = mtk_cqdma_hw_init(cqdma); + if (err) { + dev_err(&pdev->dev, + "MediaTek CQDMA HW initialization failed %d\n", err); + goto err_unregister; + } + + platform_set_drvdata(pdev, cqdma); + + /* initialize tasklet for each PC */ + for (i = 0; i < cqdma->dma_channels; ++i) + tasklet_setup(&cqdma->pc[i]->tasklet, mtk_cqdma_tasklet_cb); + + dev_info(&pdev->dev, "MediaTek CQDMA driver registered\n"); + + return 0; + +err_unregister: + dma_async_device_unregister(dd); + + return err; +} + +static int mtk_cqdma_remove(struct platform_device *pdev) +{ + struct mtk_cqdma_device *cqdma = platform_get_drvdata(pdev); + struct mtk_cqdma_vchan *vc; + unsigned long flags; + int i; + + /* kill VC task */ + for (i = 0; i < cqdma->dma_requests; i++) { + vc = &cqdma->vc[i]; + + list_del(&vc->vc.chan.device_node); + tasklet_kill(&vc->vc.task); + } + + /* disable interrupt */ + for (i = 0; i < cqdma->dma_channels; i++) { + spin_lock_irqsave(&cqdma->pc[i]->lock, flags); + mtk_dma_clr(cqdma->pc[i], MTK_CQDMA_INT_EN, + MTK_CQDMA_INT_EN_BIT); + spin_unlock_irqrestore(&cqdma->pc[i]->lock, flags); + + /* Waits for any pending IRQ handlers to complete */ + synchronize_irq(cqdma->pc[i]->irq); + + tasklet_kill(&cqdma->pc[i]->tasklet); + } + + /* disable hardware */ + mtk_cqdma_hw_deinit(cqdma); + + dma_async_device_unregister(&cqdma->ddev); + of_dma_controller_free(pdev->dev.of_node); + + return 0; +} + +static struct platform_driver mtk_cqdma_driver = { + .probe = mtk_cqdma_probe, + .remove = mtk_cqdma_remove, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = mtk_cqdma_match, + }, +}; +module_platform_driver(mtk_cqdma_driver); + +MODULE_DESCRIPTION("MediaTek CQDMA Controller Driver"); +MODULE_AUTHOR("Shun-Chih Yu "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c new file mode 100644 index 0000000000..64120767d9 --- /dev/null +++ b/drivers/dma/mediatek/mtk-hsdma.c @@ -0,0 +1,1053 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017-2018 MediaTek Inc. + +/* + * Driver for MediaTek High-Speed DMA Controller + * + * Author: Sean Wang + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../virt-dma.h" + +#define MTK_HSDMA_USEC_POLL 20 +#define MTK_HSDMA_TIMEOUT_POLL 200000 +#define MTK_HSDMA_DMA_BUSWIDTHS BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) + +/* The default number of virtual channel */ +#define MTK_HSDMA_NR_VCHANS 3 + +/* Only one physical channel supported */ +#define MTK_HSDMA_NR_MAX_PCHANS 1 + +/* Macro for physical descriptor (PD) manipulation */ +/* The number of PD which must be 2 of power */ +#define MTK_DMA_SIZE 64 +#define MTK_HSDMA_NEXT_DESP_IDX(x, y) (((x) + 1) & ((y) - 1)) +#define MTK_HSDMA_LAST_DESP_IDX(x, y) (((x) - 1) & ((y) - 1)) +#define MTK_HSDMA_MAX_LEN 0x3f80 +#define MTK_HSDMA_ALIGN_SIZE 4 +#define MTK_HSDMA_PLEN_MASK 0x3fff +#define MTK_HSDMA_DESC_PLEN(x) (((x) & MTK_HSDMA_PLEN_MASK) << 16) +#define MTK_HSDMA_DESC_PLEN_GET(x) (((x) >> 16) & MTK_HSDMA_PLEN_MASK) + +/* Registers for underlying ring manipulation */ +#define MTK_HSDMA_TX_BASE 0x0 +#define MTK_HSDMA_TX_CNT 0x4 +#define MTK_HSDMA_TX_CPU 0x8 +#define MTK_HSDMA_TX_DMA 0xc +#define MTK_HSDMA_RX_BASE 0x100 +#define MTK_HSDMA_RX_CNT 0x104 +#define MTK_HSDMA_RX_CPU 0x108 +#define MTK_HSDMA_RX_DMA 0x10c + +/* Registers for global setup */ +#define MTK_HSDMA_GLO 0x204 +#define MTK_HSDMA_GLO_MULTI_DMA BIT(10) +#define MTK_HSDMA_TX_WB_DDONE BIT(6) +#define MTK_HSDMA_BURST_64BYTES (0x2 << 4) +#define MTK_HSDMA_GLO_RX_BUSY BIT(3) +#define MTK_HSDMA_GLO_RX_DMA BIT(2) +#define MTK_HSDMA_GLO_TX_BUSY BIT(1) +#define MTK_HSDMA_GLO_TX_DMA BIT(0) +#define MTK_HSDMA_GLO_DMA (MTK_HSDMA_GLO_TX_DMA | \ + MTK_HSDMA_GLO_RX_DMA) +#define MTK_HSDMA_GLO_BUSY (MTK_HSDMA_GLO_RX_BUSY | \ + MTK_HSDMA_GLO_TX_BUSY) +#define MTK_HSDMA_GLO_DEFAULT (MTK_HSDMA_GLO_TX_DMA | \ + MTK_HSDMA_GLO_RX_DMA | \ + MTK_HSDMA_TX_WB_DDONE | \ + MTK_HSDMA_BURST_64BYTES | \ + MTK_HSDMA_GLO_MULTI_DMA) + +/* Registers for reset */ +#define MTK_HSDMA_RESET 0x208 +#define MTK_HSDMA_RST_TX BIT(0) +#define MTK_HSDMA_RST_RX BIT(16) + +/* Registers for interrupt control */ +#define MTK_HSDMA_DLYINT 0x20c +#define MTK_HSDMA_RXDLY_INT_EN BIT(15) + +/* Interrupt fires when the pending number's more than the specified */ +#define MTK_HSDMA_RXMAX_PINT(x) (((x) & 0x7f) << 8) + +/* Interrupt fires when the pending time's more than the specified in 20 us */ +#define MTK_HSDMA_RXMAX_PTIME(x) ((x) & 0x7f) +#define MTK_HSDMA_DLYINT_DEFAULT (MTK_HSDMA_RXDLY_INT_EN | \ + MTK_HSDMA_RXMAX_PINT(20) | \ + MTK_HSDMA_RXMAX_PTIME(20)) +#define MTK_HSDMA_INT_STATUS 0x220 +#define MTK_HSDMA_INT_ENABLE 0x228 +#define MTK_HSDMA_INT_RXDONE BIT(16) + +enum mtk_hsdma_vdesc_flag { + MTK_HSDMA_VDESC_FINISHED = 0x01, +}; + +#define IS_MTK_HSDMA_VDESC_FINISHED(x) ((x) == MTK_HSDMA_VDESC_FINISHED) + +/** + * struct mtk_hsdma_pdesc - This is the struct holding info describing physical + * descriptor (PD) and its placement must be kept at + * 4-bytes alignment in little endian order. + * @desc1: | The control pad used to indicate hardware how to + * @desc2: | deal with the descriptor such as source and + * @desc3: | destination address and data length. The maximum + * @desc4: | data length each pdesc can handle is 0x3f80 bytes + */ +struct mtk_hsdma_pdesc { + __le32 desc1; + __le32 desc2; + __le32 desc3; + __le32 desc4; +} __packed __aligned(4); + +/** + * struct mtk_hsdma_vdesc - This is the struct holding info describing virtual + * descriptor (VD) + * @vd: An instance for struct virt_dma_desc + * @len: The total data size device wants to move + * @residue: The remaining data size device will move + * @dest: The destination address device wants to move to + * @src: The source address device wants to move from + */ +struct mtk_hsdma_vdesc { + struct virt_dma_desc vd; + size_t len; + size_t residue; + dma_addr_t dest; + dma_addr_t src; +}; + +/** + * struct mtk_hsdma_cb - This is the struct holding extra info required for RX + * ring to know what relevant VD the PD is being + * mapped to. + * @vd: Pointer to the relevant VD. + * @flag: Flag indicating what action should be taken when VD + * is completed. + */ +struct mtk_hsdma_cb { + struct virt_dma_desc *vd; + enum mtk_hsdma_vdesc_flag flag; +}; + +/** + * struct mtk_hsdma_ring - This struct holds info describing underlying ring + * space + * @txd: The descriptor TX ring which describes DMA source + * information + * @rxd: The descriptor RX ring which describes DMA + * destination information + * @cb: The extra information pointed at by RX ring + * @tphys: The physical addr of TX ring + * @rphys: The physical addr of RX ring + * @cur_tptr: Pointer to the next free descriptor used by the host + * @cur_rptr: Pointer to the last done descriptor by the device + */ +struct mtk_hsdma_ring { + struct mtk_hsdma_pdesc *txd; + struct mtk_hsdma_pdesc *rxd; + struct mtk_hsdma_cb *cb; + dma_addr_t tphys; + dma_addr_t rphys; + u16 cur_tptr; + u16 cur_rptr; +}; + +/** + * struct mtk_hsdma_pchan - This is the struct holding info describing physical + * channel (PC) + * @ring: An instance for the underlying ring + * @sz_ring: Total size allocated for the ring + * @nr_free: Total number of free rooms in the ring. It would + * be accessed and updated frequently between IRQ + * context and user context to reflect whether ring + * can accept requests from VD. + */ +struct mtk_hsdma_pchan { + struct mtk_hsdma_ring ring; + size_t sz_ring; + atomic_t nr_free; +}; + +/** + * struct mtk_hsdma_vchan - This is the struct holding info describing virtual + * channel (VC) + * @vc: An instance for struct virt_dma_chan + * @issue_completion: The wait for all issued descriptors completited + * @issue_synchronize: Bool indicating channel synchronization starts + * @desc_hw_processing: List those descriptors the hardware is processing, + * which is protected by vc.lock + */ +struct mtk_hsdma_vchan { + struct virt_dma_chan vc; + struct completion issue_completion; + bool issue_synchronize; + struct list_head desc_hw_processing; +}; + +/** + * struct mtk_hsdma_soc - This is the struct holding differences among SoCs + * @ddone: Bit mask for DDONE + * @ls0: Bit mask for LS0 + */ +struct mtk_hsdma_soc { + __le32 ddone; + __le32 ls0; +}; + +/** + * struct mtk_hsdma_device - This is the struct holding info describing HSDMA + * device + * @ddev: An instance for struct dma_device + * @base: The mapped register I/O base + * @clk: The clock that device internal is using + * @irq: The IRQ that device are using + * @dma_requests: The number of VCs the device supports to + * @vc: The pointer to all available VCs + * @pc: The pointer to the underlying PC + * @pc_refcnt: Track how many VCs are using the PC + * @lock: Lock protect agaisting multiple VCs access PC + * @soc: The pointer to area holding differences among + * vaious platform + */ +struct mtk_hsdma_device { + struct dma_device ddev; + void __iomem *base; + struct clk *clk; + u32 irq; + + u32 dma_requests; + struct mtk_hsdma_vchan *vc; + struct mtk_hsdma_pchan *pc; + refcount_t pc_refcnt; + + /* Lock used to protect against multiple VCs access PC */ + spinlock_t lock; + + const struct mtk_hsdma_soc *soc; +}; + +static struct mtk_hsdma_device *to_hsdma_dev(struct dma_chan *chan) +{ + return container_of(chan->device, struct mtk_hsdma_device, ddev); +} + +static inline struct mtk_hsdma_vchan *to_hsdma_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct mtk_hsdma_vchan, vc.chan); +} + +static struct mtk_hsdma_vdesc *to_hsdma_vdesc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct mtk_hsdma_vdesc, vd); +} + +static struct device *hsdma2dev(struct mtk_hsdma_device *hsdma) +{ + return hsdma->ddev.dev; +} + +static u32 mtk_dma_read(struct mtk_hsdma_device *hsdma, u32 reg) +{ + return readl(hsdma->base + reg); +} + +static void mtk_dma_write(struct mtk_hsdma_device *hsdma, u32 reg, u32 val) +{ + writel(val, hsdma->base + reg); +} + +static void mtk_dma_rmw(struct mtk_hsdma_device *hsdma, u32 reg, + u32 mask, u32 set) +{ + u32 val; + + val = mtk_dma_read(hsdma, reg); + val &= ~mask; + val |= set; + mtk_dma_write(hsdma, reg, val); +} + +static void mtk_dma_set(struct mtk_hsdma_device *hsdma, u32 reg, u32 val) +{ + mtk_dma_rmw(hsdma, reg, 0, val); +} + +static void mtk_dma_clr(struct mtk_hsdma_device *hsdma, u32 reg, u32 val) +{ + mtk_dma_rmw(hsdma, reg, val, 0); +} + +static void mtk_hsdma_vdesc_free(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct mtk_hsdma_vdesc, vd)); +} + +static int mtk_hsdma_busy_wait(struct mtk_hsdma_device *hsdma) +{ + u32 status = 0; + + return readl_poll_timeout(hsdma->base + MTK_HSDMA_GLO, status, + !(status & MTK_HSDMA_GLO_BUSY), + MTK_HSDMA_USEC_POLL, + MTK_HSDMA_TIMEOUT_POLL); +} + +static int mtk_hsdma_alloc_pchan(struct mtk_hsdma_device *hsdma, + struct mtk_hsdma_pchan *pc) +{ + struct mtk_hsdma_ring *ring = &pc->ring; + int err; + + memset(pc, 0, sizeof(*pc)); + + /* + * Allocate ring space where [0 ... MTK_DMA_SIZE - 1] is for TX ring + * and [MTK_DMA_SIZE ... 2 * MTK_DMA_SIZE - 1] is for RX ring. + */ + pc->sz_ring = 2 * MTK_DMA_SIZE * sizeof(*ring->txd); + ring->txd = dma_alloc_coherent(hsdma2dev(hsdma), pc->sz_ring, + &ring->tphys, GFP_NOWAIT); + if (!ring->txd) + return -ENOMEM; + + ring->rxd = &ring->txd[MTK_DMA_SIZE]; + ring->rphys = ring->tphys + MTK_DMA_SIZE * sizeof(*ring->txd); + ring->cur_tptr = 0; + ring->cur_rptr = MTK_DMA_SIZE - 1; + + ring->cb = kcalloc(MTK_DMA_SIZE, sizeof(*ring->cb), GFP_NOWAIT); + if (!ring->cb) { + err = -ENOMEM; + goto err_free_dma; + } + + atomic_set(&pc->nr_free, MTK_DMA_SIZE - 1); + + /* Disable HSDMA and wait for the completion */ + mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA); + err = mtk_hsdma_busy_wait(hsdma); + if (err) + goto err_free_cb; + + /* Reset */ + mtk_dma_set(hsdma, MTK_HSDMA_RESET, + MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX); + mtk_dma_clr(hsdma, MTK_HSDMA_RESET, + MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX); + + /* Setup HSDMA initial pointer in the ring */ + mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, ring->tphys); + mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, MTK_DMA_SIZE); + mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr); + mtk_dma_write(hsdma, MTK_HSDMA_TX_DMA, 0); + mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, ring->rphys); + mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, MTK_DMA_SIZE); + mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, ring->cur_rptr); + mtk_dma_write(hsdma, MTK_HSDMA_RX_DMA, 0); + + /* Enable HSDMA */ + mtk_dma_set(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA); + + /* Setup delayed interrupt */ + mtk_dma_write(hsdma, MTK_HSDMA_DLYINT, MTK_HSDMA_DLYINT_DEFAULT); + + /* Enable interrupt */ + mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE); + + return 0; + +err_free_cb: + kfree(ring->cb); + +err_free_dma: + dma_free_coherent(hsdma2dev(hsdma), + pc->sz_ring, ring->txd, ring->tphys); + return err; +} + +static void mtk_hsdma_free_pchan(struct mtk_hsdma_device *hsdma, + struct mtk_hsdma_pchan *pc) +{ + struct mtk_hsdma_ring *ring = &pc->ring; + + /* Disable HSDMA and then wait for the completion */ + mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA); + mtk_hsdma_busy_wait(hsdma); + + /* Reset pointer in the ring */ + mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE); + mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, 0); + mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, 0); + mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, 0); + mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, 0); + mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, 0); + mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, MTK_DMA_SIZE - 1); + + kfree(ring->cb); + + dma_free_coherent(hsdma2dev(hsdma), + pc->sz_ring, ring->txd, ring->tphys); +} + +static int mtk_hsdma_issue_pending_vdesc(struct mtk_hsdma_device *hsdma, + struct mtk_hsdma_pchan *pc, + struct mtk_hsdma_vdesc *hvd) +{ + struct mtk_hsdma_ring *ring = &pc->ring; + struct mtk_hsdma_pdesc *txd, *rxd; + u16 reserved, prev, tlen, num_sgs; + unsigned long flags; + + /* Protect against PC is accessed by multiple VCs simultaneously */ + spin_lock_irqsave(&hsdma->lock, flags); + + /* + * Reserve rooms, where pc->nr_free is used to track how many free + * rooms in the ring being updated in user and IRQ context. + */ + num_sgs = DIV_ROUND_UP(hvd->len, MTK_HSDMA_MAX_LEN); + reserved = min_t(u16, num_sgs, atomic_read(&pc->nr_free)); + + if (!reserved) { + spin_unlock_irqrestore(&hsdma->lock, flags); + return -ENOSPC; + } + + atomic_sub(reserved, &pc->nr_free); + + while (reserved--) { + /* Limit size by PD capability for valid data moving */ + tlen = (hvd->len > MTK_HSDMA_MAX_LEN) ? + MTK_HSDMA_MAX_LEN : hvd->len; + + /* + * Setup PDs using the remaining VD info mapped on those + * reserved rooms. And since RXD is shared memory between the + * host and the device allocated by dma_alloc_coherent call, + * the helper macro WRITE_ONCE can ensure the data written to + * RAM would really happens. + */ + txd = &ring->txd[ring->cur_tptr]; + WRITE_ONCE(txd->desc1, hvd->src); + WRITE_ONCE(txd->desc2, + hsdma->soc->ls0 | MTK_HSDMA_DESC_PLEN(tlen)); + + rxd = &ring->rxd[ring->cur_tptr]; + WRITE_ONCE(rxd->desc1, hvd->dest); + WRITE_ONCE(rxd->desc2, MTK_HSDMA_DESC_PLEN(tlen)); + + /* Associate VD, the PD belonged to */ + ring->cb[ring->cur_tptr].vd = &hvd->vd; + + /* Move forward the pointer of TX ring */ + ring->cur_tptr = MTK_HSDMA_NEXT_DESP_IDX(ring->cur_tptr, + MTK_DMA_SIZE); + + /* Update VD with remaining data */ + hvd->src += tlen; + hvd->dest += tlen; + hvd->len -= tlen; + } + + /* + * Tagging flag for the last PD for VD will be responsible for + * completing VD. + */ + if (!hvd->len) { + prev = MTK_HSDMA_LAST_DESP_IDX(ring->cur_tptr, MTK_DMA_SIZE); + ring->cb[prev].flag = MTK_HSDMA_VDESC_FINISHED; + } + + /* Ensure all changes indeed done before we're going on */ + wmb(); + + /* + * Updating into hardware the pointer of TX ring lets HSDMA to take + * action for those pending PDs. + */ + mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr); + + spin_unlock_irqrestore(&hsdma->lock, flags); + + return 0; +} + +static void mtk_hsdma_issue_vchan_pending(struct mtk_hsdma_device *hsdma, + struct mtk_hsdma_vchan *hvc) +{ + struct virt_dma_desc *vd, *vd2; + int err; + + lockdep_assert_held(&hvc->vc.lock); + + list_for_each_entry_safe(vd, vd2, &hvc->vc.desc_issued, node) { + struct mtk_hsdma_vdesc *hvd; + + hvd = to_hsdma_vdesc(vd); + + /* Map VD into PC and all VCs shares a single PC */ + err = mtk_hsdma_issue_pending_vdesc(hsdma, hsdma->pc, hvd); + + /* + * Move VD from desc_issued to desc_hw_processing when entire + * VD is fit into available PDs. Otherwise, the uncompleted + * VDs would stay in list desc_issued and then restart the + * processing as soon as possible once underlying ring space + * got freed. + */ + if (err == -ENOSPC || hvd->len > 0) + break; + + /* + * The extra list desc_hw_processing is used because + * hardware can't provide sufficient information allowing us + * to know what VDs are still working on the underlying ring. + * Through the additional list, it can help us to implement + * terminate_all, residue calculation and such thing needed + * to know detail descriptor status on the hardware. + */ + list_move_tail(&vd->node, &hvc->desc_hw_processing); + } +} + +static void mtk_hsdma_free_rooms_in_ring(struct mtk_hsdma_device *hsdma) +{ + struct mtk_hsdma_vchan *hvc; + struct mtk_hsdma_pdesc *rxd; + struct mtk_hsdma_vdesc *hvd; + struct mtk_hsdma_pchan *pc; + struct mtk_hsdma_cb *cb; + int i = MTK_DMA_SIZE; + __le32 desc2; + u32 status; + u16 next; + + /* Read IRQ status */ + status = mtk_dma_read(hsdma, MTK_HSDMA_INT_STATUS); + if (unlikely(!(status & MTK_HSDMA_INT_RXDONE))) + goto rx_done; + + pc = hsdma->pc; + + /* + * Using a fail-safe loop with iterations of up to MTK_DMA_SIZE to + * reclaim these finished descriptors: The most number of PDs the ISR + * can handle at one time shouldn't be more than MTK_DMA_SIZE so we + * take it as limited count instead of just using a dangerous infinite + * poll. + */ + while (i--) { + next = MTK_HSDMA_NEXT_DESP_IDX(pc->ring.cur_rptr, + MTK_DMA_SIZE); + rxd = &pc->ring.rxd[next]; + + /* + * If MTK_HSDMA_DESC_DDONE is no specified, that means data + * moving for the PD is still under going. + */ + desc2 = READ_ONCE(rxd->desc2); + if (!(desc2 & hsdma->soc->ddone)) + break; + + cb = &pc->ring.cb[next]; + if (unlikely(!cb->vd)) { + dev_err(hsdma2dev(hsdma), "cb->vd cannot be null\n"); + break; + } + + /* Update residue of VD the associated PD belonged to */ + hvd = to_hsdma_vdesc(cb->vd); + hvd->residue -= MTK_HSDMA_DESC_PLEN_GET(rxd->desc2); + + /* Complete VD until the relevant last PD is finished */ + if (IS_MTK_HSDMA_VDESC_FINISHED(cb->flag)) { + hvc = to_hsdma_vchan(cb->vd->tx.chan); + + spin_lock(&hvc->vc.lock); + + /* Remove VD from list desc_hw_processing */ + list_del(&cb->vd->node); + + /* Add VD into list desc_completed */ + vchan_cookie_complete(cb->vd); + + if (hvc->issue_synchronize && + list_empty(&hvc->desc_hw_processing)) { + complete(&hvc->issue_completion); + hvc->issue_synchronize = false; + } + spin_unlock(&hvc->vc.lock); + + cb->flag = 0; + } + + cb->vd = NULL; + + /* + * Recycle the RXD with the helper WRITE_ONCE that can ensure + * data written into RAM would really happens. + */ + WRITE_ONCE(rxd->desc1, 0); + WRITE_ONCE(rxd->desc2, 0); + pc->ring.cur_rptr = next; + + /* Release rooms */ + atomic_inc(&pc->nr_free); + } + + /* Ensure all changes indeed done before we're going on */ + wmb(); + + /* Update CPU pointer for those completed PDs */ + mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, pc->ring.cur_rptr); + + /* + * Acking the pending IRQ allows hardware no longer to keep the used + * IRQ line in certain trigger state when software has completed all + * the finished physical descriptors. + */ + if (atomic_read(&pc->nr_free) >= MTK_DMA_SIZE - 1) + mtk_dma_write(hsdma, MTK_HSDMA_INT_STATUS, status); + + /* ASAP handles pending VDs in all VCs after freeing some rooms */ + for (i = 0; i < hsdma->dma_requests; i++) { + hvc = &hsdma->vc[i]; + spin_lock(&hvc->vc.lock); + mtk_hsdma_issue_vchan_pending(hsdma, hvc); + spin_unlock(&hvc->vc.lock); + } + +rx_done: + /* All completed PDs are cleaned up, so enable interrupt again */ + mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE); +} + +static irqreturn_t mtk_hsdma_irq(int irq, void *devid) +{ + struct mtk_hsdma_device *hsdma = devid; + + /* + * Disable interrupt until all completed PDs are cleaned up in + * mtk_hsdma_free_rooms call. + */ + mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE); + + mtk_hsdma_free_rooms_in_ring(hsdma); + + return IRQ_HANDLED; +} + +static struct virt_dma_desc *mtk_hsdma_find_active_desc(struct dma_chan *c, + dma_cookie_t cookie) +{ + struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c); + struct virt_dma_desc *vd; + + list_for_each_entry(vd, &hvc->desc_hw_processing, node) + if (vd->tx.cookie == cookie) + return vd; + + list_for_each_entry(vd, &hvc->vc.desc_issued, node) + if (vd->tx.cookie == cookie) + return vd; + + return NULL; +} + +static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c); + struct mtk_hsdma_vdesc *hvd; + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + size_t bytes = 0; + + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&hvc->vc.lock, flags); + vd = mtk_hsdma_find_active_desc(c, cookie); + spin_unlock_irqrestore(&hvc->vc.lock, flags); + + if (vd) { + hvd = to_hsdma_vdesc(vd); + bytes = hvd->residue; + } + + dma_set_residue(txstate, bytes); + + return ret; +} + +static void mtk_hsdma_issue_pending(struct dma_chan *c) +{ + struct mtk_hsdma_device *hsdma = to_hsdma_dev(c); + struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c); + unsigned long flags; + + spin_lock_irqsave(&hvc->vc.lock, flags); + + if (vchan_issue_pending(&hvc->vc)) + mtk_hsdma_issue_vchan_pending(hsdma, hvc); + + spin_unlock_irqrestore(&hvc->vc.lock, flags); +} + +static struct dma_async_tx_descriptor * +mtk_hsdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct mtk_hsdma_vdesc *hvd; + + hvd = kzalloc(sizeof(*hvd), GFP_NOWAIT); + if (!hvd) + return NULL; + + hvd->len = len; + hvd->residue = len; + hvd->src = src; + hvd->dest = dest; + + return vchan_tx_prep(to_virt_chan(c), &hvd->vd, flags); +} + +static int mtk_hsdma_free_inactive_desc(struct dma_chan *c) +{ + struct virt_dma_chan *vc = to_virt_chan(c); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&vc->lock, flags); + list_splice_tail_init(&vc->desc_allocated, &head); + list_splice_tail_init(&vc->desc_submitted, &head); + list_splice_tail_init(&vc->desc_issued, &head); + spin_unlock_irqrestore(&vc->lock, flags); + + /* At the point, we don't expect users put descriptor into VC again */ + vchan_dma_desc_free_list(vc, &head); + + return 0; +} + +static void mtk_hsdma_free_active_desc(struct dma_chan *c) +{ + struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c); + bool sync_needed = false; + + /* + * Once issue_synchronize is being set, which means once the hardware + * consumes all descriptors for the channel in the ring, the + * synchronization must be notified immediately it is completed. + */ + spin_lock(&hvc->vc.lock); + if (!list_empty(&hvc->desc_hw_processing)) { + hvc->issue_synchronize = true; + sync_needed = true; + } + spin_unlock(&hvc->vc.lock); + + if (sync_needed) + wait_for_completion(&hvc->issue_completion); + /* + * At the point, we expect that all remaining descriptors in the ring + * for the channel should be all processing done. + */ + WARN_ONCE(!list_empty(&hvc->desc_hw_processing), + "Desc pending still in list desc_hw_processing\n"); + + /* Free all descriptors in list desc_completed */ + vchan_synchronize(&hvc->vc); + + WARN_ONCE(!list_empty(&hvc->vc.desc_completed), + "Desc pending still in list desc_completed\n"); +} + +static int mtk_hsdma_terminate_all(struct dma_chan *c) +{ + /* + * Free pending descriptors not processed yet by hardware that have + * previously been submitted to the channel. + */ + mtk_hsdma_free_inactive_desc(c); + + /* + * However, the DMA engine doesn't provide any way to stop these + * descriptors being processed currently by hardware. The only way is + * to just waiting until these descriptors are all processed completely + * through mtk_hsdma_free_active_desc call. + */ + mtk_hsdma_free_active_desc(c); + + return 0; +} + +static int mtk_hsdma_alloc_chan_resources(struct dma_chan *c) +{ + struct mtk_hsdma_device *hsdma = to_hsdma_dev(c); + int err; + + /* + * Since HSDMA has only one PC, the resource for PC is being allocated + * when the first VC is being created and the other VCs would run on + * the same PC. + */ + if (!refcount_read(&hsdma->pc_refcnt)) { + err = mtk_hsdma_alloc_pchan(hsdma, hsdma->pc); + if (err) + return err; + /* + * refcount_inc would complain increment on 0; use-after-free. + * Thus, we need to explicitly set it as 1 initially. + */ + refcount_set(&hsdma->pc_refcnt, 1); + } else { + refcount_inc(&hsdma->pc_refcnt); + } + + return 0; +} + +static void mtk_hsdma_free_chan_resources(struct dma_chan *c) +{ + struct mtk_hsdma_device *hsdma = to_hsdma_dev(c); + + /* Free all descriptors in all lists on the VC */ + mtk_hsdma_terminate_all(c); + + /* The resource for PC is not freed until all the VCs are destroyed */ + if (!refcount_dec_and_test(&hsdma->pc_refcnt)) + return; + + mtk_hsdma_free_pchan(hsdma, hsdma->pc); +} + +static int mtk_hsdma_hw_init(struct mtk_hsdma_device *hsdma) +{ + int err; + + pm_runtime_enable(hsdma2dev(hsdma)); + pm_runtime_get_sync(hsdma2dev(hsdma)); + + err = clk_prepare_enable(hsdma->clk); + if (err) + return err; + + mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0); + mtk_dma_write(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DEFAULT); + + return 0; +} + +static int mtk_hsdma_hw_deinit(struct mtk_hsdma_device *hsdma) +{ + mtk_dma_write(hsdma, MTK_HSDMA_GLO, 0); + + clk_disable_unprepare(hsdma->clk); + + pm_runtime_put_sync(hsdma2dev(hsdma)); + pm_runtime_disable(hsdma2dev(hsdma)); + + return 0; +} + +static const struct mtk_hsdma_soc mt7623_soc = { + .ddone = BIT(31), + .ls0 = BIT(30), +}; + +static const struct mtk_hsdma_soc mt7622_soc = { + .ddone = BIT(15), + .ls0 = BIT(14), +}; + +static const struct of_device_id mtk_hsdma_match[] = { + { .compatible = "mediatek,mt7623-hsdma", .data = &mt7623_soc}, + { .compatible = "mediatek,mt7622-hsdma", .data = &mt7622_soc}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mtk_hsdma_match); + +static int mtk_hsdma_probe(struct platform_device *pdev) +{ + struct mtk_hsdma_device *hsdma; + struct mtk_hsdma_vchan *vc; + struct dma_device *dd; + int i, err; + + hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL); + if (!hsdma) + return -ENOMEM; + + dd = &hsdma->ddev; + + hsdma->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(hsdma->base)) + return PTR_ERR(hsdma->base); + + hsdma->soc = of_device_get_match_data(&pdev->dev); + if (!hsdma->soc) { + dev_err(&pdev->dev, "No device match found\n"); + return -ENODEV; + } + + hsdma->clk = devm_clk_get(&pdev->dev, "hsdma"); + if (IS_ERR(hsdma->clk)) { + dev_err(&pdev->dev, "No clock for %s\n", + dev_name(&pdev->dev)); + return PTR_ERR(hsdma->clk); + } + + err = platform_get_irq(pdev, 0); + if (err < 0) + return err; + hsdma->irq = err; + + refcount_set(&hsdma->pc_refcnt, 0); + spin_lock_init(&hsdma->lock); + + dma_cap_set(DMA_MEMCPY, dd->cap_mask); + + dd->copy_align = MTK_HSDMA_ALIGN_SIZE; + dd->device_alloc_chan_resources = mtk_hsdma_alloc_chan_resources; + dd->device_free_chan_resources = mtk_hsdma_free_chan_resources; + dd->device_tx_status = mtk_hsdma_tx_status; + dd->device_issue_pending = mtk_hsdma_issue_pending; + dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy; + dd->device_terminate_all = mtk_hsdma_terminate_all; + dd->src_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS; + dd->dst_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS; + dd->directions = BIT(DMA_MEM_TO_MEM); + dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + dd->dev = &pdev->dev; + INIT_LIST_HEAD(&dd->channels); + + hsdma->dma_requests = MTK_HSDMA_NR_VCHANS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &hsdma->dma_requests)) { + dev_info(&pdev->dev, + "Using %u as missing dma-requests property\n", + MTK_HSDMA_NR_VCHANS); + } + + hsdma->pc = devm_kcalloc(&pdev->dev, MTK_HSDMA_NR_MAX_PCHANS, + sizeof(*hsdma->pc), GFP_KERNEL); + if (!hsdma->pc) + return -ENOMEM; + + hsdma->vc = devm_kcalloc(&pdev->dev, hsdma->dma_requests, + sizeof(*hsdma->vc), GFP_KERNEL); + if (!hsdma->vc) + return -ENOMEM; + + for (i = 0; i < hsdma->dma_requests; i++) { + vc = &hsdma->vc[i]; + vc->vc.desc_free = mtk_hsdma_vdesc_free; + vchan_init(&vc->vc, dd); + init_completion(&vc->issue_completion); + INIT_LIST_HEAD(&vc->desc_hw_processing); + } + + err = dma_async_device_register(dd); + if (err) + return err; + + err = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, hsdma); + if (err) { + dev_err(&pdev->dev, + "MediaTek HSDMA OF registration failed %d\n", err); + goto err_unregister; + } + + mtk_hsdma_hw_init(hsdma); + + err = devm_request_irq(&pdev->dev, hsdma->irq, + mtk_hsdma_irq, 0, + dev_name(&pdev->dev), hsdma); + if (err) { + dev_err(&pdev->dev, + "request_irq failed with err %d\n", err); + goto err_free; + } + + platform_set_drvdata(pdev, hsdma); + + dev_info(&pdev->dev, "MediaTek HSDMA driver registered\n"); + + return 0; + +err_free: + mtk_hsdma_hw_deinit(hsdma); + of_dma_controller_free(pdev->dev.of_node); +err_unregister: + dma_async_device_unregister(dd); + + return err; +} + +static int mtk_hsdma_remove(struct platform_device *pdev) +{ + struct mtk_hsdma_device *hsdma = platform_get_drvdata(pdev); + struct mtk_hsdma_vchan *vc; + int i; + + /* Kill VC task */ + for (i = 0; i < hsdma->dma_requests; i++) { + vc = &hsdma->vc[i]; + + list_del(&vc->vc.chan.device_node); + tasklet_kill(&vc->vc.task); + } + + /* Disable DMA interrupt */ + mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0); + + /* Waits for any pending IRQ handlers to complete */ + synchronize_irq(hsdma->irq); + + /* Disable hardware */ + mtk_hsdma_hw_deinit(hsdma); + + dma_async_device_unregister(&hsdma->ddev); + of_dma_controller_free(pdev->dev.of_node); + + return 0; +} + +static struct platform_driver mtk_hsdma_driver = { + .probe = mtk_hsdma_probe, + .remove = mtk_hsdma_remove, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = mtk_hsdma_match, + }, +}; +module_platform_driver(mtk_hsdma_driver); + +MODULE_DESCRIPTION("MediaTek High-Speed DMA Controller Driver"); +MODULE_AUTHOR("Sean Wang "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c new file mode 100644 index 0000000000..06d12ac391 --- /dev/null +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * MediaTek UART APDMA driver. + * + * Copyright (c) 2019 MediaTek Inc. + * Author: Long Cheng + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../virt-dma.h" + +/* The default number of virtual channel */ +#define MTK_UART_APDMA_NR_VCHANS 8 + +#define VFF_EN_B BIT(0) +#define VFF_STOP_B BIT(0) +#define VFF_FLUSH_B BIT(0) +#define VFF_4G_EN_B BIT(0) +/* rx valid size >= vff thre */ +#define VFF_RX_INT_EN_B (BIT(0) | BIT(1)) +/* tx left size >= vff thre */ +#define VFF_TX_INT_EN_B BIT(0) +#define VFF_WARM_RST_B BIT(0) +#define VFF_RX_INT_CLR_B (BIT(0) | BIT(1)) +#define VFF_TX_INT_CLR_B 0 +#define VFF_STOP_CLR_B 0 +#define VFF_EN_CLR_B 0 +#define VFF_INT_EN_CLR_B 0 +#define VFF_4G_SUPPORT_CLR_B 0 + +/* + * interrupt trigger level for tx + * if threshold is n, no polling is required to start tx. + * otherwise need polling VFF_FLUSH. + */ +#define VFF_TX_THRE(n) (n) +/* interrupt trigger level for rx */ +#define VFF_RX_THRE(n) ((n) * 3 / 4) + +#define VFF_RING_SIZE 0xffff +/* invert this bit when wrap ring head again */ +#define VFF_RING_WRAP 0x10000 + +#define VFF_INT_FLAG 0x00 +#define VFF_INT_EN 0x04 +#define VFF_EN 0x08 +#define VFF_RST 0x0c +#define VFF_STOP 0x10 +#define VFF_FLUSH 0x14 +#define VFF_ADDR 0x1c +#define VFF_LEN 0x24 +#define VFF_THRE 0x28 +#define VFF_WPT 0x2c +#define VFF_RPT 0x30 +/* TX: the buffer size HW can read. RX: the buffer size SW can read. */ +#define VFF_VALID_SIZE 0x3c +/* TX: the buffer size SW can write. RX: the buffer size HW can write. */ +#define VFF_LEFT_SIZE 0x40 +#define VFF_DEBUG_STATUS 0x50 +#define VFF_4G_SUPPORT 0x54 + +struct mtk_uart_apdmadev { + struct dma_device ddev; + struct clk *clk; + bool support_33bits; + unsigned int dma_requests; +}; + +struct mtk_uart_apdma_desc { + struct virt_dma_desc vd; + + dma_addr_t addr; + unsigned int avail_len; +}; + +struct mtk_chan { + struct virt_dma_chan vc; + struct dma_slave_config cfg; + struct mtk_uart_apdma_desc *desc; + enum dma_transfer_direction dir; + + void __iomem *base; + unsigned int irq; + + unsigned int rx_status; +}; + +static inline struct mtk_uart_apdmadev * +to_mtk_uart_apdma_dev(struct dma_device *d) +{ + return container_of(d, struct mtk_uart_apdmadev, ddev); +} + +static inline struct mtk_chan *to_mtk_uart_apdma_chan(struct dma_chan *c) +{ + return container_of(c, struct mtk_chan, vc.chan); +} + +static inline struct mtk_uart_apdma_desc *to_mtk_uart_apdma_desc + (struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct mtk_uart_apdma_desc, vd.tx); +} + +static void mtk_uart_apdma_write(struct mtk_chan *c, + unsigned int reg, unsigned int val) +{ + writel(val, c->base + reg); +} + +static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg) +{ + return readl(c->base + reg); +} + +static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct mtk_uart_apdma_desc, vd)); +} + +static void mtk_uart_apdma_start_tx(struct mtk_chan *c) +{ + struct mtk_uart_apdmadev *mtkd = + to_mtk_uart_apdma_dev(c->vc.chan.device); + struct mtk_uart_apdma_desc *d = c->desc; + unsigned int wpt, vff_sz; + + vff_sz = c->cfg.dst_port_window_size; + if (!mtk_uart_apdma_read(c, VFF_LEN)) { + mtk_uart_apdma_write(c, VFF_ADDR, d->addr); + mtk_uart_apdma_write(c, VFF_LEN, vff_sz); + mtk_uart_apdma_write(c, VFF_THRE, VFF_TX_THRE(vff_sz)); + mtk_uart_apdma_write(c, VFF_WPT, 0); + mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B); + + if (mtkd->support_33bits) + mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_EN_B); + } + + mtk_uart_apdma_write(c, VFF_EN, VFF_EN_B); + if (mtk_uart_apdma_read(c, VFF_EN) != VFF_EN_B) + dev_err(c->vc.chan.device->dev, "Enable TX fail\n"); + + if (!mtk_uart_apdma_read(c, VFF_LEFT_SIZE)) { + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_TX_INT_EN_B); + return; + } + + wpt = mtk_uart_apdma_read(c, VFF_WPT); + + wpt += c->desc->avail_len; + if ((wpt & VFF_RING_SIZE) == vff_sz) + wpt = (wpt & VFF_RING_WRAP) ^ VFF_RING_WRAP; + + /* Let DMA start moving data */ + mtk_uart_apdma_write(c, VFF_WPT, wpt); + + /* HW auto set to 0 when left size >= threshold */ + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_TX_INT_EN_B); + if (!mtk_uart_apdma_read(c, VFF_FLUSH)) + mtk_uart_apdma_write(c, VFF_FLUSH, VFF_FLUSH_B); +} + +static void mtk_uart_apdma_start_rx(struct mtk_chan *c) +{ + struct mtk_uart_apdmadev *mtkd = + to_mtk_uart_apdma_dev(c->vc.chan.device); + struct mtk_uart_apdma_desc *d = c->desc; + unsigned int vff_sz; + + vff_sz = c->cfg.src_port_window_size; + if (!mtk_uart_apdma_read(c, VFF_LEN)) { + mtk_uart_apdma_write(c, VFF_ADDR, d->addr); + mtk_uart_apdma_write(c, VFF_LEN, vff_sz); + mtk_uart_apdma_write(c, VFF_THRE, VFF_RX_THRE(vff_sz)); + mtk_uart_apdma_write(c, VFF_RPT, 0); + mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_RX_INT_CLR_B); + + if (mtkd->support_33bits) + mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_EN_B); + } + + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_RX_INT_EN_B); + mtk_uart_apdma_write(c, VFF_EN, VFF_EN_B); + if (mtk_uart_apdma_read(c, VFF_EN) != VFF_EN_B) + dev_err(c->vc.chan.device->dev, "Enable RX fail\n"); +} + +static void mtk_uart_apdma_tx_handler(struct mtk_chan *c) +{ + mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B); + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); + mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); +} + +static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) +{ + struct mtk_uart_apdma_desc *d = c->desc; + unsigned int len, wg, rg; + int cnt; + + mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_RX_INT_CLR_B); + + if (!mtk_uart_apdma_read(c, VFF_VALID_SIZE)) + return; + + mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); + + len = c->cfg.src_port_window_size; + rg = mtk_uart_apdma_read(c, VFF_RPT); + wg = mtk_uart_apdma_read(c, VFF_WPT); + cnt = (wg & VFF_RING_SIZE) - (rg & VFF_RING_SIZE); + + /* + * The buffer is ring buffer. If wrap bit different, + * represents the start of the next cycle for WPT + */ + if ((rg ^ wg) & VFF_RING_WRAP) + cnt += len; + + c->rx_status = d->avail_len - cnt; + mtk_uart_apdma_write(c, VFF_RPT, wg); +} + +static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c) +{ + struct mtk_uart_apdma_desc *d = c->desc; + + if (d) { + list_del(&d->vd.node); + vchan_cookie_complete(&d->vd); + c->desc = NULL; + } +} + +static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) +{ + struct dma_chan *chan = (struct dma_chan *)dev_id; + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + if (c->dir == DMA_DEV_TO_MEM) + mtk_uart_apdma_rx_handler(c); + else if (c->dir == DMA_MEM_TO_DEV) + mtk_uart_apdma_tx_handler(c); + mtk_uart_apdma_chan_complete_handler(c); + spin_unlock_irqrestore(&c->vc.lock, flags); + + return IRQ_HANDLED; +} + +static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct mtk_uart_apdmadev *mtkd = to_mtk_uart_apdma_dev(chan->device); + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + unsigned int status; + int ret; + + ret = pm_runtime_resume_and_get(mtkd->ddev.dev); + if (ret < 0) { + pm_runtime_put_noidle(chan->device->dev); + return ret; + } + + mtk_uart_apdma_write(c, VFF_ADDR, 0); + mtk_uart_apdma_write(c, VFF_THRE, 0); + mtk_uart_apdma_write(c, VFF_LEN, 0); + mtk_uart_apdma_write(c, VFF_RST, VFF_WARM_RST_B); + + ret = readx_poll_timeout(readl, c->base + VFF_EN, + status, !status, 10, 100); + if (ret) + goto err_pm; + + ret = request_irq(c->irq, mtk_uart_apdma_irq_handler, + IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan); + if (ret < 0) { + dev_err(chan->device->dev, "Can't request dma IRQ\n"); + ret = -EINVAL; + goto err_pm; + } + + if (mtkd->support_33bits) + mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B); + +err_pm: + pm_runtime_put_noidle(mtkd->ddev.dev); + return ret; +} + +static void mtk_uart_apdma_free_chan_resources(struct dma_chan *chan) +{ + struct mtk_uart_apdmadev *mtkd = to_mtk_uart_apdma_dev(chan->device); + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + + free_irq(c->irq, chan); + + tasklet_kill(&c->vc.task); + + vchan_free_chan_resources(&c->vc); + + pm_runtime_put_sync(mtkd->ddev.dev); +} + +static enum dma_status mtk_uart_apdma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (!txstate) + return ret; + + dma_set_residue(txstate, c->rx_status); + + return ret; +} + +/* + * dmaengine_prep_slave_single will call the function. and sglen is 1. + * 8250 uart using one ring buffer, and deal with one sg. + */ +static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg + (struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sglen, enum dma_transfer_direction dir, + unsigned long tx_flags, void *context) +{ + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + struct mtk_uart_apdma_desc *d; + + if (!is_slave_direction(dir) || sglen != 1) + return NULL; + + /* Now allocate and setup the descriptor */ + d = kzalloc(sizeof(*d), GFP_NOWAIT); + if (!d) + return NULL; + + d->avail_len = sg_dma_len(sgl); + d->addr = sg_dma_address(sgl); + c->dir = dir; + + return vchan_tx_prep(&c->vc, &d->vd, tx_flags); +} + +static void mtk_uart_apdma_issue_pending(struct dma_chan *chan) +{ + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + struct virt_dma_desc *vd; + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + if (vchan_issue_pending(&c->vc) && !c->desc) { + vd = vchan_next_desc(&c->vc); + c->desc = to_mtk_uart_apdma_desc(&vd->tx); + + if (c->dir == DMA_DEV_TO_MEM) + mtk_uart_apdma_start_rx(c); + else if (c->dir == DMA_MEM_TO_DEV) + mtk_uart_apdma_start_tx(c); + } + + spin_unlock_irqrestore(&c->vc.lock, flags); +} + +static int mtk_uart_apdma_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + + memcpy(&c->cfg, config, sizeof(*config)); + + return 0; +} + +static int mtk_uart_apdma_terminate_all(struct dma_chan *chan) +{ + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + unsigned long flags; + unsigned int status; + LIST_HEAD(head); + int ret; + + mtk_uart_apdma_write(c, VFF_FLUSH, VFF_FLUSH_B); + + ret = readx_poll_timeout(readl, c->base + VFF_FLUSH, + status, status != VFF_FLUSH_B, 10, 100); + if (ret) + dev_err(c->vc.chan.device->dev, "flush: fail, status=0x%x\n", + mtk_uart_apdma_read(c, VFF_DEBUG_STATUS)); + + /* + * Stop need 3 steps. + * 1. set stop to 1 + * 2. wait en to 0 + * 3. set stop as 0 + */ + mtk_uart_apdma_write(c, VFF_STOP, VFF_STOP_B); + ret = readx_poll_timeout(readl, c->base + VFF_EN, + status, !status, 10, 100); + if (ret) + dev_err(c->vc.chan.device->dev, "stop: fail, status=0x%x\n", + mtk_uart_apdma_read(c, VFF_DEBUG_STATUS)); + + mtk_uart_apdma_write(c, VFF_STOP, VFF_STOP_CLR_B); + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); + + if (c->dir == DMA_DEV_TO_MEM) + mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_RX_INT_CLR_B); + else if (c->dir == DMA_MEM_TO_DEV) + mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B); + + synchronize_irq(c->irq); + + spin_lock_irqsave(&c->vc.lock, flags); + vchan_get_all_descriptors(&c->vc, &head); + spin_unlock_irqrestore(&c->vc.lock, flags); + + vchan_dma_desc_free_list(&c->vc, &head); + + return 0; +} + +static int mtk_uart_apdma_device_pause(struct dma_chan *chan) +{ + struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + + mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); + + spin_unlock_irqrestore(&c->vc.lock, flags); + synchronize_irq(c->irq); + + return 0; +} + +static void mtk_uart_apdma_free(struct mtk_uart_apdmadev *mtkd) +{ + while (!list_empty(&mtkd->ddev.channels)) { + struct mtk_chan *c = list_first_entry(&mtkd->ddev.channels, + struct mtk_chan, vc.chan.device_node); + + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } +} + +static const struct of_device_id mtk_uart_apdma_match[] = { + { .compatible = "mediatek,mt6577-uart-dma", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, mtk_uart_apdma_match); + +static int mtk_uart_apdma_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct mtk_uart_apdmadev *mtkd; + int bit_mask = 32, rc; + struct mtk_chan *c; + unsigned int i; + + mtkd = devm_kzalloc(&pdev->dev, sizeof(*mtkd), GFP_KERNEL); + if (!mtkd) + return -ENOMEM; + + mtkd->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(mtkd->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + rc = PTR_ERR(mtkd->clk); + return rc; + } + + if (of_property_read_bool(np, "mediatek,dma-33bits")) + mtkd->support_33bits = true; + + if (mtkd->support_33bits) + bit_mask = 33; + + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(bit_mask)); + if (rc) + return rc; + + dma_cap_set(DMA_SLAVE, mtkd->ddev.cap_mask); + mtkd->ddev.device_alloc_chan_resources = + mtk_uart_apdma_alloc_chan_resources; + mtkd->ddev.device_free_chan_resources = + mtk_uart_apdma_free_chan_resources; + mtkd->ddev.device_tx_status = mtk_uart_apdma_tx_status; + mtkd->ddev.device_issue_pending = mtk_uart_apdma_issue_pending; + mtkd->ddev.device_prep_slave_sg = mtk_uart_apdma_prep_slave_sg; + mtkd->ddev.device_config = mtk_uart_apdma_slave_config; + mtkd->ddev.device_pause = mtk_uart_apdma_device_pause; + mtkd->ddev.device_terminate_all = mtk_uart_apdma_terminate_all; + mtkd->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE); + mtkd->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE); + mtkd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + mtkd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + mtkd->ddev.dev = &pdev->dev; + INIT_LIST_HEAD(&mtkd->ddev.channels); + + mtkd->dma_requests = MTK_UART_APDMA_NR_VCHANS; + if (of_property_read_u32(np, "dma-requests", &mtkd->dma_requests)) { + dev_info(&pdev->dev, + "Using %u as missing dma-requests property\n", + MTK_UART_APDMA_NR_VCHANS); + } + + for (i = 0; i < mtkd->dma_requests; i++) { + c = devm_kzalloc(mtkd->ddev.dev, sizeof(*c), GFP_KERNEL); + if (!c) { + rc = -ENODEV; + goto err_no_dma; + } + + c->base = devm_platform_ioremap_resource(pdev, i); + if (IS_ERR(c->base)) { + rc = PTR_ERR(c->base); + goto err_no_dma; + } + c->vc.desc_free = mtk_uart_apdma_desc_free; + vchan_init(&c->vc, &mtkd->ddev); + + rc = platform_get_irq(pdev, i); + if (rc < 0) + goto err_no_dma; + c->irq = rc; + } + + pm_runtime_enable(&pdev->dev); + + rc = dma_async_device_register(&mtkd->ddev); + if (rc) + goto rpm_disable; + + platform_set_drvdata(pdev, mtkd); + + /* Device-tree DMA controller registration */ + rc = of_dma_controller_register(np, of_dma_xlate_by_chan_id, mtkd); + if (rc) + goto dma_remove; + + return rc; + +dma_remove: + dma_async_device_unregister(&mtkd->ddev); +rpm_disable: + pm_runtime_disable(&pdev->dev); +err_no_dma: + mtk_uart_apdma_free(mtkd); + return rc; +} + +static int mtk_uart_apdma_remove(struct platform_device *pdev) +{ + struct mtk_uart_apdmadev *mtkd = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + + mtk_uart_apdma_free(mtkd); + + dma_async_device_unregister(&mtkd->ddev); + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int mtk_uart_apdma_suspend(struct device *dev) +{ + struct mtk_uart_apdmadev *mtkd = dev_get_drvdata(dev); + + if (!pm_runtime_suspended(dev)) + clk_disable_unprepare(mtkd->clk); + + return 0; +} + +static int mtk_uart_apdma_resume(struct device *dev) +{ + int ret; + struct mtk_uart_apdmadev *mtkd = dev_get_drvdata(dev); + + if (!pm_runtime_suspended(dev)) { + ret = clk_prepare_enable(mtkd->clk); + if (ret) + return ret; + } + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int mtk_uart_apdma_runtime_suspend(struct device *dev) +{ + struct mtk_uart_apdmadev *mtkd = dev_get_drvdata(dev); + + clk_disable_unprepare(mtkd->clk); + + return 0; +} + +static int mtk_uart_apdma_runtime_resume(struct device *dev) +{ + struct mtk_uart_apdmadev *mtkd = dev_get_drvdata(dev); + + return clk_prepare_enable(mtkd->clk); +} +#endif /* CONFIG_PM */ + +static const struct dev_pm_ops mtk_uart_apdma_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mtk_uart_apdma_suspend, mtk_uart_apdma_resume) + SET_RUNTIME_PM_OPS(mtk_uart_apdma_runtime_suspend, + mtk_uart_apdma_runtime_resume, NULL) +}; + +static struct platform_driver mtk_uart_apdma_driver = { + .probe = mtk_uart_apdma_probe, + .remove = mtk_uart_apdma_remove, + .driver = { + .name = KBUILD_MODNAME, + .pm = &mtk_uart_apdma_pm_ops, + .of_match_table = of_match_ptr(mtk_uart_apdma_match), + }, +}; + +module_platform_driver(mtk_uart_apdma_driver); + +MODULE_DESCRIPTION("MediaTek UART APDMA Controller Driver"); +MODULE_AUTHOR("Long Cheng "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/milbeaut-hdmac.c b/drivers/dma/milbeaut-hdmac.c new file mode 100644 index 0000000000..1b0a958926 --- /dev/null +++ b/drivers/dma/milbeaut-hdmac.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (C) 2019 Linaro Ltd. +// Copyright (C) 2019 Socionext Inc. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +#define MLB_HDMAC_DMACR 0x0 /* global */ +#define MLB_HDMAC_DE BIT(31) +#define MLB_HDMAC_DS BIT(30) +#define MLB_HDMAC_PR BIT(28) +#define MLB_HDMAC_DH GENMASK(27, 24) + +#define MLB_HDMAC_CH_STRIDE 0x10 + +#define MLB_HDMAC_DMACA 0x0 /* channel */ +#define MLB_HDMAC_EB BIT(31) +#define MLB_HDMAC_PB BIT(30) +#define MLB_HDMAC_ST BIT(29) +#define MLB_HDMAC_IS GENMASK(28, 24) +#define MLB_HDMAC_BT GENMASK(23, 20) +#define MLB_HDMAC_BC GENMASK(19, 16) +#define MLB_HDMAC_TC GENMASK(15, 0) +#define MLB_HDMAC_DMACB 0x4 +#define MLB_HDMAC_TT GENMASK(31, 30) +#define MLB_HDMAC_MS GENMASK(29, 28) +#define MLB_HDMAC_TW GENMASK(27, 26) +#define MLB_HDMAC_FS BIT(25) +#define MLB_HDMAC_FD BIT(24) +#define MLB_HDMAC_RC BIT(23) +#define MLB_HDMAC_RS BIT(22) +#define MLB_HDMAC_RD BIT(21) +#define MLB_HDMAC_EI BIT(20) +#define MLB_HDMAC_CI BIT(19) +#define HDMAC_PAUSE 0x7 +#define MLB_HDMAC_SS GENMASK(18, 16) +#define MLB_HDMAC_SP GENMASK(15, 12) +#define MLB_HDMAC_DP GENMASK(11, 8) +#define MLB_HDMAC_DMACSA 0x8 +#define MLB_HDMAC_DMACDA 0xc + +#define MLB_HDMAC_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +struct milbeaut_hdmac_desc { + struct virt_dma_desc vd; + struct scatterlist *sgl; + unsigned int sg_len; + unsigned int sg_cur; + enum dma_transfer_direction dir; +}; + +struct milbeaut_hdmac_chan { + struct virt_dma_chan vc; + struct milbeaut_hdmac_device *mdev; + struct milbeaut_hdmac_desc *md; + void __iomem *reg_ch_base; + unsigned int slave_id; + struct dma_slave_config cfg; +}; + +struct milbeaut_hdmac_device { + struct dma_device ddev; + struct clk *clk; + void __iomem *reg_base; + struct milbeaut_hdmac_chan channels[]; +}; + +static struct milbeaut_hdmac_chan * +to_milbeaut_hdmac_chan(struct virt_dma_chan *vc) +{ + return container_of(vc, struct milbeaut_hdmac_chan, vc); +} + +static struct milbeaut_hdmac_desc * +to_milbeaut_hdmac_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct milbeaut_hdmac_desc, vd); +} + +/* mc->vc.lock must be held by caller */ +static struct milbeaut_hdmac_desc * +milbeaut_hdmac_next_desc(struct milbeaut_hdmac_chan *mc) +{ + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&mc->vc); + if (!vd) { + mc->md = NULL; + return NULL; + } + + list_del(&vd->node); + + mc->md = to_milbeaut_hdmac_desc(vd); + + return mc->md; +} + +/* mc->vc.lock must be held by caller */ +static void milbeaut_chan_start(struct milbeaut_hdmac_chan *mc, + struct milbeaut_hdmac_desc *md) +{ + struct scatterlist *sg; + u32 cb, ca, src_addr, dest_addr, len; + u32 width, burst; + + sg = &md->sgl[md->sg_cur]; + len = sg_dma_len(sg); + + cb = MLB_HDMAC_CI | MLB_HDMAC_EI; + if (md->dir == DMA_MEM_TO_DEV) { + cb |= MLB_HDMAC_FD; + width = mc->cfg.dst_addr_width; + burst = mc->cfg.dst_maxburst; + src_addr = sg_dma_address(sg); + dest_addr = mc->cfg.dst_addr; + } else { + cb |= MLB_HDMAC_FS; + width = mc->cfg.src_addr_width; + burst = mc->cfg.src_maxburst; + src_addr = mc->cfg.src_addr; + dest_addr = sg_dma_address(sg); + } + cb |= FIELD_PREP(MLB_HDMAC_TW, (width >> 1)); + cb |= FIELD_PREP(MLB_HDMAC_MS, 2); + + writel_relaxed(MLB_HDMAC_DE, mc->mdev->reg_base + MLB_HDMAC_DMACR); + writel_relaxed(src_addr, mc->reg_ch_base + MLB_HDMAC_DMACSA); + writel_relaxed(dest_addr, mc->reg_ch_base + MLB_HDMAC_DMACDA); + writel_relaxed(cb, mc->reg_ch_base + MLB_HDMAC_DMACB); + + ca = FIELD_PREP(MLB_HDMAC_IS, mc->slave_id); + if (burst == 16) + ca |= FIELD_PREP(MLB_HDMAC_BT, 0xf); + else if (burst == 8) + ca |= FIELD_PREP(MLB_HDMAC_BT, 0xd); + else if (burst == 4) + ca |= FIELD_PREP(MLB_HDMAC_BT, 0xb); + burst *= width; + ca |= FIELD_PREP(MLB_HDMAC_TC, (len / burst - 1)); + writel_relaxed(ca, mc->reg_ch_base + MLB_HDMAC_DMACA); + ca |= MLB_HDMAC_EB; + writel_relaxed(ca, mc->reg_ch_base + MLB_HDMAC_DMACA); +} + +/* mc->vc.lock must be held by caller */ +static void milbeaut_hdmac_start(struct milbeaut_hdmac_chan *mc) +{ + struct milbeaut_hdmac_desc *md; + + md = milbeaut_hdmac_next_desc(mc); + if (md) + milbeaut_chan_start(mc, md); +} + +static irqreturn_t milbeaut_hdmac_interrupt(int irq, void *dev_id) +{ + struct milbeaut_hdmac_chan *mc = dev_id; + struct milbeaut_hdmac_desc *md; + u32 val; + + spin_lock(&mc->vc.lock); + + /* Ack and Disable irqs */ + val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACB); + val &= ~(FIELD_PREP(MLB_HDMAC_SS, HDMAC_PAUSE)); + writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACB); + val &= ~MLB_HDMAC_EI; + val &= ~MLB_HDMAC_CI; + writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACB); + + md = mc->md; + if (!md) + goto out; + + md->sg_cur++; + + if (md->sg_cur >= md->sg_len) { + vchan_cookie_complete(&md->vd); + md = milbeaut_hdmac_next_desc(mc); + if (!md) + goto out; + } + + milbeaut_chan_start(mc, md); + +out: + spin_unlock(&mc->vc.lock); + return IRQ_HANDLED; +} + +static void milbeaut_hdmac_free_chan_resources(struct dma_chan *chan) +{ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +static int +milbeaut_hdmac_chan_config(struct dma_chan *chan, struct dma_slave_config *cfg) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc); + + spin_lock(&mc->vc.lock); + mc->cfg = *cfg; + spin_unlock(&mc->vc.lock); + + return 0; +} + +static int milbeaut_hdmac_chan_pause(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc); + u32 val; + + spin_lock(&mc->vc.lock); + val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA); + val |= MLB_HDMAC_PB; + writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA); + spin_unlock(&mc->vc.lock); + + return 0; +} + +static int milbeaut_hdmac_chan_resume(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc); + u32 val; + + spin_lock(&mc->vc.lock); + val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA); + val &= ~MLB_HDMAC_PB; + writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA); + spin_unlock(&mc->vc.lock); + + return 0; +} + +static struct dma_async_tx_descriptor * +milbeaut_hdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_hdmac_desc *md; + int i; + + if (!is_slave_direction(direction)) + return NULL; + + md = kzalloc(sizeof(*md), GFP_NOWAIT); + if (!md) + return NULL; + + md->sgl = kcalloc(sg_len, sizeof(*sgl), GFP_NOWAIT); + if (!md->sgl) { + kfree(md); + return NULL; + } + + for (i = 0; i < sg_len; i++) + md->sgl[i] = sgl[i]; + + md->sg_len = sg_len; + md->dir = direction; + + return vchan_tx_prep(vc, &md->vd, flags); +} + +static int milbeaut_hdmac_terminate_all(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc); + unsigned long flags; + u32 val; + + LIST_HEAD(head); + + spin_lock_irqsave(&vc->lock, flags); + + val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA); + val &= ~MLB_HDMAC_EB; /* disable the channel */ + writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA); + + if (mc->md) { + vchan_terminate_vdesc(&mc->md->vd); + mc->md = NULL; + } + + vchan_get_all_descriptors(vc, &head); + + spin_unlock_irqrestore(&vc->lock, flags); + + vchan_dma_desc_free_list(vc, &head); + + return 0; +} + +static void milbeaut_hdmac_synchronize(struct dma_chan *chan) +{ + vchan_synchronize(to_virt_chan(chan)); +} + +static enum dma_status milbeaut_hdmac_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct virt_dma_chan *vc; + struct virt_dma_desc *vd; + struct milbeaut_hdmac_chan *mc; + struct milbeaut_hdmac_desc *md = NULL; + enum dma_status stat; + unsigned long flags; + int i; + + stat = dma_cookie_status(chan, cookie, txstate); + /* Return immediately if we do not need to compute the residue. */ + if (stat == DMA_COMPLETE || !txstate) + return stat; + + vc = to_virt_chan(chan); + + spin_lock_irqsave(&vc->lock, flags); + + mc = to_milbeaut_hdmac_chan(vc); + + /* residue from the on-flight chunk */ + if (mc->md && mc->md->vd.tx.cookie == cookie) { + struct scatterlist *sg; + u32 done; + + md = mc->md; + sg = &md->sgl[md->sg_cur]; + + if (md->dir == DMA_DEV_TO_MEM) + done = readl_relaxed(mc->reg_ch_base + + MLB_HDMAC_DMACDA); + else + done = readl_relaxed(mc->reg_ch_base + + MLB_HDMAC_DMACSA); + done -= sg_dma_address(sg); + + txstate->residue = -done; + } + + if (!md) { + vd = vchan_find_desc(vc, cookie); + if (vd) + md = to_milbeaut_hdmac_desc(vd); + } + + if (md) { + /* residue from the queued chunks */ + for (i = md->sg_cur; i < md->sg_len; i++) + txstate->residue += sg_dma_len(&md->sgl[i]); + } + + spin_unlock_irqrestore(&vc->lock, flags); + + return stat; +} + +static void milbeaut_hdmac_issue_pending(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc); + unsigned long flags; + + spin_lock_irqsave(&vc->lock, flags); + + if (vchan_issue_pending(vc) && !mc->md) + milbeaut_hdmac_start(mc); + + spin_unlock_irqrestore(&vc->lock, flags); +} + +static void milbeaut_hdmac_desc_free(struct virt_dma_desc *vd) +{ + struct milbeaut_hdmac_desc *md = to_milbeaut_hdmac_desc(vd); + + kfree(md->sgl); + kfree(md); +} + +static struct dma_chan * +milbeaut_hdmac_xlate(struct of_phandle_args *dma_spec, struct of_dma *of_dma) +{ + struct milbeaut_hdmac_device *mdev = of_dma->of_dma_data; + struct milbeaut_hdmac_chan *mc; + struct virt_dma_chan *vc; + struct dma_chan *chan; + + if (dma_spec->args_count != 1) + return NULL; + + chan = dma_get_any_slave_channel(&mdev->ddev); + if (!chan) + return NULL; + + vc = to_virt_chan(chan); + mc = to_milbeaut_hdmac_chan(vc); + mc->slave_id = dma_spec->args[0]; + + return chan; +} + +static int milbeaut_hdmac_chan_init(struct platform_device *pdev, + struct milbeaut_hdmac_device *mdev, + int chan_id) +{ + struct device *dev = &pdev->dev; + struct milbeaut_hdmac_chan *mc = &mdev->channels[chan_id]; + char *irq_name; + int irq, ret; + + irq = platform_get_irq(pdev, chan_id); + if (irq < 0) + return irq; + + irq_name = devm_kasprintf(dev, GFP_KERNEL, "milbeaut-hdmac-%d", + chan_id); + if (!irq_name) + return -ENOMEM; + + ret = devm_request_irq(dev, irq, milbeaut_hdmac_interrupt, + IRQF_SHARED, irq_name, mc); + if (ret) + return ret; + + mc->mdev = mdev; + mc->reg_ch_base = mdev->reg_base + MLB_HDMAC_CH_STRIDE * (chan_id + 1); + mc->vc.desc_free = milbeaut_hdmac_desc_free; + vchan_init(&mc->vc, &mdev->ddev); + + return 0; +} + +static int milbeaut_hdmac_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct milbeaut_hdmac_device *mdev; + struct dma_device *ddev; + int nr_chans, ret, i; + + nr_chans = platform_irq_count(pdev); + if (nr_chans < 0) + return nr_chans; + + ret = dma_set_mask(dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + mdev = devm_kzalloc(dev, struct_size(mdev, channels, nr_chans), + GFP_KERNEL); + if (!mdev) + return -ENOMEM; + + mdev->reg_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mdev->reg_base)) + return PTR_ERR(mdev->reg_base); + + mdev->clk = devm_clk_get(dev, NULL); + if (IS_ERR(mdev->clk)) { + dev_err(dev, "failed to get clock\n"); + return PTR_ERR(mdev->clk); + } + + ret = clk_prepare_enable(mdev->clk); + if (ret) + return ret; + + ddev = &mdev->ddev; + ddev->dev = dev; + dma_cap_set(DMA_SLAVE, ddev->cap_mask); + dma_cap_set(DMA_PRIVATE, ddev->cap_mask); + ddev->src_addr_widths = MLB_HDMAC_BUSWIDTHS; + ddev->dst_addr_widths = MLB_HDMAC_BUSWIDTHS; + ddev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + ddev->device_free_chan_resources = milbeaut_hdmac_free_chan_resources; + ddev->device_config = milbeaut_hdmac_chan_config; + ddev->device_pause = milbeaut_hdmac_chan_pause; + ddev->device_resume = milbeaut_hdmac_chan_resume; + ddev->device_prep_slave_sg = milbeaut_hdmac_prep_slave_sg; + ddev->device_terminate_all = milbeaut_hdmac_terminate_all; + ddev->device_synchronize = milbeaut_hdmac_synchronize; + ddev->device_tx_status = milbeaut_hdmac_tx_status; + ddev->device_issue_pending = milbeaut_hdmac_issue_pending; + INIT_LIST_HEAD(&ddev->channels); + + for (i = 0; i < nr_chans; i++) { + ret = milbeaut_hdmac_chan_init(pdev, mdev, i); + if (ret) + goto disable_clk; + } + + ret = dma_async_device_register(ddev); + if (ret) + goto disable_clk; + + ret = of_dma_controller_register(dev->of_node, + milbeaut_hdmac_xlate, mdev); + if (ret) + goto unregister_dmac; + + platform_set_drvdata(pdev, mdev); + + return 0; + +unregister_dmac: + dma_async_device_unregister(ddev); +disable_clk: + clk_disable_unprepare(mdev->clk); + + return ret; +} + +static int milbeaut_hdmac_remove(struct platform_device *pdev) +{ + struct milbeaut_hdmac_device *mdev = platform_get_drvdata(pdev); + struct dma_chan *chan; + int ret; + + /* + * Before reaching here, almost all descriptors have been freed by the + * ->device_free_chan_resources() hook. However, each channel might + * be still holding one descriptor that was on-flight at that moment. + * Terminate it to make sure this hardware is no longer running. Then, + * free the channel resources once again to avoid memory leak. + */ + list_for_each_entry(chan, &mdev->ddev.channels, device_node) { + ret = dmaengine_terminate_sync(chan); + if (ret) + return ret; + milbeaut_hdmac_free_chan_resources(chan); + } + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&mdev->ddev); + clk_disable_unprepare(mdev->clk); + + return 0; +} + +static const struct of_device_id milbeaut_hdmac_match[] = { + { .compatible = "socionext,milbeaut-m10v-hdmac" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, milbeaut_hdmac_match); + +static struct platform_driver milbeaut_hdmac_driver = { + .probe = milbeaut_hdmac_probe, + .remove = milbeaut_hdmac_remove, + .driver = { + .name = "milbeaut-m10v-hdmac", + .of_match_table = milbeaut_hdmac_match, + }, +}; +module_platform_driver(milbeaut_hdmac_driver); + +MODULE_DESCRIPTION("Milbeaut HDMAC DmaEngine driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c new file mode 100644 index 0000000000..d29d01e730 --- /dev/null +++ b/drivers/dma/milbeaut-xdmac.c @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (C) 2019 Linaro Ltd. +// Copyright (C) 2019 Socionext Inc. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +/* global register */ +#define M10V_XDACS 0x00 + +/* channel local register */ +#define M10V_XDTBC 0x10 +#define M10V_XDSSA 0x14 +#define M10V_XDDSA 0x18 +#define M10V_XDSAC 0x1C +#define M10V_XDDAC 0x20 +#define M10V_XDDCC 0x24 +#define M10V_XDDES 0x28 +#define M10V_XDDPC 0x2C +#define M10V_XDDSD 0x30 + +#define M10V_XDACS_XE BIT(28) + +#define M10V_DEFBS 0x3 +#define M10V_DEFBL 0xf + +#define M10V_XDSAC_SBS GENMASK(17, 16) +#define M10V_XDSAC_SBL GENMASK(11, 8) + +#define M10V_XDDAC_DBS GENMASK(17, 16) +#define M10V_XDDAC_DBL GENMASK(11, 8) + +#define M10V_XDDES_CE BIT(28) +#define M10V_XDDES_SE BIT(24) +#define M10V_XDDES_SA BIT(15) +#define M10V_XDDES_TF GENMASK(23, 20) +#define M10V_XDDES_EI BIT(1) +#define M10V_XDDES_TI BIT(0) + +#define M10V_XDDSD_IS_MASK GENMASK(3, 0) +#define M10V_XDDSD_IS_NORMAL 0x8 + +#define MLB_XDMAC_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +struct milbeaut_xdmac_desc { + struct virt_dma_desc vd; + size_t len; + dma_addr_t src; + dma_addr_t dst; +}; + +struct milbeaut_xdmac_chan { + struct virt_dma_chan vc; + struct milbeaut_xdmac_desc *md; + void __iomem *reg_ch_base; +}; + +struct milbeaut_xdmac_device { + struct dma_device ddev; + void __iomem *reg_base; + struct milbeaut_xdmac_chan channels[]; +}; + +static struct milbeaut_xdmac_chan * +to_milbeaut_xdmac_chan(struct virt_dma_chan *vc) +{ + return container_of(vc, struct milbeaut_xdmac_chan, vc); +} + +static struct milbeaut_xdmac_desc * +to_milbeaut_xdmac_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct milbeaut_xdmac_desc, vd); +} + +/* mc->vc.lock must be held by caller */ +static struct milbeaut_xdmac_desc * +milbeaut_xdmac_next_desc(struct milbeaut_xdmac_chan *mc) +{ + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&mc->vc); + if (!vd) { + mc->md = NULL; + return NULL; + } + + list_del(&vd->node); + + mc->md = to_milbeaut_xdmac_desc(vd); + + return mc->md; +} + +/* mc->vc.lock must be held by caller */ +static void milbeaut_chan_start(struct milbeaut_xdmac_chan *mc, + struct milbeaut_xdmac_desc *md) +{ + u32 val; + + /* Setup the channel */ + val = md->len - 1; + writel_relaxed(val, mc->reg_ch_base + M10V_XDTBC); + + val = md->src; + writel_relaxed(val, mc->reg_ch_base + M10V_XDSSA); + + val = md->dst; + writel_relaxed(val, mc->reg_ch_base + M10V_XDDSA); + + val = readl_relaxed(mc->reg_ch_base + M10V_XDSAC); + val &= ~(M10V_XDSAC_SBS | M10V_XDSAC_SBL); + val |= FIELD_PREP(M10V_XDSAC_SBS, M10V_DEFBS) | + FIELD_PREP(M10V_XDSAC_SBL, M10V_DEFBL); + writel_relaxed(val, mc->reg_ch_base + M10V_XDSAC); + + val = readl_relaxed(mc->reg_ch_base + M10V_XDDAC); + val &= ~(M10V_XDDAC_DBS | M10V_XDDAC_DBL); + val |= FIELD_PREP(M10V_XDDAC_DBS, M10V_DEFBS) | + FIELD_PREP(M10V_XDDAC_DBL, M10V_DEFBL); + writel_relaxed(val, mc->reg_ch_base + M10V_XDDAC); + + /* Start the channel */ + val = readl_relaxed(mc->reg_ch_base + M10V_XDDES); + val &= ~(M10V_XDDES_CE | M10V_XDDES_SE | M10V_XDDES_TF | + M10V_XDDES_EI | M10V_XDDES_TI); + val |= FIELD_PREP(M10V_XDDES_CE, 1) | FIELD_PREP(M10V_XDDES_SE, 1) | + FIELD_PREP(M10V_XDDES_TF, 1) | FIELD_PREP(M10V_XDDES_EI, 1) | + FIELD_PREP(M10V_XDDES_TI, 1); + writel_relaxed(val, mc->reg_ch_base + M10V_XDDES); +} + +/* mc->vc.lock must be held by caller */ +static void milbeaut_xdmac_start(struct milbeaut_xdmac_chan *mc) +{ + struct milbeaut_xdmac_desc *md; + + md = milbeaut_xdmac_next_desc(mc); + if (md) + milbeaut_chan_start(mc, md); +} + +static irqreturn_t milbeaut_xdmac_interrupt(int irq, void *dev_id) +{ + struct milbeaut_xdmac_chan *mc = dev_id; + struct milbeaut_xdmac_desc *md; + u32 val; + + spin_lock(&mc->vc.lock); + + /* Ack and Stop */ + val = FIELD_PREP(M10V_XDDSD_IS_MASK, 0x0); + writel_relaxed(val, mc->reg_ch_base + M10V_XDDSD); + + md = mc->md; + if (!md) + goto out; + + vchan_cookie_complete(&md->vd); + + milbeaut_xdmac_start(mc); +out: + spin_unlock(&mc->vc.lock); + return IRQ_HANDLED; +} + +static void milbeaut_xdmac_free_chan_resources(struct dma_chan *chan) +{ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +static struct dma_async_tx_descriptor * +milbeaut_xdmac_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_xdmac_desc *md; + + md = kzalloc(sizeof(*md), GFP_NOWAIT); + if (!md) + return NULL; + + md->len = len; + md->src = src; + md->dst = dst; + + return vchan_tx_prep(vc, &md->vd, flags); +} + +static int milbeaut_xdmac_terminate_all(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_xdmac_chan *mc = to_milbeaut_xdmac_chan(vc); + unsigned long flags; + u32 val; + + LIST_HEAD(head); + + spin_lock_irqsave(&vc->lock, flags); + + /* Halt the channel */ + val = readl(mc->reg_ch_base + M10V_XDDES); + val &= ~M10V_XDDES_CE; + val |= FIELD_PREP(M10V_XDDES_CE, 0); + writel(val, mc->reg_ch_base + M10V_XDDES); + + if (mc->md) { + vchan_terminate_vdesc(&mc->md->vd); + mc->md = NULL; + } + + vchan_get_all_descriptors(vc, &head); + + spin_unlock_irqrestore(&vc->lock, flags); + + vchan_dma_desc_free_list(vc, &head); + + return 0; +} + +static void milbeaut_xdmac_synchronize(struct dma_chan *chan) +{ + vchan_synchronize(to_virt_chan(chan)); +} + +static void milbeaut_xdmac_issue_pending(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct milbeaut_xdmac_chan *mc = to_milbeaut_xdmac_chan(vc); + unsigned long flags; + + spin_lock_irqsave(&vc->lock, flags); + + if (vchan_issue_pending(vc) && !mc->md) + milbeaut_xdmac_start(mc); + + spin_unlock_irqrestore(&vc->lock, flags); +} + +static void milbeaut_xdmac_desc_free(struct virt_dma_desc *vd) +{ + kfree(to_milbeaut_xdmac_desc(vd)); +} + +static int milbeaut_xdmac_chan_init(struct platform_device *pdev, + struct milbeaut_xdmac_device *mdev, + int chan_id) +{ + struct device *dev = &pdev->dev; + struct milbeaut_xdmac_chan *mc = &mdev->channels[chan_id]; + char *irq_name; + int irq, ret; + + irq = platform_get_irq(pdev, chan_id); + if (irq < 0) + return irq; + + irq_name = devm_kasprintf(dev, GFP_KERNEL, "milbeaut-xdmac-%d", + chan_id); + if (!irq_name) + return -ENOMEM; + + ret = devm_request_irq(dev, irq, milbeaut_xdmac_interrupt, + IRQF_SHARED, irq_name, mc); + if (ret) + return ret; + + mc->reg_ch_base = mdev->reg_base + chan_id * 0x30; + + mc->vc.desc_free = milbeaut_xdmac_desc_free; + vchan_init(&mc->vc, &mdev->ddev); + + return 0; +} + +static void enable_xdmac(struct milbeaut_xdmac_device *mdev) +{ + unsigned int val; + + val = readl(mdev->reg_base + M10V_XDACS); + val |= M10V_XDACS_XE; + writel(val, mdev->reg_base + M10V_XDACS); +} + +static void disable_xdmac(struct milbeaut_xdmac_device *mdev) +{ + unsigned int val; + + val = readl(mdev->reg_base + M10V_XDACS); + val &= ~M10V_XDACS_XE; + writel(val, mdev->reg_base + M10V_XDACS); +} + +static int milbeaut_xdmac_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct milbeaut_xdmac_device *mdev; + struct dma_device *ddev; + int nr_chans, ret, i; + + nr_chans = platform_irq_count(pdev); + if (nr_chans < 0) + return nr_chans; + + mdev = devm_kzalloc(dev, struct_size(mdev, channels, nr_chans), + GFP_KERNEL); + if (!mdev) + return -ENOMEM; + + mdev->reg_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mdev->reg_base)) + return PTR_ERR(mdev->reg_base); + + ddev = &mdev->ddev; + ddev->dev = dev; + dma_cap_set(DMA_MEMCPY, ddev->cap_mask); + ddev->src_addr_widths = MLB_XDMAC_BUSWIDTHS; + ddev->dst_addr_widths = MLB_XDMAC_BUSWIDTHS; + ddev->device_free_chan_resources = milbeaut_xdmac_free_chan_resources; + ddev->device_prep_dma_memcpy = milbeaut_xdmac_prep_memcpy; + ddev->device_terminate_all = milbeaut_xdmac_terminate_all; + ddev->device_synchronize = milbeaut_xdmac_synchronize; + ddev->device_tx_status = dma_cookie_status; + ddev->device_issue_pending = milbeaut_xdmac_issue_pending; + INIT_LIST_HEAD(&ddev->channels); + + for (i = 0; i < nr_chans; i++) { + ret = milbeaut_xdmac_chan_init(pdev, mdev, i); + if (ret) + return ret; + } + + enable_xdmac(mdev); + + ret = dma_async_device_register(ddev); + if (ret) + goto disable_xdmac; + + ret = of_dma_controller_register(dev->of_node, + of_dma_simple_xlate, mdev); + if (ret) + goto unregister_dmac; + + platform_set_drvdata(pdev, mdev); + + return 0; + +unregister_dmac: + dma_async_device_unregister(ddev); +disable_xdmac: + disable_xdmac(mdev); + return ret; +} + +static int milbeaut_xdmac_remove(struct platform_device *pdev) +{ + struct milbeaut_xdmac_device *mdev = platform_get_drvdata(pdev); + struct dma_chan *chan; + int ret; + + /* + * Before reaching here, almost all descriptors have been freed by the + * ->device_free_chan_resources() hook. However, each channel might + * be still holding one descriptor that was on-flight at that moment. + * Terminate it to make sure this hardware is no longer running. Then, + * free the channel resources once again to avoid memory leak. + */ + list_for_each_entry(chan, &mdev->ddev.channels, device_node) { + ret = dmaengine_terminate_sync(chan); + if (ret) + return ret; + milbeaut_xdmac_free_chan_resources(chan); + } + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&mdev->ddev); + + disable_xdmac(mdev); + + return 0; +} + +static const struct of_device_id milbeaut_xdmac_match[] = { + { .compatible = "socionext,milbeaut-m10v-xdmac" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, milbeaut_xdmac_match); + +static struct platform_driver milbeaut_xdmac_driver = { + .probe = milbeaut_xdmac_probe, + .remove = milbeaut_xdmac_remove, + .driver = { + .name = "milbeaut-m10v-xdmac", + .of_match_table = milbeaut_xdmac_match, + }, +}; +module_platform_driver(milbeaut_xdmac_driver); + +MODULE_DESCRIPTION("Milbeaut XDMAC DmaEngine driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c new file mode 100644 index 0000000000..ebdfdcbb4f --- /dev/null +++ b/drivers/dma/mmp_pdma.c @@ -0,0 +1,1151 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2012 Marvell International Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +#define DCSR 0x0000 +#define DALGN 0x00a0 +#define DINT 0x00f0 +#define DDADR 0x0200 +#define DSADR(n) (0x0204 + ((n) << 4)) +#define DTADR(n) (0x0208 + ((n) << 4)) +#define DCMD 0x020c + +#define DCSR_RUN BIT(31) /* Run Bit (read / write) */ +#define DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */ +#define DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (read / write) */ +#define DCSR_REQPEND BIT(8) /* Request Pending (read-only) */ +#define DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */ +#define DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */ +#define DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */ +#define DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */ + +#define DCSR_EORIRQEN BIT(28) /* End of Receive Interrupt Enable (R/W) */ +#define DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */ +#define DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */ +#define DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */ +#define DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */ +#define DCSR_CMPST BIT(10) /* The Descriptor Compare Status */ +#define DCSR_EORINTR BIT(9) /* The end of Receive */ + +#define DRCMR(n) ((((n) < 64) ? 0x0100 : 0x1100) + (((n) & 0x3f) << 2)) +#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */ +#define DRCMR_CHLNUM 0x1f /* mask for Channel Number (read / write) */ + +#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */ +#define DDADR_STOP BIT(0) /* Stop (read / write) */ + +#define DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */ +#define DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */ +#define DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */ +#define DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */ +#define DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */ +#define DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */ +#define DCMD_ENDIAN BIT(18) /* Device Endian-ness. */ +#define DCMD_BURST8 (1 << 16) /* 8 byte burst */ +#define DCMD_BURST16 (2 << 16) /* 16 byte burst */ +#define DCMD_BURST32 (3 << 16) /* 32 byte burst */ +#define DCMD_WIDTH1 (1 << 14) /* 1 byte width */ +#define DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */ +#define DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ +#define DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ + +#define PDMA_MAX_DESC_BYTES DCMD_LENGTH + +struct mmp_pdma_desc_hw { + u32 ddadr; /* Points to the next descriptor + flags */ + u32 dsadr; /* DSADR value for the current transfer */ + u32 dtadr; /* DTADR value for the current transfer */ + u32 dcmd; /* DCMD value for the current transfer */ +} __aligned(32); + +struct mmp_pdma_desc_sw { + struct mmp_pdma_desc_hw desc; + struct list_head node; + struct list_head tx_list; + struct dma_async_tx_descriptor async_tx; +}; + +struct mmp_pdma_phy; + +struct mmp_pdma_chan { + struct device *dev; + struct dma_chan chan; + struct dma_async_tx_descriptor desc; + struct mmp_pdma_phy *phy; + enum dma_transfer_direction dir; + struct dma_slave_config slave_config; + + struct mmp_pdma_desc_sw *cyclic_first; /* first desc_sw if channel + * is in cyclic mode */ + + /* channel's basic info */ + struct tasklet_struct tasklet; + u32 dcmd; + u32 drcmr; + u32 dev_addr; + + /* list for desc */ + spinlock_t desc_lock; /* Descriptor list lock */ + struct list_head chain_pending; /* Link descriptors queue for pending */ + struct list_head chain_running; /* Link descriptors queue for running */ + bool idle; /* channel statue machine */ + bool byte_align; + + struct dma_pool *desc_pool; /* Descriptors pool */ +}; + +struct mmp_pdma_phy { + int idx; + void __iomem *base; + struct mmp_pdma_chan *vchan; +}; + +struct mmp_pdma_device { + int dma_channels; + void __iomem *base; + struct device *dev; + struct dma_device device; + struct mmp_pdma_phy *phy; + spinlock_t phy_lock; /* protect alloc/free phy channels */ +}; + +#define tx_to_mmp_pdma_desc(tx) \ + container_of(tx, struct mmp_pdma_desc_sw, async_tx) +#define to_mmp_pdma_desc(lh) \ + container_of(lh, struct mmp_pdma_desc_sw, node) +#define to_mmp_pdma_chan(dchan) \ + container_of(dchan, struct mmp_pdma_chan, chan) +#define to_mmp_pdma_dev(dmadev) \ + container_of(dmadev, struct mmp_pdma_device, device) + +static int mmp_pdma_config_write(struct dma_chan *dchan, + struct dma_slave_config *cfg, + enum dma_transfer_direction direction); + +static void set_desc(struct mmp_pdma_phy *phy, dma_addr_t addr) +{ + u32 reg = (phy->idx << 4) + DDADR; + + writel(addr, phy->base + reg); +} + +static void enable_chan(struct mmp_pdma_phy *phy) +{ + u32 reg, dalgn; + + if (!phy->vchan) + return; + + reg = DRCMR(phy->vchan->drcmr); + writel(DRCMR_MAPVLD | phy->idx, phy->base + reg); + + dalgn = readl(phy->base + DALGN); + if (phy->vchan->byte_align) + dalgn |= 1 << phy->idx; + else + dalgn &= ~(1 << phy->idx); + writel(dalgn, phy->base + DALGN); + + reg = (phy->idx << 2) + DCSR; + writel(readl(phy->base + reg) | DCSR_RUN, phy->base + reg); +} + +static void disable_chan(struct mmp_pdma_phy *phy) +{ + u32 reg; + + if (!phy) + return; + + reg = (phy->idx << 2) + DCSR; + writel(readl(phy->base + reg) & ~DCSR_RUN, phy->base + reg); +} + +static int clear_chan_irq(struct mmp_pdma_phy *phy) +{ + u32 dcsr; + u32 dint = readl(phy->base + DINT); + u32 reg = (phy->idx << 2) + DCSR; + + if (!(dint & BIT(phy->idx))) + return -EAGAIN; + + /* clear irq */ + dcsr = readl(phy->base + reg); + writel(dcsr, phy->base + reg); + if ((dcsr & DCSR_BUSERR) && (phy->vchan)) + dev_warn(phy->vchan->dev, "DCSR_BUSERR\n"); + + return 0; +} + +static irqreturn_t mmp_pdma_chan_handler(int irq, void *dev_id) +{ + struct mmp_pdma_phy *phy = dev_id; + + if (clear_chan_irq(phy) != 0) + return IRQ_NONE; + + tasklet_schedule(&phy->vchan->tasklet); + return IRQ_HANDLED; +} + +static irqreturn_t mmp_pdma_int_handler(int irq, void *dev_id) +{ + struct mmp_pdma_device *pdev = dev_id; + struct mmp_pdma_phy *phy; + u32 dint = readl(pdev->base + DINT); + int i, ret; + int irq_num = 0; + + while (dint) { + i = __ffs(dint); + /* only handle interrupts belonging to pdma driver*/ + if (i >= pdev->dma_channels) + break; + dint &= (dint - 1); + phy = &pdev->phy[i]; + ret = mmp_pdma_chan_handler(irq, phy); + if (ret == IRQ_HANDLED) + irq_num++; + } + + if (irq_num) + return IRQ_HANDLED; + + return IRQ_NONE; +} + +/* lookup free phy channel as descending priority */ +static struct mmp_pdma_phy *lookup_phy(struct mmp_pdma_chan *pchan) +{ + int prio, i; + struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device); + struct mmp_pdma_phy *phy, *found = NULL; + unsigned long flags; + + /* + * dma channel priorities + * ch 0 - 3, 16 - 19 <--> (0) + * ch 4 - 7, 20 - 23 <--> (1) + * ch 8 - 11, 24 - 27 <--> (2) + * ch 12 - 15, 28 - 31 <--> (3) + */ + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (prio = 0; prio <= ((pdev->dma_channels - 1) & 0xf) >> 2; prio++) { + for (i = 0; i < pdev->dma_channels; i++) { + if (prio != (i & 0xf) >> 2) + continue; + phy = &pdev->phy[i]; + if (!phy->vchan) { + phy->vchan = pchan; + found = phy; + goto out_unlock; + } + } + } + +out_unlock: + spin_unlock_irqrestore(&pdev->phy_lock, flags); + return found; +} + +static void mmp_pdma_free_phy(struct mmp_pdma_chan *pchan) +{ + struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device); + unsigned long flags; + u32 reg; + + if (!pchan->phy) + return; + + /* clear the channel mapping in DRCMR */ + reg = DRCMR(pchan->drcmr); + writel(0, pchan->phy->base + reg); + + spin_lock_irqsave(&pdev->phy_lock, flags); + pchan->phy->vchan = NULL; + pchan->phy = NULL; + spin_unlock_irqrestore(&pdev->phy_lock, flags); +} + +/* + * start_pending_queue - transfer any pending transactions + * pending list ==> running list + */ +static void start_pending_queue(struct mmp_pdma_chan *chan) +{ + struct mmp_pdma_desc_sw *desc; + + /* still in running, irq will start the pending list */ + if (!chan->idle) { + dev_dbg(chan->dev, "DMA controller still busy\n"); + return; + } + + if (list_empty(&chan->chain_pending)) { + /* chance to re-fetch phy channel with higher prio */ + mmp_pdma_free_phy(chan); + dev_dbg(chan->dev, "no pending list\n"); + return; + } + + if (!chan->phy) { + chan->phy = lookup_phy(chan); + if (!chan->phy) { + dev_dbg(chan->dev, "no free dma channel\n"); + return; + } + } + + /* + * pending -> running + * reintilize pending list + */ + desc = list_first_entry(&chan->chain_pending, + struct mmp_pdma_desc_sw, node); + list_splice_tail_init(&chan->chain_pending, &chan->chain_running); + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + set_desc(chan->phy, desc->async_tx.phys); + enable_chan(chan->phy); + chan->idle = false; +} + + +/* desc->tx_list ==> pending list */ +static dma_cookie_t mmp_pdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(tx->chan); + struct mmp_pdma_desc_sw *desc = tx_to_mmp_pdma_desc(tx); + struct mmp_pdma_desc_sw *child; + unsigned long flags; + dma_cookie_t cookie = -EBUSY; + + spin_lock_irqsave(&chan->desc_lock, flags); + + list_for_each_entry(child, &desc->tx_list, node) { + cookie = dma_cookie_assign(&child->async_tx); + } + + /* softly link to pending list - desc->tx_list ==> pending list */ + list_splice_tail_init(&desc->tx_list, &chan->chain_pending); + + spin_unlock_irqrestore(&chan->desc_lock, flags); + + return cookie; +} + +static struct mmp_pdma_desc_sw * +mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan) +{ + struct mmp_pdma_desc_sw *desc; + dma_addr_t pdesc; + + desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + if (!desc) { + dev_err(chan->dev, "out of memory for link descriptor\n"); + return NULL; + } + + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan); + /* each desc has submit */ + desc->async_tx.tx_submit = mmp_pdma_tx_submit; + desc->async_tx.phys = pdesc; + + return desc; +} + +/* + * mmp_pdma_alloc_chan_resources - Allocate resources for DMA channel. + * + * This function will create a dma pool for descriptor allocation. + * Request irq only when channel is requested + * Return - The number of allocated descriptors. + */ + +static int mmp_pdma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan); + + if (chan->desc_pool) + return 1; + + chan->desc_pool = dma_pool_create(dev_name(&dchan->dev->device), + chan->dev, + sizeof(struct mmp_pdma_desc_sw), + __alignof__(struct mmp_pdma_desc_sw), + 0); + if (!chan->desc_pool) { + dev_err(chan->dev, "unable to allocate descriptor pool\n"); + return -ENOMEM; + } + + mmp_pdma_free_phy(chan); + chan->idle = true; + chan->dev_addr = 0; + return 1; +} + +static void mmp_pdma_free_desc_list(struct mmp_pdma_chan *chan, + struct list_head *list) +{ + struct mmp_pdma_desc_sw *desc, *_desc; + + list_for_each_entry_safe(desc, _desc, list, node) { + list_del(&desc->node); + dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); + } +} + +static void mmp_pdma_free_chan_resources(struct dma_chan *dchan) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->desc_lock, flags); + mmp_pdma_free_desc_list(chan, &chan->chain_pending); + mmp_pdma_free_desc_list(chan, &chan->chain_running); + spin_unlock_irqrestore(&chan->desc_lock, flags); + + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + chan->idle = true; + chan->dev_addr = 0; + mmp_pdma_free_phy(chan); + return; +} + +static struct dma_async_tx_descriptor * +mmp_pdma_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct mmp_pdma_chan *chan; + struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new; + size_t copy = 0; + + if (!dchan) + return NULL; + + if (!len) + return NULL; + + chan = to_mmp_pdma_chan(dchan); + chan->byte_align = false; + + if (!chan->dir) { + chan->dir = DMA_MEM_TO_MEM; + chan->dcmd = DCMD_INCTRGADDR | DCMD_INCSRCADDR; + chan->dcmd |= DCMD_BURST32; + } + + do { + /* Allocate the link descriptor from DMA pool */ + new = mmp_pdma_alloc_descriptor(chan); + if (!new) { + dev_err(chan->dev, "no memory for desc\n"); + goto fail; + } + + copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES); + if (dma_src & 0x7 || dma_dst & 0x7) + chan->byte_align = true; + + new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & copy); + new->desc.dsadr = dma_src; + new->desc.dtadr = dma_dst; + + if (!first) + first = new; + else + prev->desc.ddadr = new->async_tx.phys; + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + prev = new; + len -= copy; + + if (chan->dir == DMA_MEM_TO_DEV) { + dma_src += copy; + } else if (chan->dir == DMA_DEV_TO_MEM) { + dma_dst += copy; + } else if (chan->dir == DMA_MEM_TO_MEM) { + dma_src += copy; + dma_dst += copy; + } + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } while (len); + + first->async_tx.flags = flags; /* client is in control of this ack */ + first->async_tx.cookie = -EBUSY; + + /* last desc and fire IRQ */ + new->desc.ddadr = DDADR_STOP; + new->desc.dcmd |= DCMD_ENDIRQEN; + + chan->cyclic_first = NULL; + + return &first->async_tx; + +fail: + if (first) + mmp_pdma_free_desc_list(chan, &first->tx_list); + return NULL; +} + +static struct dma_async_tx_descriptor * +mmp_pdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan); + struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new = NULL; + size_t len, avail; + struct scatterlist *sg; + dma_addr_t addr; + int i; + + if ((sgl == NULL) || (sg_len == 0)) + return NULL; + + chan->byte_align = false; + + mmp_pdma_config_write(dchan, &chan->slave_config, dir); + + for_each_sg(sgl, sg, sg_len, i) { + addr = sg_dma_address(sg); + avail = sg_dma_len(sgl); + + do { + len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES); + if (addr & 0x7) + chan->byte_align = true; + + /* allocate and populate the descriptor */ + new = mmp_pdma_alloc_descriptor(chan); + if (!new) { + dev_err(chan->dev, "no memory for desc\n"); + goto fail; + } + + new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & len); + if (dir == DMA_MEM_TO_DEV) { + new->desc.dsadr = addr; + new->desc.dtadr = chan->dev_addr; + } else { + new->desc.dsadr = chan->dev_addr; + new->desc.dtadr = addr; + } + + if (!first) + first = new; + else + prev->desc.ddadr = new->async_tx.phys; + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + prev = new; + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + + /* update metadata */ + addr += len; + avail -= len; + } while (avail); + } + + first->async_tx.cookie = -EBUSY; + first->async_tx.flags = flags; + + /* last desc and fire IRQ */ + new->desc.ddadr = DDADR_STOP; + new->desc.dcmd |= DCMD_ENDIRQEN; + + chan->dir = dir; + chan->cyclic_first = NULL; + + return &first->async_tx; + +fail: + if (first) + mmp_pdma_free_desc_list(chan, &first->tx_list); + return NULL; +} + +static struct dma_async_tx_descriptor * +mmp_pdma_prep_dma_cyclic(struct dma_chan *dchan, + dma_addr_t buf_addr, size_t len, size_t period_len, + enum dma_transfer_direction direction, + unsigned long flags) +{ + struct mmp_pdma_chan *chan; + struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new; + dma_addr_t dma_src, dma_dst; + + if (!dchan || !len || !period_len) + return NULL; + + /* the buffer length must be a multiple of period_len */ + if (len % period_len != 0) + return NULL; + + if (period_len > PDMA_MAX_DESC_BYTES) + return NULL; + + chan = to_mmp_pdma_chan(dchan); + mmp_pdma_config_write(dchan, &chan->slave_config, direction); + + switch (direction) { + case DMA_MEM_TO_DEV: + dma_src = buf_addr; + dma_dst = chan->dev_addr; + break; + case DMA_DEV_TO_MEM: + dma_dst = buf_addr; + dma_src = chan->dev_addr; + break; + default: + dev_err(chan->dev, "Unsupported direction for cyclic DMA\n"); + return NULL; + } + + chan->dir = direction; + + do { + /* Allocate the link descriptor from DMA pool */ + new = mmp_pdma_alloc_descriptor(chan); + if (!new) { + dev_err(chan->dev, "no memory for desc\n"); + goto fail; + } + + new->desc.dcmd = (chan->dcmd | DCMD_ENDIRQEN | + (DCMD_LENGTH & period_len)); + new->desc.dsadr = dma_src; + new->desc.dtadr = dma_dst; + + if (!first) + first = new; + else + prev->desc.ddadr = new->async_tx.phys; + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + prev = new; + len -= period_len; + + if (chan->dir == DMA_MEM_TO_DEV) + dma_src += period_len; + else + dma_dst += period_len; + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } while (len); + + first->async_tx.flags = flags; /* client is in control of this ack */ + first->async_tx.cookie = -EBUSY; + + /* make the cyclic link */ + new->desc.ddadr = first->async_tx.phys; + chan->cyclic_first = first; + + return &first->async_tx; + +fail: + if (first) + mmp_pdma_free_desc_list(chan, &first->tx_list); + return NULL; +} + +static int mmp_pdma_config_write(struct dma_chan *dchan, + struct dma_slave_config *cfg, + enum dma_transfer_direction direction) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan); + u32 maxburst = 0, addr = 0; + enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + + if (!dchan) + return -EINVAL; + + if (direction == DMA_DEV_TO_MEM) { + chan->dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC; + maxburst = cfg->src_maxburst; + width = cfg->src_addr_width; + addr = cfg->src_addr; + } else if (direction == DMA_MEM_TO_DEV) { + chan->dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG; + maxburst = cfg->dst_maxburst; + width = cfg->dst_addr_width; + addr = cfg->dst_addr; + } + + if (width == DMA_SLAVE_BUSWIDTH_1_BYTE) + chan->dcmd |= DCMD_WIDTH1; + else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + chan->dcmd |= DCMD_WIDTH2; + else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES) + chan->dcmd |= DCMD_WIDTH4; + + if (maxburst == 8) + chan->dcmd |= DCMD_BURST8; + else if (maxburst == 16) + chan->dcmd |= DCMD_BURST16; + else if (maxburst == 32) + chan->dcmd |= DCMD_BURST32; + + chan->dir = direction; + chan->dev_addr = addr; + + return 0; +} + +static int mmp_pdma_config(struct dma_chan *dchan, + struct dma_slave_config *cfg) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan); + + memcpy(&chan->slave_config, cfg, sizeof(*cfg)); + return 0; +} + +static int mmp_pdma_terminate_all(struct dma_chan *dchan) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan); + unsigned long flags; + + if (!dchan) + return -EINVAL; + + disable_chan(chan->phy); + mmp_pdma_free_phy(chan); + spin_lock_irqsave(&chan->desc_lock, flags); + mmp_pdma_free_desc_list(chan, &chan->chain_pending); + mmp_pdma_free_desc_list(chan, &chan->chain_running); + spin_unlock_irqrestore(&chan->desc_lock, flags); + chan->idle = true; + + return 0; +} + +static unsigned int mmp_pdma_residue(struct mmp_pdma_chan *chan, + dma_cookie_t cookie) +{ + struct mmp_pdma_desc_sw *sw; + u32 curr, residue = 0; + bool passed = false; + bool cyclic = chan->cyclic_first != NULL; + + /* + * If the channel does not have a phy pointer anymore, it has already + * been completed. Therefore, its residue is 0. + */ + if (!chan->phy) + return 0; + + if (chan->dir == DMA_DEV_TO_MEM) + curr = readl(chan->phy->base + DTADR(chan->phy->idx)); + else + curr = readl(chan->phy->base + DSADR(chan->phy->idx)); + + list_for_each_entry(sw, &chan->chain_running, node) { + u32 start, end, len; + + if (chan->dir == DMA_DEV_TO_MEM) + start = sw->desc.dtadr; + else + start = sw->desc.dsadr; + + len = sw->desc.dcmd & DCMD_LENGTH; + end = start + len; + + /* + * 'passed' will be latched once we found the descriptor which + * lies inside the boundaries of the curr pointer. All + * descriptors that occur in the list _after_ we found that + * partially handled descriptor are still to be processed and + * are hence added to the residual bytes counter. + */ + + if (passed) { + residue += len; + } else if (curr >= start && curr <= end) { + residue += end - curr; + passed = true; + } + + /* + * Descriptors that have the ENDIRQEN bit set mark the end of a + * transaction chain, and the cookie assigned with it has been + * returned previously from mmp_pdma_tx_submit(). + * + * In case we have multiple transactions in the running chain, + * and the cookie does not match the one the user asked us + * about, reset the state variables and start over. + * + * This logic does not apply to cyclic transactions, where all + * descriptors have the ENDIRQEN bit set, and for which we + * can't have multiple transactions on one channel anyway. + */ + if (cyclic || !(sw->desc.dcmd & DCMD_ENDIRQEN)) + continue; + + if (sw->async_tx.cookie == cookie) { + return residue; + } else { + residue = 0; + passed = false; + } + } + + /* We should only get here in case of cyclic transactions */ + return residue; +} + +static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (likely(ret != DMA_ERROR)) + dma_set_residue(txstate, mmp_pdma_residue(chan, cookie)); + + return ret; +} + +/* + * mmp_pdma_issue_pending - Issue the DMA start command + * pending list ==> running list + */ +static void mmp_pdma_issue_pending(struct dma_chan *dchan) +{ + struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->desc_lock, flags); + start_pending_queue(chan); + spin_unlock_irqrestore(&chan->desc_lock, flags); +} + +/* + * dma_do_tasklet + * Do call back + * Start pending list + */ +static void dma_do_tasklet(struct tasklet_struct *t) +{ + struct mmp_pdma_chan *chan = from_tasklet(chan, t, tasklet); + struct mmp_pdma_desc_sw *desc, *_desc; + LIST_HEAD(chain_cleanup); + unsigned long flags; + struct dmaengine_desc_callback cb; + + if (chan->cyclic_first) { + spin_lock_irqsave(&chan->desc_lock, flags); + desc = chan->cyclic_first; + dmaengine_desc_get_callback(&desc->async_tx, &cb); + spin_unlock_irqrestore(&chan->desc_lock, flags); + + dmaengine_desc_callback_invoke(&cb, NULL); + + return; + } + + /* submit pending list; callback for each desc; free desc */ + spin_lock_irqsave(&chan->desc_lock, flags); + + list_for_each_entry_safe(desc, _desc, &chan->chain_running, node) { + /* + * move the descriptors to a temporary list so we can drop + * the lock during the entire cleanup operation + */ + list_move(&desc->node, &chain_cleanup); + + /* + * Look for the first list entry which has the ENDIRQEN flag + * set. That is the descriptor we got an interrupt for, so + * complete that transaction and its cookie. + */ + if (desc->desc.dcmd & DCMD_ENDIRQEN) { + dma_cookie_t cookie = desc->async_tx.cookie; + dma_cookie_complete(&desc->async_tx); + dev_dbg(chan->dev, "completed_cookie=%d\n", cookie); + break; + } + } + + /* + * The hardware is idle and ready for more when the + * chain_running list is empty. + */ + chan->idle = list_empty(&chan->chain_running); + + /* Start any pending transactions automatically */ + start_pending_queue(chan); + spin_unlock_irqrestore(&chan->desc_lock, flags); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc, _desc, &chain_cleanup, node) { + struct dma_async_tx_descriptor *txd = &desc->async_tx; + + /* Remove from the list of transactions */ + list_del(&desc->node); + /* Run the link descriptor callback function */ + dmaengine_desc_get_callback(txd, &cb); + dmaengine_desc_callback_invoke(&cb, NULL); + + dma_pool_free(chan->desc_pool, desc, txd->phys); + } +} + +static int mmp_pdma_remove(struct platform_device *op) +{ + struct mmp_pdma_device *pdev = platform_get_drvdata(op); + struct mmp_pdma_phy *phy; + int i, irq = 0, irq_num = 0; + + if (op->dev.of_node) + of_dma_controller_free(op->dev.of_node); + + for (i = 0; i < pdev->dma_channels; i++) { + if (platform_get_irq(op, i) > 0) + irq_num++; + } + + if (irq_num != pdev->dma_channels) { + irq = platform_get_irq(op, 0); + devm_free_irq(&op->dev, irq, pdev); + } else { + for (i = 0; i < pdev->dma_channels; i++) { + phy = &pdev->phy[i]; + irq = platform_get_irq(op, i); + devm_free_irq(&op->dev, irq, phy); + } + } + + dma_async_device_unregister(&pdev->device); + return 0; +} + +static int mmp_pdma_chan_init(struct mmp_pdma_device *pdev, int idx, int irq) +{ + struct mmp_pdma_phy *phy = &pdev->phy[idx]; + struct mmp_pdma_chan *chan; + int ret; + + chan = devm_kzalloc(pdev->dev, sizeof(*chan), GFP_KERNEL); + if (chan == NULL) + return -ENOMEM; + + phy->idx = idx; + phy->base = pdev->base; + + if (irq) { + ret = devm_request_irq(pdev->dev, irq, mmp_pdma_chan_handler, + IRQF_SHARED, "pdma", phy); + if (ret) { + dev_err(pdev->dev, "channel request irq fail!\n"); + return ret; + } + } + + spin_lock_init(&chan->desc_lock); + chan->dev = pdev->dev; + chan->chan.device = &pdev->device; + tasklet_setup(&chan->tasklet, dma_do_tasklet); + INIT_LIST_HEAD(&chan->chain_pending); + INIT_LIST_HEAD(&chan->chain_running); + + /* register virt channel to dma engine */ + list_add_tail(&chan->chan.device_node, &pdev->device.channels); + + return 0; +} + +static const struct of_device_id mmp_pdma_dt_ids[] = { + { .compatible = "marvell,pdma-1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, mmp_pdma_dt_ids); + +static struct dma_chan *mmp_pdma_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct mmp_pdma_device *d = ofdma->of_dma_data; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&d->device); + if (!chan) + return NULL; + + to_mmp_pdma_chan(chan)->drcmr = dma_spec->args[0]; + + return chan; +} + +static int mmp_pdma_probe(struct platform_device *op) +{ + struct mmp_pdma_device *pdev; + const struct of_device_id *of_id; + struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); + int i, ret, irq = 0; + int dma_channels = 0, irq_num = 0; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES; + + pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + + pdev->dev = &op->dev; + + spin_lock_init(&pdev->phy_lock); + + pdev->base = devm_platform_ioremap_resource(op, 0); + if (IS_ERR(pdev->base)) + return PTR_ERR(pdev->base); + + of_id = of_match_device(mmp_pdma_dt_ids, pdev->dev); + if (of_id) { + /* Parse new and deprecated dma-channels properties */ + if (of_property_read_u32(pdev->dev->of_node, "dma-channels", + &dma_channels)) + of_property_read_u32(pdev->dev->of_node, "#dma-channels", + &dma_channels); + } else if (pdata && pdata->dma_channels) { + dma_channels = pdata->dma_channels; + } else { + dma_channels = 32; /* default 32 channel */ + } + pdev->dma_channels = dma_channels; + + for (i = 0; i < dma_channels; i++) { + if (platform_get_irq_optional(op, i) > 0) + irq_num++; + } + + pdev->phy = devm_kcalloc(pdev->dev, dma_channels, sizeof(*pdev->phy), + GFP_KERNEL); + if (pdev->phy == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&pdev->device.channels); + + if (irq_num != dma_channels) { + /* all chan share one irq, demux inside */ + irq = platform_get_irq(op, 0); + ret = devm_request_irq(pdev->dev, irq, mmp_pdma_int_handler, + IRQF_SHARED, "pdma", pdev); + if (ret) + return ret; + } + + for (i = 0; i < dma_channels; i++) { + irq = (irq_num != dma_channels) ? 0 : platform_get_irq(op, i); + ret = mmp_pdma_chan_init(pdev, i, irq); + if (ret) + return ret; + } + + dma_cap_set(DMA_SLAVE, pdev->device.cap_mask); + dma_cap_set(DMA_MEMCPY, pdev->device.cap_mask); + dma_cap_set(DMA_CYCLIC, pdev->device.cap_mask); + dma_cap_set(DMA_PRIVATE, pdev->device.cap_mask); + pdev->device.dev = &op->dev; + pdev->device.device_alloc_chan_resources = mmp_pdma_alloc_chan_resources; + pdev->device.device_free_chan_resources = mmp_pdma_free_chan_resources; + pdev->device.device_tx_status = mmp_pdma_tx_status; + pdev->device.device_prep_dma_memcpy = mmp_pdma_prep_memcpy; + pdev->device.device_prep_slave_sg = mmp_pdma_prep_slave_sg; + pdev->device.device_prep_dma_cyclic = mmp_pdma_prep_dma_cyclic; + pdev->device.device_issue_pending = mmp_pdma_issue_pending; + pdev->device.device_config = mmp_pdma_config; + pdev->device.device_terminate_all = mmp_pdma_terminate_all; + pdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; + pdev->device.src_addr_widths = widths; + pdev->device.dst_addr_widths = widths; + pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + pdev->device.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + if (pdev->dev->coherent_dma_mask) + dma_set_mask(pdev->dev, pdev->dev->coherent_dma_mask); + else + dma_set_mask(pdev->dev, DMA_BIT_MASK(64)); + + ret = dma_async_device_register(&pdev->device); + if (ret) { + dev_err(pdev->device.dev, "unable to register\n"); + return ret; + } + + if (op->dev.of_node) { + /* Device-tree DMA controller registration */ + ret = of_dma_controller_register(op->dev.of_node, + mmp_pdma_dma_xlate, pdev); + if (ret < 0) { + dev_err(&op->dev, "of_dma_controller_register failed\n"); + dma_async_device_unregister(&pdev->device); + return ret; + } + } + + platform_set_drvdata(op, pdev); + dev_info(pdev->device.dev, "initialized %d channels\n", dma_channels); + return 0; +} + +static const struct platform_device_id mmp_pdma_id_table[] = { + { "mmp-pdma", }, + { }, +}; + +static struct platform_driver mmp_pdma_driver = { + .driver = { + .name = "mmp-pdma", + .of_match_table = mmp_pdma_dt_ids, + }, + .id_table = mmp_pdma_id_table, + .probe = mmp_pdma_probe, + .remove = mmp_pdma_remove, +}; + +module_platform_driver(mmp_pdma_driver); + +MODULE_DESCRIPTION("MARVELL MMP Peripheral DMA Driver"); +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c new file mode 100644 index 0000000000..d49fa6bc67 --- /dev/null +++ b/drivers/dma/mmp_tdma.c @@ -0,0 +1,765 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver For Marvell Two-channel DMA Engine + * + * Copyright: Marvell International Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +/* + * Two-Channel DMA registers + */ +#define TDBCR 0x00 /* Byte Count */ +#define TDSAR 0x10 /* Src Addr */ +#define TDDAR 0x20 /* Dst Addr */ +#define TDNDPR 0x30 /* Next Desc */ +#define TDCR 0x40 /* Control */ +#define TDCP 0x60 /* Priority*/ +#define TDCDPR 0x70 /* Current Desc */ +#define TDIMR 0x80 /* Int Mask */ +#define TDISR 0xa0 /* Int Status */ + +/* Two-Channel DMA Control Register */ +#define TDCR_SSZ_8_BITS (0x0 << 22) /* Sample Size */ +#define TDCR_SSZ_12_BITS (0x1 << 22) +#define TDCR_SSZ_16_BITS (0x2 << 22) +#define TDCR_SSZ_20_BITS (0x3 << 22) +#define TDCR_SSZ_24_BITS (0x4 << 22) +#define TDCR_SSZ_32_BITS (0x5 << 22) +#define TDCR_SSZ_SHIFT (0x1 << 22) +#define TDCR_SSZ_MASK (0x7 << 22) +#define TDCR_SSPMOD (0x1 << 21) /* SSP MOD */ +#define TDCR_ABR (0x1 << 20) /* Channel Abort */ +#define TDCR_CDE (0x1 << 17) /* Close Desc Enable */ +#define TDCR_PACKMOD (0x1 << 16) /* Pack Mode (ADMA Only) */ +#define TDCR_CHANACT (0x1 << 14) /* Channel Active */ +#define TDCR_FETCHND (0x1 << 13) /* Fetch Next Desc */ +#define TDCR_CHANEN (0x1 << 12) /* Channel Enable */ +#define TDCR_INTMODE (0x1 << 10) /* Interrupt Mode */ +#define TDCR_CHAINMOD (0x1 << 9) /* Chain Mode */ +#define TDCR_BURSTSZ_MSK (0x7 << 6) /* Burst Size */ +#define TDCR_BURSTSZ_4B (0x0 << 6) +#define TDCR_BURSTSZ_8B (0x1 << 6) +#define TDCR_BURSTSZ_16B (0x3 << 6) +#define TDCR_BURSTSZ_32B (0x6 << 6) +#define TDCR_BURSTSZ_64B (0x7 << 6) +#define TDCR_BURSTSZ_SQU_1B (0x5 << 6) +#define TDCR_BURSTSZ_SQU_2B (0x6 << 6) +#define TDCR_BURSTSZ_SQU_4B (0x0 << 6) +#define TDCR_BURSTSZ_SQU_8B (0x1 << 6) +#define TDCR_BURSTSZ_SQU_16B (0x3 << 6) +#define TDCR_BURSTSZ_SQU_32B (0x7 << 6) +#define TDCR_BURSTSZ_128B (0x5 << 6) +#define TDCR_DSTDIR_MSK (0x3 << 4) /* Dst Direction */ +#define TDCR_DSTDIR_ADDR_HOLD (0x2 << 4) /* Dst Addr Hold */ +#define TDCR_DSTDIR_ADDR_INC (0x0 << 4) /* Dst Addr Increment */ +#define TDCR_SRCDIR_MSK (0x3 << 2) /* Src Direction */ +#define TDCR_SRCDIR_ADDR_HOLD (0x2 << 2) /* Src Addr Hold */ +#define TDCR_SRCDIR_ADDR_INC (0x0 << 2) /* Src Addr Increment */ +#define TDCR_DSTDESCCONT (0x1 << 1) +#define TDCR_SRCDESTCONT (0x1 << 0) + +/* Two-Channel DMA Int Mask Register */ +#define TDIMR_COMP (0x1 << 0) + +/* Two-Channel DMA Int Status Register */ +#define TDISR_COMP (0x1 << 0) + +/* + * Two-Channel DMA Descriptor Struct + * NOTE: desc's buf must be aligned to 16 bytes. + */ +struct mmp_tdma_desc { + u32 byte_cnt; + u32 src_addr; + u32 dst_addr; + u32 nxt_desc; +}; + +enum mmp_tdma_type { + MMP_AUD_TDMA = 0, + PXA910_SQU, +}; + +#define TDMA_MAX_XFER_BYTES SZ_64K + +struct mmp_tdma_chan { + struct device *dev; + struct dma_chan chan; + struct dma_async_tx_descriptor desc; + struct tasklet_struct tasklet; + + struct mmp_tdma_desc *desc_arr; + dma_addr_t desc_arr_phys; + int desc_num; + enum dma_transfer_direction dir; + dma_addr_t dev_addr; + u32 burst_sz; + enum dma_slave_buswidth buswidth; + enum dma_status status; + struct dma_slave_config slave_config; + + int idx; + enum mmp_tdma_type type; + int irq; + void __iomem *reg_base; + + size_t buf_len; + size_t period_len; + size_t pos; + + struct gen_pool *pool; +}; + +#define TDMA_CHANNEL_NUM 2 +struct mmp_tdma_device { + struct device *dev; + void __iomem *base; + struct dma_device device; + struct mmp_tdma_chan *tdmac[TDMA_CHANNEL_NUM]; +}; + +#define to_mmp_tdma_chan(dchan) container_of(dchan, struct mmp_tdma_chan, chan) + +static int mmp_tdma_config_write(struct dma_chan *chan, + enum dma_transfer_direction dir, + struct dma_slave_config *dmaengine_cfg); + +static void mmp_tdma_chan_set_desc(struct mmp_tdma_chan *tdmac, dma_addr_t phys) +{ + writel(phys, tdmac->reg_base + TDNDPR); + writel(readl(tdmac->reg_base + TDCR) | TDCR_FETCHND, + tdmac->reg_base + TDCR); +} + +static void mmp_tdma_enable_irq(struct mmp_tdma_chan *tdmac, bool enable) +{ + if (enable) + writel(TDIMR_COMP, tdmac->reg_base + TDIMR); + else + writel(0, tdmac->reg_base + TDIMR); +} + +static void mmp_tdma_enable_chan(struct mmp_tdma_chan *tdmac) +{ + /* enable dma chan */ + writel(readl(tdmac->reg_base + TDCR) | TDCR_CHANEN, + tdmac->reg_base + TDCR); + tdmac->status = DMA_IN_PROGRESS; +} + +static int mmp_tdma_disable_chan(struct dma_chan *chan) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + u32 tdcr; + + tdcr = readl(tdmac->reg_base + TDCR); + tdcr |= TDCR_ABR; + tdcr &= ~TDCR_CHANEN; + writel(tdcr, tdmac->reg_base + TDCR); + + tdmac->status = DMA_COMPLETE; + + return 0; +} + +static int mmp_tdma_resume_chan(struct dma_chan *chan) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + + writel(readl(tdmac->reg_base + TDCR) | TDCR_CHANEN, + tdmac->reg_base + TDCR); + tdmac->status = DMA_IN_PROGRESS; + + return 0; +} + +static int mmp_tdma_pause_chan(struct dma_chan *chan) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + + writel(readl(tdmac->reg_base + TDCR) & ~TDCR_CHANEN, + tdmac->reg_base + TDCR); + tdmac->status = DMA_PAUSED; + + return 0; +} + +static int mmp_tdma_config_chan(struct dma_chan *chan) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + unsigned int tdcr = 0; + + mmp_tdma_disable_chan(chan); + + if (tdmac->dir == DMA_MEM_TO_DEV) + tdcr = TDCR_DSTDIR_ADDR_HOLD | TDCR_SRCDIR_ADDR_INC; + else if (tdmac->dir == DMA_DEV_TO_MEM) + tdcr = TDCR_SRCDIR_ADDR_HOLD | TDCR_DSTDIR_ADDR_INC; + + if (tdmac->type == MMP_AUD_TDMA) { + tdcr |= TDCR_PACKMOD; + + switch (tdmac->burst_sz) { + case 4: + tdcr |= TDCR_BURSTSZ_4B; + break; + case 8: + tdcr |= TDCR_BURSTSZ_8B; + break; + case 16: + tdcr |= TDCR_BURSTSZ_16B; + break; + case 32: + tdcr |= TDCR_BURSTSZ_32B; + break; + case 64: + tdcr |= TDCR_BURSTSZ_64B; + break; + case 128: + tdcr |= TDCR_BURSTSZ_128B; + break; + default: + dev_err(tdmac->dev, "unknown burst size.\n"); + return -EINVAL; + } + + switch (tdmac->buswidth) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + tdcr |= TDCR_SSZ_8_BITS; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + tdcr |= TDCR_SSZ_16_BITS; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + tdcr |= TDCR_SSZ_32_BITS; + break; + default: + dev_err(tdmac->dev, "unknown bus size.\n"); + return -EINVAL; + } + } else if (tdmac->type == PXA910_SQU) { + tdcr |= TDCR_SSPMOD; + + switch (tdmac->burst_sz) { + case 1: + tdcr |= TDCR_BURSTSZ_SQU_1B; + break; + case 2: + tdcr |= TDCR_BURSTSZ_SQU_2B; + break; + case 4: + tdcr |= TDCR_BURSTSZ_SQU_4B; + break; + case 8: + tdcr |= TDCR_BURSTSZ_SQU_8B; + break; + case 16: + tdcr |= TDCR_BURSTSZ_SQU_16B; + break; + case 32: + tdcr |= TDCR_BURSTSZ_SQU_32B; + break; + default: + dev_err(tdmac->dev, "unknown burst size.\n"); + return -EINVAL; + } + } + + writel(tdcr, tdmac->reg_base + TDCR); + return 0; +} + +static int mmp_tdma_clear_chan_irq(struct mmp_tdma_chan *tdmac) +{ + u32 reg = readl(tdmac->reg_base + TDISR); + + if (reg & TDISR_COMP) { + /* clear irq */ + reg &= ~TDISR_COMP; + writel(reg, tdmac->reg_base + TDISR); + + return 0; + } + return -EAGAIN; +} + +static size_t mmp_tdma_get_pos(struct mmp_tdma_chan *tdmac) +{ + size_t reg; + + if (tdmac->idx == 0) { + reg = __raw_readl(tdmac->reg_base + TDSAR); + reg -= tdmac->desc_arr[0].src_addr; + } else if (tdmac->idx == 1) { + reg = __raw_readl(tdmac->reg_base + TDDAR); + reg -= tdmac->desc_arr[0].dst_addr; + } else + return -EINVAL; + + return reg; +} + +static irqreturn_t mmp_tdma_chan_handler(int irq, void *dev_id) +{ + struct mmp_tdma_chan *tdmac = dev_id; + + if (mmp_tdma_clear_chan_irq(tdmac) == 0) { + tasklet_schedule(&tdmac->tasklet); + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + +static irqreturn_t mmp_tdma_int_handler(int irq, void *dev_id) +{ + struct mmp_tdma_device *tdev = dev_id; + int i, ret; + int irq_num = 0; + + for (i = 0; i < TDMA_CHANNEL_NUM; i++) { + struct mmp_tdma_chan *tdmac = tdev->tdmac[i]; + + ret = mmp_tdma_chan_handler(irq, tdmac); + if (ret == IRQ_HANDLED) + irq_num++; + } + + if (irq_num) + return IRQ_HANDLED; + else + return IRQ_NONE; +} + +static void dma_do_tasklet(struct tasklet_struct *t) +{ + struct mmp_tdma_chan *tdmac = from_tasklet(tdmac, t, tasklet); + + dmaengine_desc_get_callback_invoke(&tdmac->desc, NULL); +} + +static void mmp_tdma_free_descriptor(struct mmp_tdma_chan *tdmac) +{ + struct gen_pool *gpool; + int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc); + + gpool = tdmac->pool; + if (gpool && tdmac->desc_arr) + gen_pool_free(gpool, (unsigned long)tdmac->desc_arr, + size); + tdmac->desc_arr = NULL; + if (tdmac->status == DMA_ERROR) + tdmac->status = DMA_COMPLETE; + + return; +} + +static dma_cookie_t mmp_tdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(tx->chan); + + mmp_tdma_chan_set_desc(tdmac, tdmac->desc_arr_phys); + + return 0; +} + +static int mmp_tdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + int ret; + + dma_async_tx_descriptor_init(&tdmac->desc, chan); + tdmac->desc.tx_submit = mmp_tdma_tx_submit; + + if (tdmac->irq) { + ret = devm_request_irq(tdmac->dev, tdmac->irq, + mmp_tdma_chan_handler, 0, "tdma", tdmac); + if (ret) + return ret; + } + return 1; +} + +static void mmp_tdma_free_chan_resources(struct dma_chan *chan) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + + if (tdmac->irq) + devm_free_irq(tdmac->dev, tdmac->irq, tdmac); + mmp_tdma_free_descriptor(tdmac); + return; +} + +static struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac) +{ + struct gen_pool *gpool; + int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc); + + gpool = tdmac->pool; + if (!gpool) + return NULL; + + tdmac->desc_arr = gen_pool_dma_alloc(gpool, size, &tdmac->desc_arr_phys); + + return tdmac->desc_arr; +} + +static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + struct mmp_tdma_desc *desc; + int num_periods = buf_len / period_len; + int i = 0, buf = 0; + + if (!is_slave_direction(direction)) { + dev_err(tdmac->dev, "unsupported transfer direction\n"); + return NULL; + } + + if (tdmac->status != DMA_COMPLETE) { + dev_err(tdmac->dev, "controller busy"); + return NULL; + } + + if (period_len > TDMA_MAX_XFER_BYTES) { + dev_err(tdmac->dev, + "maximum period size exceeded: %zu > %d\n", + period_len, TDMA_MAX_XFER_BYTES); + goto err_out; + } + + tdmac->status = DMA_IN_PROGRESS; + tdmac->desc_num = num_periods; + desc = mmp_tdma_alloc_descriptor(tdmac); + if (!desc) + goto err_out; + + if (mmp_tdma_config_write(chan, direction, &tdmac->slave_config)) + goto err_out; + + while (buf < buf_len) { + desc = &tdmac->desc_arr[i]; + + if (i + 1 == num_periods) + desc->nxt_desc = tdmac->desc_arr_phys; + else + desc->nxt_desc = tdmac->desc_arr_phys + + sizeof(*desc) * (i + 1); + + if (direction == DMA_MEM_TO_DEV) { + desc->src_addr = dma_addr; + desc->dst_addr = tdmac->dev_addr; + } else { + desc->src_addr = tdmac->dev_addr; + desc->dst_addr = dma_addr; + } + desc->byte_cnt = period_len; + dma_addr += period_len; + buf += period_len; + i++; + } + + /* enable interrupt */ + if (flags & DMA_PREP_INTERRUPT) + mmp_tdma_enable_irq(tdmac, true); + + tdmac->buf_len = buf_len; + tdmac->period_len = period_len; + tdmac->pos = 0; + + return &tdmac->desc; + +err_out: + tdmac->status = DMA_ERROR; + return NULL; +} + +static int mmp_tdma_terminate_all(struct dma_chan *chan) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + + mmp_tdma_disable_chan(chan); + /* disable interrupt */ + mmp_tdma_enable_irq(tdmac, false); + + return 0; +} + +static int mmp_tdma_config(struct dma_chan *chan, + struct dma_slave_config *dmaengine_cfg) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + + memcpy(&tdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg)); + + return 0; +} + +static int mmp_tdma_config_write(struct dma_chan *chan, + enum dma_transfer_direction dir, + struct dma_slave_config *dmaengine_cfg) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + + if (dir == DMA_DEV_TO_MEM) { + tdmac->dev_addr = dmaengine_cfg->src_addr; + tdmac->burst_sz = dmaengine_cfg->src_maxburst; + tdmac->buswidth = dmaengine_cfg->src_addr_width; + } else { + tdmac->dev_addr = dmaengine_cfg->dst_addr; + tdmac->burst_sz = dmaengine_cfg->dst_maxburst; + tdmac->buswidth = dmaengine_cfg->dst_addr_width; + } + tdmac->dir = dir; + + return mmp_tdma_config_chan(chan); +} + +static enum dma_status mmp_tdma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + + tdmac->pos = mmp_tdma_get_pos(tdmac); + dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, + tdmac->buf_len - tdmac->pos); + + return tdmac->status; +} + +static void mmp_tdma_issue_pending(struct dma_chan *chan) +{ + struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + + mmp_tdma_enable_chan(tdmac); +} + +static int mmp_tdma_remove(struct platform_device *pdev) +{ + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + + return 0; +} + +static int mmp_tdma_chan_init(struct mmp_tdma_device *tdev, + int idx, int irq, + int type, struct gen_pool *pool) +{ + struct mmp_tdma_chan *tdmac; + + if (idx >= TDMA_CHANNEL_NUM) { + dev_err(tdev->dev, "too many channels for device!\n"); + return -EINVAL; + } + + /* alloc channel */ + tdmac = devm_kzalloc(tdev->dev, sizeof(*tdmac), GFP_KERNEL); + if (!tdmac) + return -ENOMEM; + + if (irq) + tdmac->irq = irq; + tdmac->dev = tdev->dev; + tdmac->chan.device = &tdev->device; + tdmac->idx = idx; + tdmac->type = type; + tdmac->reg_base = tdev->base + idx * 4; + tdmac->pool = pool; + tdmac->status = DMA_COMPLETE; + tdev->tdmac[tdmac->idx] = tdmac; + tasklet_setup(&tdmac->tasklet, dma_do_tasklet); + + /* add the channel to tdma_chan list */ + list_add_tail(&tdmac->chan.device_node, + &tdev->device.channels); + return 0; +} + +struct mmp_tdma_filter_param { + unsigned int chan_id; +}; + +static bool mmp_tdma_filter_fn(struct dma_chan *chan, void *fn_param) +{ + struct mmp_tdma_filter_param *param = fn_param; + + if (chan->chan_id != param->chan_id) + return false; + + return true; +} + +static struct dma_chan *mmp_tdma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct mmp_tdma_device *tdev = ofdma->of_dma_data; + dma_cap_mask_t mask = tdev->device.cap_mask; + struct mmp_tdma_filter_param param; + + if (dma_spec->args_count != 1) + return NULL; + + param.chan_id = dma_spec->args[0]; + + if (param.chan_id >= TDMA_CHANNEL_NUM) + return NULL; + + return __dma_request_channel(&mask, mmp_tdma_filter_fn, ¶m, + ofdma->of_node); +} + +static const struct of_device_id mmp_tdma_dt_ids[] = { + { .compatible = "marvell,adma-1.0", .data = (void *)MMP_AUD_TDMA}, + { .compatible = "marvell,pxa910-squ", .data = (void *)PXA910_SQU}, + {} +}; +MODULE_DEVICE_TABLE(of, mmp_tdma_dt_ids); + +static int mmp_tdma_probe(struct platform_device *pdev) +{ + enum mmp_tdma_type type; + const struct of_device_id *of_id; + struct mmp_tdma_device *tdev; + int i, ret; + int irq = 0, irq_num = 0; + int chan_num = TDMA_CHANNEL_NUM; + struct gen_pool *pool = NULL; + + of_id = of_match_device(mmp_tdma_dt_ids, &pdev->dev); + if (of_id) + type = (enum mmp_tdma_type) of_id->data; + else + type = platform_get_device_id(pdev)->driver_data; + + /* always have couple channels */ + tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL); + if (!tdev) + return -ENOMEM; + + tdev->dev = &pdev->dev; + + for (i = 0; i < chan_num; i++) { + if (platform_get_irq(pdev, i) > 0) + irq_num++; + } + + tdev->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(tdev->base)) + return PTR_ERR(tdev->base); + + INIT_LIST_HEAD(&tdev->device.channels); + + pool = of_gen_pool_get(pdev->dev.of_node, "asram", 0); + if (!pool) { + dev_err(&pdev->dev, "asram pool not available\n"); + return -ENOMEM; + } + + if (irq_num != chan_num) { + irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(&pdev->dev, irq, + mmp_tdma_int_handler, IRQF_SHARED, "tdma", tdev); + if (ret) + return ret; + } + + /* initialize channel parameters */ + for (i = 0; i < chan_num; i++) { + irq = (irq_num != chan_num) ? 0 : platform_get_irq(pdev, i); + ret = mmp_tdma_chan_init(tdev, i, irq, type, pool); + if (ret) + return ret; + } + + dma_cap_set(DMA_SLAVE, tdev->device.cap_mask); + dma_cap_set(DMA_CYCLIC, tdev->device.cap_mask); + tdev->device.dev = &pdev->dev; + tdev->device.device_alloc_chan_resources = + mmp_tdma_alloc_chan_resources; + tdev->device.device_free_chan_resources = + mmp_tdma_free_chan_resources; + tdev->device.device_prep_dma_cyclic = mmp_tdma_prep_dma_cyclic; + tdev->device.device_tx_status = mmp_tdma_tx_status; + tdev->device.device_issue_pending = mmp_tdma_issue_pending; + tdev->device.device_config = mmp_tdma_config; + tdev->device.device_pause = mmp_tdma_pause_chan; + tdev->device.device_resume = mmp_tdma_resume_chan; + tdev->device.device_terminate_all = mmp_tdma_terminate_all; + tdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; + + tdev->device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + if (type == MMP_AUD_TDMA) { + tdev->device.max_burst = SZ_128; + tdev->device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + tdev->device.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + } else if (type == PXA910_SQU) { + tdev->device.max_burst = SZ_32; + } + tdev->device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + tdev->device.descriptor_reuse = true; + + dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + platform_set_drvdata(pdev, tdev); + + ret = dmaenginem_async_device_register(&tdev->device); + if (ret) { + dev_err(tdev->device.dev, "unable to register\n"); + return ret; + } + + if (pdev->dev.of_node) { + ret = of_dma_controller_register(pdev->dev.of_node, + mmp_tdma_xlate, tdev); + if (ret) { + dev_err(tdev->device.dev, + "failed to register controller\n"); + return ret; + } + } + + dev_info(tdev->device.dev, "initialized\n"); + return 0; +} + +static const struct platform_device_id mmp_tdma_id_table[] = { + { "mmp-adma", MMP_AUD_TDMA }, + { "pxa910-squ", PXA910_SQU }, + { }, +}; + +static struct platform_driver mmp_tdma_driver = { + .driver = { + .name = "mmp-tdma", + .of_match_table = mmp_tdma_dt_ids, + }, + .id_table = mmp_tdma_id_table, + .probe = mmp_tdma_probe, + .remove = mmp_tdma_remove, +}; + +module_platform_driver(mmp_tdma_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MMP Two-Channel DMA Driver"); +MODULE_ALIAS("platform:mmp-tdma"); +MODULE_AUTHOR("Leo Yan "); +MODULE_AUTHOR("Zhangfei Gao "); diff --git a/drivers/dma/moxart-dma.c b/drivers/dma/moxart-dma.c new file mode 100644 index 0000000000..7565ad98ba --- /dev/null +++ b/drivers/dma/moxart-dma.c @@ -0,0 +1,676 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * MOXA ART SoCs DMA Engine support. + * + * Copyright (C) 2013 Jonas Jensen + * + * Jonas Jensen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +#define APB_DMA_MAX_CHANNEL 4 + +#define REG_OFF_ADDRESS_SOURCE 0 +#define REG_OFF_ADDRESS_DEST 4 +#define REG_OFF_CYCLES 8 +#define REG_OFF_CTRL 12 +#define REG_OFF_CHAN_SIZE 16 + +#define APB_DMA_ENABLE BIT(0) +#define APB_DMA_FIN_INT_STS BIT(1) +#define APB_DMA_FIN_INT_EN BIT(2) +#define APB_DMA_BURST_MODE BIT(3) +#define APB_DMA_ERR_INT_STS BIT(4) +#define APB_DMA_ERR_INT_EN BIT(5) + +/* + * Unset: APB + * Set: AHB + */ +#define APB_DMA_SOURCE_SELECT 0x40 +#define APB_DMA_DEST_SELECT 0x80 + +#define APB_DMA_SOURCE 0x100 +#define APB_DMA_DEST 0x1000 + +#define APB_DMA_SOURCE_MASK 0x700 +#define APB_DMA_DEST_MASK 0x7000 + +/* + * 000: No increment + * 001: +1 (Burst=0), +4 (Burst=1) + * 010: +2 (Burst=0), +8 (Burst=1) + * 011: +4 (Burst=0), +16 (Burst=1) + * 101: -1 (Burst=0), -4 (Burst=1) + * 110: -2 (Burst=0), -8 (Burst=1) + * 111: -4 (Burst=0), -16 (Burst=1) + */ +#define APB_DMA_SOURCE_INC_0 0 +#define APB_DMA_SOURCE_INC_1_4 0x100 +#define APB_DMA_SOURCE_INC_2_8 0x200 +#define APB_DMA_SOURCE_INC_4_16 0x300 +#define APB_DMA_SOURCE_DEC_1_4 0x500 +#define APB_DMA_SOURCE_DEC_2_8 0x600 +#define APB_DMA_SOURCE_DEC_4_16 0x700 +#define APB_DMA_DEST_INC_0 0 +#define APB_DMA_DEST_INC_1_4 0x1000 +#define APB_DMA_DEST_INC_2_8 0x2000 +#define APB_DMA_DEST_INC_4_16 0x3000 +#define APB_DMA_DEST_DEC_1_4 0x5000 +#define APB_DMA_DEST_DEC_2_8 0x6000 +#define APB_DMA_DEST_DEC_4_16 0x7000 + +/* + * Request signal select source/destination address for DMA hardware handshake. + * + * The request line number is a property of the DMA controller itself, + * e.g. MMC must always request channels where dma_slave_config->slave_id is 5. + * + * 0: No request / Grant signal + * 1-15: Request / Grant signal + */ +#define APB_DMA_SOURCE_REQ_NO 0x1000000 +#define APB_DMA_SOURCE_REQ_NO_MASK 0xf000000 +#define APB_DMA_DEST_REQ_NO 0x10000 +#define APB_DMA_DEST_REQ_NO_MASK 0xf0000 + +#define APB_DMA_DATA_WIDTH 0x100000 +#define APB_DMA_DATA_WIDTH_MASK 0x300000 +/* + * Data width of transfer: + * + * 00: Word + * 01: Half + * 10: Byte + */ +#define APB_DMA_DATA_WIDTH_4 0 +#define APB_DMA_DATA_WIDTH_2 0x100000 +#define APB_DMA_DATA_WIDTH_1 0x200000 + +#define APB_DMA_CYCLES_MASK 0x00ffffff + +#define MOXART_DMA_DATA_TYPE_S8 0x00 +#define MOXART_DMA_DATA_TYPE_S16 0x01 +#define MOXART_DMA_DATA_TYPE_S32 0x02 + +struct moxart_sg { + dma_addr_t addr; + uint32_t len; +}; + +struct moxart_desc { + enum dma_transfer_direction dma_dir; + dma_addr_t dev_addr; + unsigned int sglen; + unsigned int dma_cycles; + struct virt_dma_desc vd; + uint8_t es; + struct moxart_sg sg[]; +}; + +struct moxart_chan { + struct virt_dma_chan vc; + + void __iomem *base; + struct moxart_desc *desc; + + struct dma_slave_config cfg; + + bool allocated; + bool error; + int ch_num; + unsigned int line_reqno; + unsigned int sgidx; +}; + +struct moxart_dmadev { + struct dma_device dma_slave; + struct moxart_chan slave_chans[APB_DMA_MAX_CHANNEL]; + unsigned int irq; +}; + +struct moxart_filter_data { + struct moxart_dmadev *mdc; + struct of_phandle_args *dma_spec; +}; + +static const unsigned int es_bytes[] = { + [MOXART_DMA_DATA_TYPE_S8] = 1, + [MOXART_DMA_DATA_TYPE_S16] = 2, + [MOXART_DMA_DATA_TYPE_S32] = 4, +}; + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct moxart_chan *to_moxart_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct moxart_chan, vc.chan); +} + +static inline struct moxart_desc *to_moxart_dma_desc( + struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct moxart_desc, vd.tx); +} + +static void moxart_dma_desc_free(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct moxart_desc, vd)); +} + +static int moxart_terminate_all(struct dma_chan *chan) +{ + struct moxart_chan *ch = to_moxart_dma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + u32 ctrl; + + dev_dbg(chan2dev(chan), "%s: ch=%p\n", __func__, ch); + + spin_lock_irqsave(&ch->vc.lock, flags); + + if (ch->desc) { + moxart_dma_desc_free(&ch->desc->vd); + ch->desc = NULL; + } + + ctrl = readl(ch->base + REG_OFF_CTRL); + ctrl &= ~(APB_DMA_ENABLE | APB_DMA_FIN_INT_EN | APB_DMA_ERR_INT_EN); + writel(ctrl, ch->base + REG_OFF_CTRL); + + vchan_get_all_descriptors(&ch->vc, &head); + spin_unlock_irqrestore(&ch->vc.lock, flags); + vchan_dma_desc_free_list(&ch->vc, &head); + + return 0; +} + +static int moxart_slave_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct moxart_chan *ch = to_moxart_dma_chan(chan); + u32 ctrl; + + ch->cfg = *cfg; + + ctrl = readl(ch->base + REG_OFF_CTRL); + ctrl |= APB_DMA_BURST_MODE; + ctrl &= ~(APB_DMA_DEST_MASK | APB_DMA_SOURCE_MASK); + ctrl &= ~(APB_DMA_DEST_REQ_NO_MASK | APB_DMA_SOURCE_REQ_NO_MASK); + + switch (ch->cfg.src_addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + ctrl |= APB_DMA_DATA_WIDTH_1; + if (ch->cfg.direction != DMA_MEM_TO_DEV) + ctrl |= APB_DMA_DEST_INC_1_4; + else + ctrl |= APB_DMA_SOURCE_INC_1_4; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + ctrl |= APB_DMA_DATA_WIDTH_2; + if (ch->cfg.direction != DMA_MEM_TO_DEV) + ctrl |= APB_DMA_DEST_INC_2_8; + else + ctrl |= APB_DMA_SOURCE_INC_2_8; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + ctrl &= ~APB_DMA_DATA_WIDTH; + if (ch->cfg.direction != DMA_MEM_TO_DEV) + ctrl |= APB_DMA_DEST_INC_4_16; + else + ctrl |= APB_DMA_SOURCE_INC_4_16; + break; + default: + return -EINVAL; + } + + if (ch->cfg.direction == DMA_MEM_TO_DEV) { + ctrl &= ~APB_DMA_DEST_SELECT; + ctrl |= APB_DMA_SOURCE_SELECT; + ctrl |= (ch->line_reqno << 16 & + APB_DMA_DEST_REQ_NO_MASK); + } else { + ctrl |= APB_DMA_DEST_SELECT; + ctrl &= ~APB_DMA_SOURCE_SELECT; + ctrl |= (ch->line_reqno << 24 & + APB_DMA_SOURCE_REQ_NO_MASK); + } + + writel(ctrl, ch->base + REG_OFF_CTRL); + + return 0; +} + +static struct dma_async_tx_descriptor *moxart_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long tx_flags, void *context) +{ + struct moxart_chan *ch = to_moxart_dma_chan(chan); + struct moxart_desc *d; + enum dma_slave_buswidth dev_width; + dma_addr_t dev_addr; + struct scatterlist *sgent; + unsigned int es; + unsigned int i; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "%s: invalid DMA direction\n", + __func__); + return NULL; + } + + if (dir == DMA_DEV_TO_MEM) { + dev_addr = ch->cfg.src_addr; + dev_width = ch->cfg.src_addr_width; + } else { + dev_addr = ch->cfg.dst_addr; + dev_width = ch->cfg.dst_addr_width; + } + + switch (dev_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + es = MOXART_DMA_DATA_TYPE_S8; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + es = MOXART_DMA_DATA_TYPE_S16; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + es = MOXART_DMA_DATA_TYPE_S32; + break; + default: + dev_err(chan2dev(chan), "%s: unsupported data width (%u)\n", + __func__, dev_width); + return NULL; + } + + d = kzalloc(struct_size(d, sg, sg_len), GFP_ATOMIC); + if (!d) + return NULL; + + d->dma_dir = dir; + d->dev_addr = dev_addr; + d->es = es; + + for_each_sg(sgl, sgent, sg_len, i) { + d->sg[i].addr = sg_dma_address(sgent); + d->sg[i].len = sg_dma_len(sgent); + } + + d->sglen = sg_len; + + ch->error = 0; + + return vchan_tx_prep(&ch->vc, &d->vd, tx_flags); +} + +static struct dma_chan *moxart_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct moxart_dmadev *mdc = ofdma->of_dma_data; + struct dma_chan *chan; + struct moxart_chan *ch; + + chan = dma_get_any_slave_channel(&mdc->dma_slave); + if (!chan) + return NULL; + + ch = to_moxart_dma_chan(chan); + ch->line_reqno = dma_spec->args[0]; + + return chan; +} + +static int moxart_alloc_chan_resources(struct dma_chan *chan) +{ + struct moxart_chan *ch = to_moxart_dma_chan(chan); + + dev_dbg(chan2dev(chan), "%s: allocating channel #%u\n", + __func__, ch->ch_num); + ch->allocated = 1; + + return 0; +} + +static void moxart_free_chan_resources(struct dma_chan *chan) +{ + struct moxart_chan *ch = to_moxart_dma_chan(chan); + + vchan_free_chan_resources(&ch->vc); + + dev_dbg(chan2dev(chan), "%s: freeing channel #%u\n", + __func__, ch->ch_num); + ch->allocated = 0; +} + +static void moxart_dma_set_params(struct moxart_chan *ch, dma_addr_t src_addr, + dma_addr_t dst_addr) +{ + writel(src_addr, ch->base + REG_OFF_ADDRESS_SOURCE); + writel(dst_addr, ch->base + REG_OFF_ADDRESS_DEST); +} + +static void moxart_set_transfer_params(struct moxart_chan *ch, unsigned int len) +{ + struct moxart_desc *d = ch->desc; + unsigned int sglen_div = es_bytes[d->es]; + + d->dma_cycles = len >> sglen_div; + + /* + * There are 4 cycles on 64 bytes copied, i.e. one cycle copies 16 + * bytes ( when width is APB_DMAB_DATA_WIDTH_4 ). + */ + writel(d->dma_cycles, ch->base + REG_OFF_CYCLES); + + dev_dbg(chan2dev(&ch->vc.chan), "%s: set %u DMA cycles (len=%u)\n", + __func__, d->dma_cycles, len); +} + +static void moxart_start_dma(struct moxart_chan *ch) +{ + u32 ctrl; + + ctrl = readl(ch->base + REG_OFF_CTRL); + ctrl |= (APB_DMA_ENABLE | APB_DMA_FIN_INT_EN | APB_DMA_ERR_INT_EN); + writel(ctrl, ch->base + REG_OFF_CTRL); +} + +static void moxart_dma_start_sg(struct moxart_chan *ch, unsigned int idx) +{ + struct moxart_desc *d = ch->desc; + struct moxart_sg *sg = ch->desc->sg + idx; + + if (ch->desc->dma_dir == DMA_MEM_TO_DEV) + moxart_dma_set_params(ch, sg->addr, d->dev_addr); + else if (ch->desc->dma_dir == DMA_DEV_TO_MEM) + moxart_dma_set_params(ch, d->dev_addr, sg->addr); + + moxart_set_transfer_params(ch, sg->len); + + moxart_start_dma(ch); +} + +static void moxart_dma_start_desc(struct dma_chan *chan) +{ + struct moxart_chan *ch = to_moxart_dma_chan(chan); + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&ch->vc); + + if (!vd) { + ch->desc = NULL; + return; + } + + list_del(&vd->node); + + ch->desc = to_moxart_dma_desc(&vd->tx); + ch->sgidx = 0; + + moxart_dma_start_sg(ch, 0); +} + +static void moxart_issue_pending(struct dma_chan *chan) +{ + struct moxart_chan *ch = to_moxart_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&ch->vc.lock, flags); + if (vchan_issue_pending(&ch->vc) && !ch->desc) + moxart_dma_start_desc(chan); + spin_unlock_irqrestore(&ch->vc.lock, flags); +} + +static size_t moxart_dma_desc_size(struct moxart_desc *d, + unsigned int completed_sgs) +{ + unsigned int i; + size_t size; + + for (size = i = completed_sgs; i < d->sglen; i++) + size += d->sg[i].len; + + return size; +} + +static size_t moxart_dma_desc_size_in_flight(struct moxart_chan *ch) +{ + size_t size; + unsigned int completed_cycles, cycles; + + size = moxart_dma_desc_size(ch->desc, ch->sgidx); + cycles = readl(ch->base + REG_OFF_CYCLES); + completed_cycles = (ch->desc->dma_cycles - cycles); + size -= completed_cycles << es_bytes[ch->desc->es]; + + dev_dbg(chan2dev(&ch->vc.chan), "%s: size=%zu\n", __func__, size); + + return size; +} + +static enum dma_status moxart_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct moxart_chan *ch = to_moxart_dma_chan(chan); + struct virt_dma_desc *vd; + struct moxart_desc *d; + enum dma_status ret; + unsigned long flags; + + /* + * dma_cookie_status() assigns initial residue value. + */ + ret = dma_cookie_status(chan, cookie, txstate); + + spin_lock_irqsave(&ch->vc.lock, flags); + vd = vchan_find_desc(&ch->vc, cookie); + if (vd) { + d = to_moxart_dma_desc(&vd->tx); + txstate->residue = moxart_dma_desc_size(d, 0); + } else if (ch->desc && ch->desc->vd.tx.cookie == cookie) { + txstate->residue = moxart_dma_desc_size_in_flight(ch); + } + spin_unlock_irqrestore(&ch->vc.lock, flags); + + if (ch->error) + return DMA_ERROR; + + return ret; +} + +static void moxart_dma_init(struct dma_device *dma, struct device *dev) +{ + dma->device_prep_slave_sg = moxart_prep_slave_sg; + dma->device_alloc_chan_resources = moxart_alloc_chan_resources; + dma->device_free_chan_resources = moxart_free_chan_resources; + dma->device_issue_pending = moxart_issue_pending; + dma->device_tx_status = moxart_tx_status; + dma->device_config = moxart_slave_config; + dma->device_terminate_all = moxart_terminate_all; + dma->dev = dev; + + INIT_LIST_HEAD(&dma->channels); +} + +static irqreturn_t moxart_dma_interrupt(int irq, void *devid) +{ + struct moxart_dmadev *mc = devid; + struct moxart_chan *ch = &mc->slave_chans[0]; + unsigned int i; + u32 ctrl; + + dev_dbg(chan2dev(&ch->vc.chan), "%s\n", __func__); + + for (i = 0; i < APB_DMA_MAX_CHANNEL; i++, ch++) { + if (!ch->allocated) + continue; + + ctrl = readl(ch->base + REG_OFF_CTRL); + + dev_dbg(chan2dev(&ch->vc.chan), "%s: ch=%p ch->base=%p ctrl=%x\n", + __func__, ch, ch->base, ctrl); + + if (ctrl & APB_DMA_FIN_INT_STS) { + ctrl &= ~APB_DMA_FIN_INT_STS; + if (ch->desc) { + spin_lock(&ch->vc.lock); + if (++ch->sgidx < ch->desc->sglen) { + moxart_dma_start_sg(ch, ch->sgidx); + } else { + vchan_cookie_complete(&ch->desc->vd); + moxart_dma_start_desc(&ch->vc.chan); + } + spin_unlock(&ch->vc.lock); + } + } + + if (ctrl & APB_DMA_ERR_INT_STS) { + ctrl &= ~APB_DMA_ERR_INT_STS; + ch->error = 1; + } + + writel(ctrl, ch->base + REG_OFF_CTRL); + } + + return IRQ_HANDLED; +} + +static int moxart_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *node = dev->of_node; + void __iomem *dma_base_addr; + int ret, i; + unsigned int irq; + struct moxart_chan *ch; + struct moxart_dmadev *mdc; + + mdc = devm_kzalloc(dev, sizeof(*mdc), GFP_KERNEL); + if (!mdc) + return -ENOMEM; + + irq = irq_of_parse_and_map(node, 0); + if (!irq) { + dev_err(dev, "no IRQ resource\n"); + return -EINVAL; + } + + dma_base_addr = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dma_base_addr)) + return PTR_ERR(dma_base_addr); + + dma_cap_zero(mdc->dma_slave.cap_mask); + dma_cap_set(DMA_SLAVE, mdc->dma_slave.cap_mask); + dma_cap_set(DMA_PRIVATE, mdc->dma_slave.cap_mask); + + moxart_dma_init(&mdc->dma_slave, dev); + + ch = &mdc->slave_chans[0]; + for (i = 0; i < APB_DMA_MAX_CHANNEL; i++, ch++) { + ch->ch_num = i; + ch->base = dma_base_addr + i * REG_OFF_CHAN_SIZE; + ch->allocated = 0; + + ch->vc.desc_free = moxart_dma_desc_free; + vchan_init(&ch->vc, &mdc->dma_slave); + + dev_dbg(dev, "%s: chs[%d]: ch->ch_num=%u ch->base=%p\n", + __func__, i, ch->ch_num, ch->base); + } + + platform_set_drvdata(pdev, mdc); + + ret = devm_request_irq(dev, irq, moxart_dma_interrupt, 0, + "moxart-dma-engine", mdc); + if (ret) { + dev_err(dev, "devm_request_irq failed\n"); + return ret; + } + mdc->irq = irq; + + ret = dma_async_device_register(&mdc->dma_slave); + if (ret) { + dev_err(dev, "dma_async_device_register failed\n"); + return ret; + } + + ret = of_dma_controller_register(node, moxart_of_xlate, mdc); + if (ret) { + dev_err(dev, "of_dma_controller_register failed\n"); + dma_async_device_unregister(&mdc->dma_slave); + return ret; + } + + dev_dbg(dev, "%s: IRQ=%u\n", __func__, irq); + + return 0; +} + +static int moxart_remove(struct platform_device *pdev) +{ + struct moxart_dmadev *m = platform_get_drvdata(pdev); + + devm_free_irq(&pdev->dev, m->irq, m); + + dma_async_device_unregister(&m->dma_slave); + + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + + return 0; +} + +static const struct of_device_id moxart_dma_match[] = { + { .compatible = "moxa,moxart-dma" }, + { } +}; +MODULE_DEVICE_TABLE(of, moxart_dma_match); + +static struct platform_driver moxart_driver = { + .probe = moxart_probe, + .remove = moxart_remove, + .driver = { + .name = "moxart-dma-engine", + .of_match_table = moxart_dma_match, + }, +}; + +static int moxart_init(void) +{ + return platform_driver_register(&moxart_driver); +} +subsys_initcall(moxart_init); + +static void __exit moxart_exit(void) +{ + platform_driver_unregister(&moxart_driver); +} +module_exit(moxart_exit); + +MODULE_AUTHOR("Jonas Jensen "); +MODULE_DESCRIPTION("MOXART DMA engine driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c new file mode 100644 index 0000000000..1104017320 --- /dev/null +++ b/drivers/dma/mpc512x_dma.c @@ -0,0 +1,1125 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008. + * Copyright (C) Semihalf 2009 + * Copyright (C) Ilya Yanok, Emcraft Systems 2010 + * Copyright (C) Alexander Popov, Promcontroller 2014 + * Copyright (C) Mario Six, Guntermann & Drunck GmbH, 2016 + * + * Written by Piotr Ziecik . Hardware description + * (defines, structures and comments) was taken from MPC5121 DMA driver + * written by Hongjun Chen . + * + * Approved as OSADL project by a majority of OSADL members and funded + * by OSADL membership fees in 2009; for details see www.osadl.org. + */ + +/* + * MPC512x and MPC8308 DMA driver. It supports memory to memory data transfers + * (tested using dmatest module) and data transfers between memory and + * peripheral I/O memory by means of slave scatter/gather with these + * limitations: + * - chunked transfers (described by s/g lists with more than one item) are + * refused as long as proper support for scatter/gather is missing + * - transfers on MPC8308 always start from software as this SoC does not have + * external request lines for peripheral flow control + * - memory <-> I/O memory transfer chunks of sizes of 1, 2, 4, 16 (for + * MPC512x), and 32 bytes are supported, and, consequently, source + * addresses and destination addresses must be aligned accordingly; + * furthermore, for MPC512x SoCs, the transfer size must be aligned on + * (chunk size * maxburst) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dmaengine.h" + +/* Number of DMA Transfer descriptors allocated per channel */ +#define MPC_DMA_DESCRIPTORS 64 + +/* Macro definitions */ +#define MPC_DMA_TCD_OFFSET 0x1000 + +/* + * Maximum channel counts for individual hardware variants + * and the maximum channel count over all supported controllers, + * used for data structure size + */ +#define MPC8308_DMACHAN_MAX 16 +#define MPC512x_DMACHAN_MAX 64 +#define MPC_DMA_CHANNELS 64 + +/* Arbitration mode of group and channel */ +#define MPC_DMA_DMACR_EDCG (1 << 31) +#define MPC_DMA_DMACR_ERGA (1 << 3) +#define MPC_DMA_DMACR_ERCA (1 << 2) + +/* Error codes */ +#define MPC_DMA_DMAES_VLD (1 << 31) +#define MPC_DMA_DMAES_GPE (1 << 15) +#define MPC_DMA_DMAES_CPE (1 << 14) +#define MPC_DMA_DMAES_ERRCHN(err) \ + (((err) >> 8) & 0x3f) +#define MPC_DMA_DMAES_SAE (1 << 7) +#define MPC_DMA_DMAES_SOE (1 << 6) +#define MPC_DMA_DMAES_DAE (1 << 5) +#define MPC_DMA_DMAES_DOE (1 << 4) +#define MPC_DMA_DMAES_NCE (1 << 3) +#define MPC_DMA_DMAES_SGE (1 << 2) +#define MPC_DMA_DMAES_SBE (1 << 1) +#define MPC_DMA_DMAES_DBE (1 << 0) + +#define MPC_DMA_DMAGPOR_SNOOP_ENABLE (1 << 6) + +#define MPC_DMA_TSIZE_1 0x00 +#define MPC_DMA_TSIZE_2 0x01 +#define MPC_DMA_TSIZE_4 0x02 +#define MPC_DMA_TSIZE_16 0x04 +#define MPC_DMA_TSIZE_32 0x05 + +/* MPC5121 DMA engine registers */ +struct __attribute__ ((__packed__)) mpc_dma_regs { + /* 0x00 */ + u32 dmacr; /* DMA control register */ + u32 dmaes; /* DMA error status */ + /* 0x08 */ + u32 dmaerqh; /* DMA enable request high(channels 63~32) */ + u32 dmaerql; /* DMA enable request low(channels 31~0) */ + u32 dmaeeih; /* DMA enable error interrupt high(ch63~32) */ + u32 dmaeeil; /* DMA enable error interrupt low(ch31~0) */ + /* 0x18 */ + u8 dmaserq; /* DMA set enable request */ + u8 dmacerq; /* DMA clear enable request */ + u8 dmaseei; /* DMA set enable error interrupt */ + u8 dmaceei; /* DMA clear enable error interrupt */ + /* 0x1c */ + u8 dmacint; /* DMA clear interrupt request */ + u8 dmacerr; /* DMA clear error */ + u8 dmassrt; /* DMA set start bit */ + u8 dmacdne; /* DMA clear DONE status bit */ + /* 0x20 */ + u32 dmainth; /* DMA interrupt request high(ch63~32) */ + u32 dmaintl; /* DMA interrupt request low(ch31~0) */ + u32 dmaerrh; /* DMA error high(ch63~32) */ + u32 dmaerrl; /* DMA error low(ch31~0) */ + /* 0x30 */ + u32 dmahrsh; /* DMA hw request status high(ch63~32) */ + u32 dmahrsl; /* DMA hardware request status low(ch31~0) */ + union { + u32 dmaihsa; /* DMA interrupt high select AXE(ch63~32) */ + u32 dmagpor; /* (General purpose register on MPC8308) */ + }; + u32 dmailsa; /* DMA interrupt low select AXE(ch31~0) */ + /* 0x40 ~ 0xff */ + u32 reserve0[48]; /* Reserved */ + /* 0x100 */ + u8 dchpri[MPC_DMA_CHANNELS]; + /* DMA channels(0~63) priority */ +}; + +struct __attribute__ ((__packed__)) mpc_dma_tcd { + /* 0x00 */ + u32 saddr; /* Source address */ + + u32 smod:5; /* Source address modulo */ + u32 ssize:3; /* Source data transfer size */ + u32 dmod:5; /* Destination address modulo */ + u32 dsize:3; /* Destination data transfer size */ + u32 soff:16; /* Signed source address offset */ + + /* 0x08 */ + u32 nbytes; /* Inner "minor" byte count */ + u32 slast; /* Last source address adjustment */ + u32 daddr; /* Destination address */ + + /* 0x14 */ + u32 citer_elink:1; /* Enable channel-to-channel linking on + * minor loop complete + */ + u32 citer_linkch:6; /* Link channel for minor loop complete */ + u32 citer:9; /* Current "major" iteration count */ + u32 doff:16; /* Signed destination address offset */ + + /* 0x18 */ + u32 dlast_sga; /* Last Destination address adjustment/scatter + * gather address + */ + + /* 0x1c */ + u32 biter_elink:1; /* Enable channel-to-channel linking on major + * loop complete + */ + u32 biter_linkch:6; + u32 biter:9; /* Beginning "major" iteration count */ + u32 bwc:2; /* Bandwidth control */ + u32 major_linkch:6; /* Link channel number */ + u32 done:1; /* Channel done */ + u32 active:1; /* Channel active */ + u32 major_elink:1; /* Enable channel-to-channel linking on major + * loop complete + */ + u32 e_sg:1; /* Enable scatter/gather processing */ + u32 d_req:1; /* Disable request */ + u32 int_half:1; /* Enable an interrupt when major counter is + * half complete + */ + u32 int_maj:1; /* Enable an interrupt when major iteration + * count completes + */ + u32 start:1; /* Channel start */ +}; + +struct mpc_dma_desc { + struct dma_async_tx_descriptor desc; + struct mpc_dma_tcd *tcd; + dma_addr_t tcd_paddr; + int error; + struct list_head node; + int will_access_peripheral; +}; + +struct mpc_dma_chan { + struct dma_chan chan; + struct list_head free; + struct list_head prepared; + struct list_head queued; + struct list_head active; + struct list_head completed; + struct mpc_dma_tcd *tcd; + dma_addr_t tcd_paddr; + + /* Settings for access to peripheral FIFO */ + dma_addr_t src_per_paddr; + u32 src_tcd_nunits; + u8 swidth; + dma_addr_t dst_per_paddr; + u32 dst_tcd_nunits; + u8 dwidth; + + /* Lock for this structure */ + spinlock_t lock; +}; + +struct mpc_dma { + struct dma_device dma; + struct tasklet_struct tasklet; + struct mpc_dma_chan channels[MPC_DMA_CHANNELS]; + struct mpc_dma_regs __iomem *regs; + struct mpc_dma_tcd __iomem *tcd; + int irq; + int irq2; + uint error_status; + int is_mpc8308; + + /* Lock for error_status field in this structure */ + spinlock_t error_status_lock; +}; + +#define DRV_NAME "mpc512x_dma" + +/* Convert struct dma_chan to struct mpc_dma_chan */ +static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct mpc_dma_chan, chan); +} + +/* Convert struct dma_chan to struct mpc_dma */ +static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c) +{ + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c); + + return container_of(mchan, struct mpc_dma, channels[c->chan_id]); +} + +/* + * Execute all queued DMA descriptors. + * + * Following requirements must be met while calling mpc_dma_execute(): + * a) mchan->lock is acquired, + * b) mchan->active list is empty, + * c) mchan->queued list contains at least one entry. + */ +static void mpc_dma_execute(struct mpc_dma_chan *mchan) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan); + struct mpc_dma_desc *first = NULL; + struct mpc_dma_desc *prev = NULL; + struct mpc_dma_desc *mdesc; + int cid = mchan->chan.chan_id; + + while (!list_empty(&mchan->queued)) { + mdesc = list_first_entry(&mchan->queued, + struct mpc_dma_desc, node); + /* + * Grab either several mem-to-mem transfer descriptors + * or one peripheral transfer descriptor, + * don't mix mem-to-mem and peripheral transfer descriptors + * within the same 'active' list. + */ + if (mdesc->will_access_peripheral) { + if (list_empty(&mchan->active)) + list_move_tail(&mdesc->node, &mchan->active); + break; + } else { + list_move_tail(&mdesc->node, &mchan->active); + } + } + + /* Chain descriptors into one transaction */ + list_for_each_entry(mdesc, &mchan->active, node) { + if (!first) + first = mdesc; + + if (!prev) { + prev = mdesc; + continue; + } + + prev->tcd->dlast_sga = mdesc->tcd_paddr; + prev->tcd->e_sg = 1; + mdesc->tcd->start = 1; + + prev = mdesc; + } + + prev->tcd->int_maj = 1; + + /* Send first descriptor in chain into hardware */ + memcpy_toio(&mdma->tcd[cid], first->tcd, sizeof(struct mpc_dma_tcd)); + + if (first != prev) + mdma->tcd[cid].e_sg = 1; + + if (mdma->is_mpc8308) { + /* MPC8308, no request lines, software initiated start */ + out_8(&mdma->regs->dmassrt, cid); + } else if (first->will_access_peripheral) { + /* Peripherals involved, start by external request signal */ + out_8(&mdma->regs->dmaserq, cid); + } else { + /* Memory to memory transfer, software initiated start */ + out_8(&mdma->regs->dmassrt, cid); + } +} + +/* Handle interrupt on one half of DMA controller (32 channels) */ +static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off) +{ + struct mpc_dma_chan *mchan; + struct mpc_dma_desc *mdesc; + u32 status = is | es; + int ch; + + while ((ch = fls(status) - 1) >= 0) { + status &= ~(1 << ch); + mchan = &mdma->channels[ch + off]; + + spin_lock(&mchan->lock); + + out_8(&mdma->regs->dmacint, ch + off); + out_8(&mdma->regs->dmacerr, ch + off); + + /* Check error status */ + if (es & (1 << ch)) + list_for_each_entry(mdesc, &mchan->active, node) + mdesc->error = -EIO; + + /* Execute queued descriptors */ + list_splice_tail_init(&mchan->active, &mchan->completed); + if (!list_empty(&mchan->queued)) + mpc_dma_execute(mchan); + + spin_unlock(&mchan->lock); + } +} + +/* Interrupt handler */ +static irqreturn_t mpc_dma_irq(int irq, void *data) +{ + struct mpc_dma *mdma = data; + uint es; + + /* Save error status register */ + es = in_be32(&mdma->regs->dmaes); + spin_lock(&mdma->error_status_lock); + if ((es & MPC_DMA_DMAES_VLD) && mdma->error_status == 0) + mdma->error_status = es; + spin_unlock(&mdma->error_status_lock); + + /* Handle interrupt on each channel */ + if (mdma->dma.chancnt > 32) { + mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmainth), + in_be32(&mdma->regs->dmaerrh), 32); + } + mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmaintl), + in_be32(&mdma->regs->dmaerrl), 0); + + /* Schedule tasklet */ + tasklet_schedule(&mdma->tasklet); + + return IRQ_HANDLED; +} + +/* process completed descriptors */ +static void mpc_dma_process_completed(struct mpc_dma *mdma) +{ + dma_cookie_t last_cookie = 0; + struct mpc_dma_chan *mchan; + struct mpc_dma_desc *mdesc; + struct dma_async_tx_descriptor *desc; + unsigned long flags; + LIST_HEAD(list); + int i; + + for (i = 0; i < mdma->dma.chancnt; i++) { + mchan = &mdma->channels[i]; + + /* Get all completed descriptors */ + spin_lock_irqsave(&mchan->lock, flags); + if (!list_empty(&mchan->completed)) + list_splice_tail_init(&mchan->completed, &list); + spin_unlock_irqrestore(&mchan->lock, flags); + + if (list_empty(&list)) + continue; + + /* Execute callbacks and run dependencies */ + list_for_each_entry(mdesc, &list, node) { + desc = &mdesc->desc; + + dmaengine_desc_get_callback_invoke(desc, NULL); + + last_cookie = desc->cookie; + dma_run_dependencies(desc); + } + + /* Free descriptors */ + spin_lock_irqsave(&mchan->lock, flags); + list_splice_tail_init(&list, &mchan->free); + mchan->chan.completed_cookie = last_cookie; + spin_unlock_irqrestore(&mchan->lock, flags); + } +} + +/* DMA Tasklet */ +static void mpc_dma_tasklet(struct tasklet_struct *t) +{ + struct mpc_dma *mdma = from_tasklet(mdma, t, tasklet); + unsigned long flags; + uint es; + + spin_lock_irqsave(&mdma->error_status_lock, flags); + es = mdma->error_status; + mdma->error_status = 0; + spin_unlock_irqrestore(&mdma->error_status_lock, flags); + + /* Print nice error report */ + if (es) { + dev_err(mdma->dma.dev, + "Hardware reported following error(s) on channel %u:\n", + MPC_DMA_DMAES_ERRCHN(es)); + + if (es & MPC_DMA_DMAES_GPE) + dev_err(mdma->dma.dev, "- Group Priority Error\n"); + if (es & MPC_DMA_DMAES_CPE) + dev_err(mdma->dma.dev, "- Channel Priority Error\n"); + if (es & MPC_DMA_DMAES_SAE) + dev_err(mdma->dma.dev, "- Source Address Error\n"); + if (es & MPC_DMA_DMAES_SOE) + dev_err(mdma->dma.dev, "- Source Offset Configuration Error\n"); + if (es & MPC_DMA_DMAES_DAE) + dev_err(mdma->dma.dev, "- Destination Address Error\n"); + if (es & MPC_DMA_DMAES_DOE) + dev_err(mdma->dma.dev, "- Destination Offset Configuration Error\n"); + if (es & MPC_DMA_DMAES_NCE) + dev_err(mdma->dma.dev, "- NBytes/Citter Configuration Error\n"); + if (es & MPC_DMA_DMAES_SGE) + dev_err(mdma->dma.dev, "- Scatter/Gather Configuration Error\n"); + if (es & MPC_DMA_DMAES_SBE) + dev_err(mdma->dma.dev, "- Source Bus Error\n"); + if (es & MPC_DMA_DMAES_DBE) + dev_err(mdma->dma.dev, "- Destination Bus Error\n"); + } + + mpc_dma_process_completed(mdma); +} + +/* Submit descriptor to hardware */ +static dma_cookie_t mpc_dma_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(txd->chan); + struct mpc_dma_desc *mdesc; + unsigned long flags; + dma_cookie_t cookie; + + mdesc = container_of(txd, struct mpc_dma_desc, desc); + + spin_lock_irqsave(&mchan->lock, flags); + + /* Move descriptor to queue */ + list_move_tail(&mdesc->node, &mchan->queued); + + /* If channel is idle, execute all queued descriptors */ + if (list_empty(&mchan->active)) + mpc_dma_execute(mchan); + + /* Update cookie */ + cookie = dma_cookie_assign(txd); + spin_unlock_irqrestore(&mchan->lock, flags); + + return cookie; +} + +/* Alloc channel resources */ +static int mpc_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma_desc *mdesc; + struct mpc_dma_tcd *tcd; + dma_addr_t tcd_paddr; + unsigned long flags; + LIST_HEAD(descs); + int i; + + /* Alloc DMA memory for Transfer Control Descriptors */ + tcd = dma_alloc_coherent(mdma->dma.dev, + MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd), + &tcd_paddr, GFP_KERNEL); + if (!tcd) + return -ENOMEM; + + /* Alloc descriptors for this channel */ + for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) { + mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL); + if (!mdesc) { + dev_notice(mdma->dma.dev, + "Memory allocation error. Allocated only %u descriptors\n", i); + break; + } + + dma_async_tx_descriptor_init(&mdesc->desc, chan); + mdesc->desc.flags = DMA_CTRL_ACK; + mdesc->desc.tx_submit = mpc_dma_tx_submit; + + mdesc->tcd = &tcd[i]; + mdesc->tcd_paddr = tcd_paddr + (i * sizeof(struct mpc_dma_tcd)); + + list_add_tail(&mdesc->node, &descs); + } + + /* Return error only if no descriptors were allocated */ + if (i == 0) { + dma_free_coherent(mdma->dma.dev, + MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd), + tcd, tcd_paddr); + return -ENOMEM; + } + + spin_lock_irqsave(&mchan->lock, flags); + mchan->tcd = tcd; + mchan->tcd_paddr = tcd_paddr; + list_splice_tail_init(&descs, &mchan->free); + spin_unlock_irqrestore(&mchan->lock, flags); + + /* Enable Error Interrupt */ + out_8(&mdma->regs->dmaseei, chan->chan_id); + + return 0; +} + +/* Free channel resources */ +static void mpc_dma_free_chan_resources(struct dma_chan *chan) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma_desc *mdesc, *tmp; + struct mpc_dma_tcd *tcd; + dma_addr_t tcd_paddr; + unsigned long flags; + LIST_HEAD(descs); + + spin_lock_irqsave(&mchan->lock, flags); + + /* Channel must be idle */ + BUG_ON(!list_empty(&mchan->prepared)); + BUG_ON(!list_empty(&mchan->queued)); + BUG_ON(!list_empty(&mchan->active)); + BUG_ON(!list_empty(&mchan->completed)); + + /* Move data */ + list_splice_tail_init(&mchan->free, &descs); + tcd = mchan->tcd; + tcd_paddr = mchan->tcd_paddr; + + spin_unlock_irqrestore(&mchan->lock, flags); + + /* Free DMA memory used by descriptors */ + dma_free_coherent(mdma->dma.dev, + MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd), + tcd, tcd_paddr); + + /* Free descriptors */ + list_for_each_entry_safe(mdesc, tmp, &descs, node) + kfree(mdesc); + + /* Disable Error Interrupt */ + out_8(&mdma->regs->dmaceei, chan->chan_id); +} + +/* Send all pending descriptor to hardware */ +static void mpc_dma_issue_pending(struct dma_chan *chan) +{ + /* + * We are posting descriptors to the hardware as soon as + * they are ready, so this function does nothing. + */ +} + +/* Check request completion status */ +static enum dma_status +mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(chan, cookie, txstate); +} + +/* Prepare descriptor for memory to memory copy */ +static struct dma_async_tx_descriptor * +mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma_desc *mdesc = NULL; + struct mpc_dma_tcd *tcd; + unsigned long iflags; + + /* Get free descriptor */ + spin_lock_irqsave(&mchan->lock, iflags); + if (!list_empty(&mchan->free)) { + mdesc = list_first_entry(&mchan->free, struct mpc_dma_desc, + node); + list_del(&mdesc->node); + } + spin_unlock_irqrestore(&mchan->lock, iflags); + + if (!mdesc) { + /* try to free completed descriptors */ + mpc_dma_process_completed(mdma); + return NULL; + } + + mdesc->error = 0; + mdesc->will_access_peripheral = 0; + tcd = mdesc->tcd; + + /* Prepare Transfer Control Descriptor for this transaction */ + memset(tcd, 0, sizeof(struct mpc_dma_tcd)); + + if (IS_ALIGNED(src | dst | len, 32)) { + tcd->ssize = MPC_DMA_TSIZE_32; + tcd->dsize = MPC_DMA_TSIZE_32; + tcd->soff = 32; + tcd->doff = 32; + } else if (!mdma->is_mpc8308 && IS_ALIGNED(src | dst | len, 16)) { + /* MPC8308 doesn't support 16 byte transfers */ + tcd->ssize = MPC_DMA_TSIZE_16; + tcd->dsize = MPC_DMA_TSIZE_16; + tcd->soff = 16; + tcd->doff = 16; + } else if (IS_ALIGNED(src | dst | len, 4)) { + tcd->ssize = MPC_DMA_TSIZE_4; + tcd->dsize = MPC_DMA_TSIZE_4; + tcd->soff = 4; + tcd->doff = 4; + } else if (IS_ALIGNED(src | dst | len, 2)) { + tcd->ssize = MPC_DMA_TSIZE_2; + tcd->dsize = MPC_DMA_TSIZE_2; + tcd->soff = 2; + tcd->doff = 2; + } else { + tcd->ssize = MPC_DMA_TSIZE_1; + tcd->dsize = MPC_DMA_TSIZE_1; + tcd->soff = 1; + tcd->doff = 1; + } + + tcd->saddr = src; + tcd->daddr = dst; + tcd->nbytes = len; + tcd->biter = 1; + tcd->citer = 1; + + /* Place descriptor in prepared list */ + spin_lock_irqsave(&mchan->lock, iflags); + list_add_tail(&mdesc->node, &mchan->prepared); + spin_unlock_irqrestore(&mchan->lock, iflags); + + return &mdesc->desc; +} + +inline u8 buswidth_to_dmatsize(u8 buswidth) +{ + u8 res; + + for (res = 0; buswidth > 1; buswidth /= 2) + res++; + return res; +} + +static struct dma_async_tx_descriptor * +mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma_desc *mdesc = NULL; + dma_addr_t per_paddr; + u32 tcd_nunits; + struct mpc_dma_tcd *tcd; + unsigned long iflags; + struct scatterlist *sg; + size_t len; + int iter, i; + + /* Currently there is no proper support for scatter/gather */ + if (sg_len != 1) + return NULL; + + if (!is_slave_direction(direction)) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + spin_lock_irqsave(&mchan->lock, iflags); + + mdesc = list_first_entry(&mchan->free, + struct mpc_dma_desc, node); + if (!mdesc) { + spin_unlock_irqrestore(&mchan->lock, iflags); + /* Try to free completed descriptors */ + mpc_dma_process_completed(mdma); + return NULL; + } + + list_del(&mdesc->node); + + if (direction == DMA_DEV_TO_MEM) { + per_paddr = mchan->src_per_paddr; + tcd_nunits = mchan->src_tcd_nunits; + } else { + per_paddr = mchan->dst_per_paddr; + tcd_nunits = mchan->dst_tcd_nunits; + } + + spin_unlock_irqrestore(&mchan->lock, iflags); + + if (per_paddr == 0 || tcd_nunits == 0) + goto err_prep; + + mdesc->error = 0; + mdesc->will_access_peripheral = 1; + + /* Prepare Transfer Control Descriptor for this transaction */ + tcd = mdesc->tcd; + + memset(tcd, 0, sizeof(struct mpc_dma_tcd)); + + if (direction == DMA_DEV_TO_MEM) { + tcd->saddr = per_paddr; + tcd->daddr = sg_dma_address(sg); + + if (!IS_ALIGNED(sg_dma_address(sg), mchan->dwidth)) + goto err_prep; + + tcd->soff = 0; + tcd->doff = mchan->dwidth; + } else { + tcd->saddr = sg_dma_address(sg); + tcd->daddr = per_paddr; + + if (!IS_ALIGNED(sg_dma_address(sg), mchan->swidth)) + goto err_prep; + + tcd->soff = mchan->swidth; + tcd->doff = 0; + } + + tcd->ssize = buswidth_to_dmatsize(mchan->swidth); + tcd->dsize = buswidth_to_dmatsize(mchan->dwidth); + + if (mdma->is_mpc8308) { + tcd->nbytes = sg_dma_len(sg); + if (!IS_ALIGNED(tcd->nbytes, mchan->swidth)) + goto err_prep; + + /* No major loops for MPC8303 */ + tcd->biter = 1; + tcd->citer = 1; + } else { + len = sg_dma_len(sg); + tcd->nbytes = tcd_nunits * tcd->ssize; + if (!IS_ALIGNED(len, tcd->nbytes)) + goto err_prep; + + iter = len / tcd->nbytes; + if (iter >= 1 << 15) { + /* len is too big */ + goto err_prep; + } + /* citer_linkch contains the high bits of iter */ + tcd->biter = iter & 0x1ff; + tcd->biter_linkch = iter >> 9; + tcd->citer = tcd->biter; + tcd->citer_linkch = tcd->biter_linkch; + } + + tcd->e_sg = 0; + tcd->d_req = 1; + + /* Place descriptor in prepared list */ + spin_lock_irqsave(&mchan->lock, iflags); + list_add_tail(&mdesc->node, &mchan->prepared); + spin_unlock_irqrestore(&mchan->lock, iflags); + } + + return &mdesc->desc; + +err_prep: + /* Put the descriptor back */ + spin_lock_irqsave(&mchan->lock, iflags); + list_add_tail(&mdesc->node, &mchan->free); + spin_unlock_irqrestore(&mchan->lock, iflags); + + return NULL; +} + +inline bool is_buswidth_valid(u8 buswidth, bool is_mpc8308) +{ + switch (buswidth) { + case 16: + if (is_mpc8308) + return false; + break; + case 1: + case 2: + case 4: + case 32: + break; + default: + return false; + } + + return true; +} + +static int mpc_dma_device_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan); + unsigned long flags; + + /* + * Software constraints: + * - only transfers between a peripheral device and memory are + * supported + * - transfer chunk sizes of 1, 2, 4, 16 (for MPC512x), and 32 bytes + * are supported, and, consequently, source addresses and + * destination addresses; must be aligned accordingly; furthermore, + * for MPC512x SoCs, the transfer size must be aligned on (chunk + * size * maxburst) + * - during the transfer, the RAM address is incremented by the size + * of transfer chunk + * - the peripheral port's address is constant during the transfer. + */ + + if (!IS_ALIGNED(cfg->src_addr, cfg->src_addr_width) || + !IS_ALIGNED(cfg->dst_addr, cfg->dst_addr_width)) { + return -EINVAL; + } + + if (!is_buswidth_valid(cfg->src_addr_width, mdma->is_mpc8308) || + !is_buswidth_valid(cfg->dst_addr_width, mdma->is_mpc8308)) + return -EINVAL; + + spin_lock_irqsave(&mchan->lock, flags); + + mchan->src_per_paddr = cfg->src_addr; + mchan->src_tcd_nunits = cfg->src_maxburst; + mchan->swidth = cfg->src_addr_width; + mchan->dst_per_paddr = cfg->dst_addr; + mchan->dst_tcd_nunits = cfg->dst_maxburst; + mchan->dwidth = cfg->dst_addr_width; + + /* Apply defaults */ + if (mchan->src_tcd_nunits == 0) + mchan->src_tcd_nunits = 1; + if (mchan->dst_tcd_nunits == 0) + mchan->dst_tcd_nunits = 1; + + spin_unlock_irqrestore(&mchan->lock, flags); + + return 0; +} + +static int mpc_dma_device_terminate_all(struct dma_chan *chan) +{ + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); + unsigned long flags; + + /* Disable channel requests */ + spin_lock_irqsave(&mchan->lock, flags); + + out_8(&mdma->regs->dmacerq, chan->chan_id); + list_splice_tail_init(&mchan->prepared, &mchan->free); + list_splice_tail_init(&mchan->queued, &mchan->free); + list_splice_tail_init(&mchan->active, &mchan->free); + + spin_unlock_irqrestore(&mchan->lock, flags); + + return 0; +} + +static int mpc_dma_probe(struct platform_device *op) +{ + struct device_node *dn = op->dev.of_node; + struct device *dev = &op->dev; + struct dma_device *dma; + struct mpc_dma *mdma; + struct mpc_dma_chan *mchan; + struct resource res; + ulong regs_start, regs_size; + int retval, i; + u8 chancnt; + + mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL); + if (!mdma) { + retval = -ENOMEM; + goto err; + } + + mdma->irq = irq_of_parse_and_map(dn, 0); + if (!mdma->irq) { + dev_err(dev, "Error mapping IRQ!\n"); + retval = -EINVAL; + goto err; + } + + if (of_device_is_compatible(dn, "fsl,mpc8308-dma")) { + mdma->is_mpc8308 = 1; + mdma->irq2 = irq_of_parse_and_map(dn, 1); + if (!mdma->irq2) { + dev_err(dev, "Error mapping IRQ!\n"); + retval = -EINVAL; + goto err_dispose1; + } + } + + retval = of_address_to_resource(dn, 0, &res); + if (retval) { + dev_err(dev, "Error parsing memory region!\n"); + goto err_dispose2; + } + + regs_start = res.start; + regs_size = resource_size(&res); + + if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) { + dev_err(dev, "Error requesting memory region!\n"); + retval = -EBUSY; + goto err_dispose2; + } + + mdma->regs = devm_ioremap(dev, regs_start, regs_size); + if (!mdma->regs) { + dev_err(dev, "Error mapping memory region!\n"); + retval = -ENOMEM; + goto err_dispose2; + } + + mdma->tcd = (struct mpc_dma_tcd *)((u8 *)(mdma->regs) + + MPC_DMA_TCD_OFFSET); + + retval = request_irq(mdma->irq, &mpc_dma_irq, 0, DRV_NAME, mdma); + if (retval) { + dev_err(dev, "Error requesting IRQ!\n"); + retval = -EINVAL; + goto err_dispose2; + } + + if (mdma->is_mpc8308) { + retval = request_irq(mdma->irq2, &mpc_dma_irq, 0, + DRV_NAME, mdma); + if (retval) { + dev_err(dev, "Error requesting IRQ2!\n"); + retval = -EINVAL; + goto err_free1; + } + } + + spin_lock_init(&mdma->error_status_lock); + + dma = &mdma->dma; + dma->dev = dev; + dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources; + dma->device_free_chan_resources = mpc_dma_free_chan_resources; + dma->device_issue_pending = mpc_dma_issue_pending; + dma->device_tx_status = mpc_dma_tx_status; + dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy; + dma->device_prep_slave_sg = mpc_dma_prep_slave_sg; + dma->device_config = mpc_dma_device_config; + dma->device_terminate_all = mpc_dma_device_terminate_all; + + INIT_LIST_HEAD(&dma->channels); + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma_cap_set(DMA_SLAVE, dma->cap_mask); + + if (mdma->is_mpc8308) + chancnt = MPC8308_DMACHAN_MAX; + else + chancnt = MPC512x_DMACHAN_MAX; + + for (i = 0; i < chancnt; i++) { + mchan = &mdma->channels[i]; + + mchan->chan.device = dma; + dma_cookie_init(&mchan->chan); + + INIT_LIST_HEAD(&mchan->free); + INIT_LIST_HEAD(&mchan->prepared); + INIT_LIST_HEAD(&mchan->queued); + INIT_LIST_HEAD(&mchan->active); + INIT_LIST_HEAD(&mchan->completed); + + spin_lock_init(&mchan->lock); + list_add_tail(&mchan->chan.device_node, &dma->channels); + } + + tasklet_setup(&mdma->tasklet, mpc_dma_tasklet); + + /* + * Configure DMA Engine: + * - Dynamic clock, + * - Round-robin group arbitration, + * - Round-robin channel arbitration. + */ + if (mdma->is_mpc8308) { + /* MPC8308 has 16 channels and lacks some registers */ + out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA); + + /* enable snooping */ + out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE); + /* Disable error interrupts */ + out_be32(&mdma->regs->dmaeeil, 0); + + /* Clear interrupts status */ + out_be32(&mdma->regs->dmaintl, 0xFFFF); + out_be32(&mdma->regs->dmaerrl, 0xFFFF); + } else { + out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG | + MPC_DMA_DMACR_ERGA | + MPC_DMA_DMACR_ERCA); + + /* Disable hardware DMA requests */ + out_be32(&mdma->regs->dmaerqh, 0); + out_be32(&mdma->regs->dmaerql, 0); + + /* Disable error interrupts */ + out_be32(&mdma->regs->dmaeeih, 0); + out_be32(&mdma->regs->dmaeeil, 0); + + /* Clear interrupts status */ + out_be32(&mdma->regs->dmainth, 0xFFFFFFFF); + out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF); + out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF); + out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF); + + /* Route interrupts to IPIC */ + out_be32(&mdma->regs->dmaihsa, 0); + out_be32(&mdma->regs->dmailsa, 0); + } + + /* Register DMA engine */ + dev_set_drvdata(dev, mdma); + retval = dma_async_device_register(dma); + if (retval) + goto err_free2; + + /* Register with OF helpers for DMA lookups (nonfatal) */ + if (dev->of_node) { + retval = of_dma_controller_register(dev->of_node, + of_dma_xlate_by_chan_id, mdma); + if (retval) + dev_warn(dev, "Could not register for OF lookup\n"); + } + + return 0; + +err_free2: + if (mdma->is_mpc8308) + free_irq(mdma->irq2, mdma); +err_free1: + free_irq(mdma->irq, mdma); +err_dispose2: + if (mdma->is_mpc8308) + irq_dispose_mapping(mdma->irq2); +err_dispose1: + irq_dispose_mapping(mdma->irq); +err: + return retval; +} + +static int mpc_dma_remove(struct platform_device *op) +{ + struct device *dev = &op->dev; + struct mpc_dma *mdma = dev_get_drvdata(dev); + + if (dev->of_node) + of_dma_controller_free(dev->of_node); + dma_async_device_unregister(&mdma->dma); + if (mdma->is_mpc8308) { + free_irq(mdma->irq2, mdma); + irq_dispose_mapping(mdma->irq2); + } + free_irq(mdma->irq, mdma); + irq_dispose_mapping(mdma->irq); + tasklet_kill(&mdma->tasklet); + + return 0; +} + +static const struct of_device_id mpc_dma_match[] = { + { .compatible = "fsl,mpc5121-dma", }, + { .compatible = "fsl,mpc8308-dma", }, + {}, +}; +MODULE_DEVICE_TABLE(of, mpc_dma_match); + +static struct platform_driver mpc_dma_driver = { + .probe = mpc_dma_probe, + .remove = mpc_dma_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = mpc_dma_match, + }, +}; + +module_platform_driver(mpc_dma_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Piotr Ziecik "); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c new file mode 100644 index 0000000000..23b232b575 --- /dev/null +++ b/drivers/dma/mv_xor.c @@ -0,0 +1,1468 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * offload engine driver for the Marvell XOR engine + * Copyright (C) 2007, 2008, Marvell International Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "mv_xor.h" + +enum mv_xor_type { + XOR_ORION, + XOR_ARMADA_38X, + XOR_ARMADA_37XX, +}; + +enum mv_xor_mode { + XOR_MODE_IN_REG, + XOR_MODE_IN_DESC, +}; + +static void mv_xor_issue_pending(struct dma_chan *chan); + +#define to_mv_xor_chan(chan) \ + container_of(chan, struct mv_xor_chan, dmachan) + +#define to_mv_xor_slot(tx) \ + container_of(tx, struct mv_xor_desc_slot, async_tx) + +#define mv_chan_to_devp(chan) \ + ((chan)->dmadev.dev) + +static void mv_desc_init(struct mv_xor_desc_slot *desc, + dma_addr_t addr, u32 byte_count, + enum dma_ctrl_flags flags) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + + hw_desc->status = XOR_DESC_DMA_OWNED; + hw_desc->phy_next_desc = 0; + /* Enable end-of-descriptor interrupts only for DMA_PREP_INTERRUPT */ + hw_desc->desc_command = (flags & DMA_PREP_INTERRUPT) ? + XOR_DESC_EOD_INT_EN : 0; + hw_desc->phy_dest_addr = addr; + hw_desc->byte_count = byte_count; +} + +static void mv_desc_set_mode(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + + switch (desc->type) { + case DMA_XOR: + case DMA_INTERRUPT: + hw_desc->desc_command |= XOR_DESC_OPERATION_XOR; + break; + case DMA_MEMCPY: + hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY; + break; + default: + BUG(); + return; + } +} + +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; +} + +static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, + int index, dma_addr_t addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_src_addr[mv_phy_src_idx(index)] = addr; + if (desc->type == DMA_XOR) + hw_desc->desc_command |= (1 << index); +} + +static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan) +{ + return readl_relaxed(XOR_CURR_DESC(chan)); +} + +static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan, + u32 next_desc_addr) +{ + writel_relaxed(next_desc_addr, XOR_NEXT_DESC(chan)); +} + +static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan) +{ + u32 val = readl_relaxed(XOR_INTR_MASK(chan)); + val |= XOR_INTR_MASK_VALUE << (chan->idx * 16); + writel_relaxed(val, XOR_INTR_MASK(chan)); +} + +static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) +{ + u32 intr_cause = readl_relaxed(XOR_INTR_CAUSE(chan)); + intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF; + return intr_cause; +} + +static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan) +{ + u32 val; + + val = XOR_INT_END_OF_DESC | XOR_INT_END_OF_CHAIN | XOR_INT_STOPPED; + val = ~(val << (chan->idx * 16)); + dev_dbg(mv_chan_to_devp(chan), "%s, val 0x%08x\n", __func__, val); + writel_relaxed(val, XOR_INTR_CAUSE(chan)); +} + +static void mv_chan_clear_err_status(struct mv_xor_chan *chan) +{ + u32 val = 0xFFFF0000 >> (chan->idx * 16); + writel_relaxed(val, XOR_INTR_CAUSE(chan)); +} + +static void mv_chan_set_mode(struct mv_xor_chan *chan, + u32 op_mode) +{ + u32 config = readl_relaxed(XOR_CONFIG(chan)); + + config &= ~0x7; + config |= op_mode; + +#if defined(__BIG_ENDIAN) + config |= XOR_DESCRIPTOR_SWAP; +#else + config &= ~XOR_DESCRIPTOR_SWAP; +#endif + + writel_relaxed(config, XOR_CONFIG(chan)); +} + +static void mv_chan_activate(struct mv_xor_chan *chan) +{ + dev_dbg(mv_chan_to_devp(chan), " activate chan.\n"); + + /* writel ensures all descriptors are flushed before activation */ + writel(BIT(0), XOR_ACTIVATION(chan)); +} + +static char mv_chan_is_busy(struct mv_xor_chan *chan) +{ + u32 state = readl_relaxed(XOR_ACTIVATION(chan)); + + state = (state >> 4) & 0x3; + + return (state == 1) ? 1 : 0; +} + +/* + * mv_chan_start_new_chain - program the engine to operate on new + * chain headed by sw_desc + * Caller must hold &mv_chan->lock while calling this function + */ +static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) +{ + dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n", + __func__, __LINE__, sw_desc); + + /* set the hardware chain */ + mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys); + + mv_chan->pending++; + mv_xor_issue_pending(&mv_chan->dmachan); +} + +static dma_cookie_t +mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, + dma_cookie_t cookie) +{ + BUG_ON(desc->async_tx.cookie < 0); + + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + + dma_descriptor_unmap(&desc->async_tx); + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL); + } + + /* run dependent operations */ + dma_run_dependencies(&desc->async_tx); + + return cookie; +} + +static int +mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter, *_iter; + + dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, + node) { + + if (async_tx_test_ack(&iter->async_tx)) { + list_move_tail(&iter->node, &mv_chan->free_slots); + if (!list_empty(&iter->sg_tx_list)) { + list_splice_tail_init(&iter->sg_tx_list, + &mv_chan->free_slots); + } + } + } + return 0; +} + +static int +mv_desc_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) +{ + dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n", + __func__, __LINE__, desc, desc->async_tx.flags); + + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) { + /* move this slot to the completed_slots */ + list_move_tail(&desc->node, &mv_chan->completed_slots); + if (!list_empty(&desc->sg_tx_list)) { + list_splice_tail_init(&desc->sg_tx_list, + &mv_chan->completed_slots); + } + } else { + list_move_tail(&desc->node, &mv_chan->free_slots); + if (!list_empty(&desc->sg_tx_list)) { + list_splice_tail_init(&desc->sg_tx_list, + &mv_chan->free_slots); + } + } + + return 0; +} + +/* This function must be called with the mv_xor_chan spinlock held */ +static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter, *_iter; + dma_cookie_t cookie = 0; + int busy = mv_chan_is_busy(mv_chan); + u32 current_desc = mv_chan_get_current_desc(mv_chan); + int current_cleaned = 0; + struct mv_xor_desc *hw_desc; + + dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); + dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc); + mv_chan_clean_completed_slots(mv_chan); + + /* free completed slots from the chain starting with + * the oldest descriptor + */ + + list_for_each_entry_safe(iter, _iter, &mv_chan->chain, + node) { + + /* clean finished descriptors */ + hw_desc = iter->hw_desc; + if (hw_desc->status & XOR_DESC_SUCCESS) { + cookie = mv_desc_run_tx_complete_actions(iter, mv_chan, + cookie); + + /* done processing desc, clean slot */ + mv_desc_clean_slot(iter, mv_chan); + + /* break if we did cleaned the current */ + if (iter->async_tx.phys == current_desc) { + current_cleaned = 1; + break; + } + } else { + if (iter->async_tx.phys == current_desc) { + current_cleaned = 0; + break; + } + } + } + + if ((busy == 0) && !list_empty(&mv_chan->chain)) { + if (current_cleaned) { + /* + * current descriptor cleaned and removed, run + * from list head + */ + iter = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + node); + mv_chan_start_new_chain(mv_chan, iter); + } else { + if (!list_is_last(&iter->node, &mv_chan->chain)) { + /* + * descriptors are still waiting after + * current, trigger them + */ + iter = list_entry(iter->node.next, + struct mv_xor_desc_slot, + node); + mv_chan_start_new_chain(mv_chan, iter); + } else { + /* + * some descriptors are still waiting + * to be cleaned + */ + tasklet_schedule(&mv_chan->irq_tasklet); + } + } + } + + if (cookie > 0) + mv_chan->dmachan.completed_cookie = cookie; +} + +static void mv_xor_tasklet(struct tasklet_struct *t) +{ + struct mv_xor_chan *chan = from_tasklet(chan, t, irq_tasklet); + + spin_lock(&chan->lock); + mv_chan_slot_cleanup(chan); + spin_unlock(&chan->lock); +} + +static struct mv_xor_desc_slot * +mv_chan_alloc_slot(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter; + + spin_lock_bh(&mv_chan->lock); + + if (!list_empty(&mv_chan->free_slots)) { + iter = list_first_entry(&mv_chan->free_slots, + struct mv_xor_desc_slot, + node); + + list_move_tail(&iter->node, &mv_chan->allocated_slots); + + spin_unlock_bh(&mv_chan->lock); + + /* pre-ack descriptor */ + async_tx_ack(&iter->async_tx); + iter->async_tx.cookie = -EBUSY; + + return iter; + + } + + spin_unlock_bh(&mv_chan->lock); + + /* try to free some slots if the allocation fails */ + tasklet_schedule(&mv_chan->irq_tasklet); + + return NULL; +} + +/************************ DMA engine API functions ****************************/ +static dma_cookie_t +mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx); + struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan); + struct mv_xor_desc_slot *old_chain_tail; + dma_cookie_t cookie; + int new_hw_chain = 1; + + dev_dbg(mv_chan_to_devp(mv_chan), + "%s sw_desc %p: async_tx %p\n", + __func__, sw_desc, &sw_desc->async_tx); + + spin_lock_bh(&mv_chan->lock); + cookie = dma_cookie_assign(tx); + + if (list_empty(&mv_chan->chain)) + list_move_tail(&sw_desc->node, &mv_chan->chain); + else { + new_hw_chain = 0; + + old_chain_tail = list_entry(mv_chan->chain.prev, + struct mv_xor_desc_slot, + node); + list_move_tail(&sw_desc->node, &mv_chan->chain); + + dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n", + &old_chain_tail->async_tx.phys); + + /* fix up the hardware chain */ + mv_desc_set_next_desc(old_chain_tail, sw_desc->async_tx.phys); + + /* if the channel is not busy */ + if (!mv_chan_is_busy(mv_chan)) { + u32 current_desc = mv_chan_get_current_desc(mv_chan); + /* + * and the curren desc is the end of the chain before + * the append, then we need to start the channel + */ + if (current_desc == old_chain_tail->async_tx.phys) + new_hw_chain = 1; + } + } + + if (new_hw_chain) + mv_chan_start_new_chain(mv_chan, sw_desc); + + spin_unlock_bh(&mv_chan->lock); + + return cookie; +} + +/* returns the number of allocated descriptors */ +static int mv_xor_alloc_chan_resources(struct dma_chan *chan) +{ + void *virt_desc; + dma_addr_t dma_desc; + int idx; + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *slot = NULL; + int num_descs_in_pool = MV_XOR_POOL_SIZE/MV_XOR_SLOT_SIZE; + + /* Allocate descriptor slots */ + idx = mv_chan->slots_allocated; + while (idx < num_descs_in_pool) { + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + dev_info(mv_chan_to_devp(mv_chan), + "channel only initialized %d descriptor slots", + idx); + break; + } + virt_desc = mv_chan->dma_desc_pool_virt; + slot->hw_desc = virt_desc + idx * MV_XOR_SLOT_SIZE; + + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = mv_xor_tx_submit; + INIT_LIST_HEAD(&slot->node); + INIT_LIST_HEAD(&slot->sg_tx_list); + dma_desc = mv_chan->dma_desc_pool; + slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE; + slot->idx = idx++; + + spin_lock_bh(&mv_chan->lock); + mv_chan->slots_allocated = idx; + list_add_tail(&slot->node, &mv_chan->free_slots); + spin_unlock_bh(&mv_chan->lock); + } + + dev_dbg(mv_chan_to_devp(mv_chan), + "allocated %d descriptor slots\n", + mv_chan->slots_allocated); + + return mv_chan->slots_allocated ? : -ENOMEM; +} + +/* + * Check if source or destination is an PCIe/IO address (non-SDRAM) and add + * a new MBus window if necessary. Use a cache for these check so that + * the MMIO mapped registers don't have to be accessed for this check + * to speed up this process. + */ +static int mv_xor_add_io_win(struct mv_xor_chan *mv_chan, u32 addr) +{ + struct mv_xor_device *xordev = mv_chan->xordev; + void __iomem *base = mv_chan->mmr_high_base; + u32 win_enable; + u32 size; + u8 target, attr; + int ret; + int i; + + /* Nothing needs to get done for the Armada 3700 */ + if (xordev->xor_type == XOR_ARMADA_37XX) + return 0; + + /* + * Loop over the cached windows to check, if the requested area + * is already mapped. If this the case, nothing needs to be done + * and we can return. + */ + for (i = 0; i < WINDOW_COUNT; i++) { + if (addr >= xordev->win_start[i] && + addr <= xordev->win_end[i]) { + /* Window is already mapped */ + return 0; + } + } + + /* + * The window is not mapped, so we need to create the new mapping + */ + + /* If no IO window is found that addr has to be located in SDRAM */ + ret = mvebu_mbus_get_io_win_info(addr, &size, &target, &attr); + if (ret < 0) + return 0; + + /* + * Mask the base addr 'addr' according to 'size' read back from the + * MBus window. Otherwise we might end up with an address located + * somewhere in the middle of this area here. + */ + size -= 1; + addr &= ~size; + + /* + * Reading one of both enabled register is enough, as they are always + * programmed to the identical values + */ + win_enable = readl(base + WINDOW_BAR_ENABLE(0)); + + /* Set 'i' to the first free window to write the new values to */ + i = ffs(~win_enable) - 1; + if (i >= WINDOW_COUNT) + return -ENOMEM; + + writel((addr & 0xffff0000) | (attr << 8) | target, + base + WINDOW_BASE(i)); + writel(size & 0xffff0000, base + WINDOW_SIZE(i)); + + /* Fill the caching variables for later use */ + xordev->win_start[i] = addr; + xordev->win_end[i] = addr + size; + + win_enable |= (1 << i); + win_enable |= 3 << (16 + (2 * i)); + writel(win_enable, base + WINDOW_BAR_ENABLE(0)); + writel(win_enable, base + WINDOW_BAR_ENABLE(1)); + + return 0; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc; + int ret; + + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); + + dev_dbg(mv_chan_to_devp(mv_chan), + "%s src_cnt: %d len: %zu dest %pad flags: %ld\n", + __func__, src_cnt, len, &dest, flags); + + /* Check if a new window needs to get added for 'dest' */ + ret = mv_xor_add_io_win(mv_chan, dest); + if (ret) + return NULL; + + sw_desc = mv_chan_alloc_slot(mv_chan); + if (sw_desc) { + sw_desc->type = DMA_XOR; + sw_desc->async_tx.flags = flags; + mv_desc_init(sw_desc, dest, len, flags); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_desc_set_mode(sw_desc); + while (src_cnt--) { + /* Check if a new window needs to get added for 'src' */ + ret = mv_xor_add_io_win(mv_chan, src[src_cnt]); + if (ret) + return NULL; + mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]); + } + } + + dev_dbg(mv_chan_to_devp(mv_chan), + "%s sw_desc %p async_tx %p \n", + __func__, sw_desc, &sw_desc->async_tx); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + /* + * A MEMCPY operation is identical to an XOR operation with only + * a single source address. + */ + return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags); +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + dma_addr_t src, dest; + size_t len; + + src = mv_chan->dummy_src_addr; + dest = mv_chan->dummy_dst_addr; + len = MV_XOR_MIN_BYTE_COUNT; + + /* + * We implement the DMA_INTERRUPT operation as a minimum sized + * XOR operation with a single dummy source address. + */ + return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags); +} + +static void mv_xor_free_chan_resources(struct dma_chan *chan) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *iter, *_iter; + int in_use_descs = 0; + + spin_lock_bh(&mv_chan->lock); + + mv_chan_slot_cleanup(mv_chan); + + list_for_each_entry_safe(iter, _iter, &mv_chan->chain, + node) { + in_use_descs++; + list_move_tail(&iter->node, &mv_chan->free_slots); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, + node) { + in_use_descs++; + list_move_tail(&iter->node, &mv_chan->free_slots); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots, + node) { + in_use_descs++; + list_move_tail(&iter->node, &mv_chan->free_slots); + } + list_for_each_entry_safe_reverse( + iter, _iter, &mv_chan->free_slots, node) { + list_del(&iter->node); + kfree(iter); + mv_chan->slots_allocated--; + } + + dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n", + __func__, mv_chan->slots_allocated); + spin_unlock_bh(&mv_chan->lock); + + if (in_use_descs) + dev_err(mv_chan_to_devp(mv_chan), + "freeing %d in use descriptors!\n", in_use_descs); +} + +/** + * mv_xor_status - poll the status of an XOR transaction + * @chan: XOR channel handle + * @cookie: XOR transaction identifier + * @txstate: XOR transactions state holder (or NULL) + */ +static enum dma_status mv_xor_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_bh(&mv_chan->lock); + mv_chan_slot_cleanup(mv_chan); + spin_unlock_bh(&mv_chan->lock); + + return dma_cookie_status(chan, cookie, txstate); +} + +static void mv_chan_dump_regs(struct mv_xor_chan *chan) +{ + u32 val; + + val = readl_relaxed(XOR_CONFIG(chan)); + dev_err(mv_chan_to_devp(chan), "config 0x%08x\n", val); + + val = readl_relaxed(XOR_ACTIVATION(chan)); + dev_err(mv_chan_to_devp(chan), "activation 0x%08x\n", val); + + val = readl_relaxed(XOR_INTR_CAUSE(chan)); + dev_err(mv_chan_to_devp(chan), "intr cause 0x%08x\n", val); + + val = readl_relaxed(XOR_INTR_MASK(chan)); + dev_err(mv_chan_to_devp(chan), "intr mask 0x%08x\n", val); + + val = readl_relaxed(XOR_ERROR_CAUSE(chan)); + dev_err(mv_chan_to_devp(chan), "error cause 0x%08x\n", val); + + val = readl_relaxed(XOR_ERROR_ADDR(chan)); + dev_err(mv_chan_to_devp(chan), "error addr 0x%08x\n", val); +} + +static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) +{ + if (intr_cause & XOR_INT_ERR_DECODE) { + dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n"); + return; + } + + dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n", + chan->idx, intr_cause); + + mv_chan_dump_regs(chan); + WARN_ON(1); +} + +static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) +{ + struct mv_xor_chan *chan = data; + u32 intr_cause = mv_chan_get_intr_cause(chan); + + dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause); + + if (intr_cause & XOR_INTR_ERRORS) + mv_chan_err_interrupt_handler(chan, intr_cause); + + tasklet_schedule(&chan->irq_tasklet); + + mv_chan_clear_eoc_cause(chan); + + return IRQ_HANDLED; +} + +static void mv_xor_issue_pending(struct dma_chan *chan) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + + if (mv_chan->pending >= MV_XOR_THRESHOLD) { + mv_chan->pending = 0; + mv_chan_activate(mv_chan); + } +} + +/* + * Perform a transaction to verify the HW works. + */ + +static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) +{ + int i, ret; + void *src, *dest; + dma_addr_t src_dma, dest_dma; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + struct dma_async_tx_descriptor *tx; + struct dmaengine_unmap_data *unmap; + int err = 0; + + src = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + + dest = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < PAGE_SIZE; i++) + ((u8 *) src)[i] = (u8)i; + + dma_chan = &mv_chan->dmachan; + if (mv_xor_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + unmap = dmaengine_get_unmap_data(dma_chan->device->dev, 2, GFP_KERNEL); + if (!unmap) { + err = -ENOMEM; + goto free_resources; + } + + src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), + offset_in_page(src), PAGE_SIZE, + DMA_TO_DEVICE); + unmap->addr[0] = src_dma; + + ret = dma_mapping_error(dma_chan->device->dev, src_dma); + if (ret) { + err = -ENOMEM; + goto free_resources; + } + unmap->to_cnt = 1; + + dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), + offset_in_page(dest), PAGE_SIZE, + DMA_FROM_DEVICE); + unmap->addr[1] = dest_dma; + + ret = dma_mapping_error(dma_chan->device->dev, dest_dma); + if (ret) { + err = -ENOMEM; + goto free_resources; + } + unmap->from_cnt = 1; + unmap->len = PAGE_SIZE; + + tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma, + PAGE_SIZE, 0); + if (!tx) { + dev_err(dma_chan->device->dev, + "Self-test cannot prepare operation, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + cookie = mv_xor_tx_submit(tx); + if (dma_submit_error(cookie)) { + dev_err(dma_chan->device->dev, + "Self-test submit error, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + mv_xor_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(1); + + if (mv_xor_status(dma_chan, cookie, NULL) != + DMA_COMPLETE) { + dev_err(dma_chan->device->dev, + "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + if (memcmp(src, dest, PAGE_SIZE)) { + dev_err(dma_chan->device->dev, + "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + dmaengine_unmap_put(unmap); + mv_xor_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */ +static int +mv_chan_xor_self_test(struct mv_xor_chan *mv_chan) +{ + int i, src_idx, ret; + struct page *dest; + struct page *xor_srcs[MV_XOR_NUM_SRC_TEST]; + dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST]; + dma_addr_t dest_dma; + struct dma_async_tx_descriptor *tx; + struct dmaengine_unmap_data *unmap; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + int err = 0; + int src_count = MV_XOR_NUM_SRC_TEST; + + for (src_idx = 0; src_idx < src_count; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < src_count; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < src_count; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = &mv_chan->dmachan; + if (mv_xor_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + unmap = dmaengine_get_unmap_data(dma_chan->device->dev, src_count + 1, + GFP_KERNEL); + if (!unmap) { + err = -ENOMEM; + goto free_resources; + } + + /* test xor */ + for (i = 0; i < src_count; i++) { + unmap->addr[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], + 0, PAGE_SIZE, DMA_TO_DEVICE); + dma_srcs[i] = unmap->addr[i]; + ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[i]); + if (ret) { + err = -ENOMEM; + goto free_resources; + } + unmap->to_cnt++; + } + + unmap->addr[src_count] = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + dest_dma = unmap->addr[src_count]; + ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[src_count]); + if (ret) { + err = -ENOMEM; + goto free_resources; + } + unmap->from_cnt = 1; + unmap->len = PAGE_SIZE; + + tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + src_count, PAGE_SIZE, 0); + if (!tx) { + dev_err(dma_chan->device->dev, + "Self-test cannot prepare operation, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + cookie = mv_xor_tx_submit(tx); + if (dma_submit_error(cookie)) { + dev_err(dma_chan->device->dev, + "Self-test submit error, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + mv_xor_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(8); + + if (mv_xor_status(dma_chan, cookie, NULL) != + DMA_COMPLETE) { + dev_err(dma_chan->device->dev, + "Self-test xor timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_err(dma_chan->device->dev, + "Self-test xor failed compare, disabling. index %d, data %x, expected %x\n", + i, ptr[i], cmp_word); + err = -ENODEV; + goto free_resources; + } + } + +free_resources: + dmaengine_unmap_put(unmap); + mv_xor_free_chan_resources(dma_chan); +out: + src_idx = src_count; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) +{ + struct dma_chan *chan, *_chan; + struct device *dev = mv_chan->dmadev.dev; + + dma_async_device_unregister(&mv_chan->dmadev); + + dma_free_coherent(dev, MV_XOR_POOL_SIZE, + mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool); + dma_unmap_single(dev, mv_chan->dummy_src_addr, + MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE); + dma_unmap_single(dev, mv_chan->dummy_dst_addr, + MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE); + + list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels, + device_node) { + list_del(&chan->device_node); + } + + free_irq(mv_chan->irq, mv_chan); + + return 0; +} + +static struct mv_xor_chan * +mv_xor_channel_add(struct mv_xor_device *xordev, + struct platform_device *pdev, + int idx, dma_cap_mask_t cap_mask, int irq) +{ + int ret = 0; + struct mv_xor_chan *mv_chan; + struct dma_device *dma_dev; + + mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL); + if (!mv_chan) + return ERR_PTR(-ENOMEM); + + mv_chan->idx = idx; + mv_chan->irq = irq; + if (xordev->xor_type == XOR_ORION) + mv_chan->op_in_desc = XOR_MODE_IN_REG; + else + mv_chan->op_in_desc = XOR_MODE_IN_DESC; + + dma_dev = &mv_chan->dmadev; + dma_dev->dev = &pdev->dev; + mv_chan->xordev = xordev; + + /* + * These source and destination dummy buffers are used to implement + * a DMA_INTERRUPT operation as a minimum-sized XOR operation. + * Hence, we only need to map the buffers at initialization-time. + */ + mv_chan->dummy_src_addr = dma_map_single(dma_dev->dev, + mv_chan->dummy_src, MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE); + mv_chan->dummy_dst_addr = dma_map_single(dma_dev->dev, + mv_chan->dummy_dst, MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE); + + /* allocate coherent memory for hardware descriptors + * note: writecombine gives slightly better performance, but + * requires that we explicitly flush the writes + */ + mv_chan->dma_desc_pool_virt = + dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool, + GFP_KERNEL); + if (!mv_chan->dma_desc_pool_virt) + return ERR_PTR(-ENOMEM); + + /* discover transaction capabilites from the platform data */ + dma_dev->cap_mask = cap_mask; + + INIT_LIST_HEAD(&dma_dev->channels); + + /* set base routines */ + dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources; + dma_dev->device_free_chan_resources = mv_xor_free_chan_resources; + dma_dev->device_tx_status = mv_xor_status; + dma_dev->device_issue_pending = mv_xor_issue_pending; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) + dma_dev->device_prep_dma_interrupt = mv_xor_prep_dma_interrupt; + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy; + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->max_xor = 8; + dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; + } + + mv_chan->mmr_base = xordev->xor_base; + mv_chan->mmr_high_base = xordev->xor_high_base; + tasklet_setup(&mv_chan->irq_tasklet, mv_xor_tasklet); + + /* clear errors before enabling interrupts */ + mv_chan_clear_err_status(mv_chan); + + ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler, + 0, dev_name(&pdev->dev), mv_chan); + if (ret) + goto err_free_dma; + + mv_chan_unmask_interrupts(mv_chan); + + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_IN_DESC); + else + mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_XOR); + + spin_lock_init(&mv_chan->lock); + INIT_LIST_HEAD(&mv_chan->chain); + INIT_LIST_HEAD(&mv_chan->completed_slots); + INIT_LIST_HEAD(&mv_chan->free_slots); + INIT_LIST_HEAD(&mv_chan->allocated_slots); + mv_chan->dmachan.device = dma_dev; + dma_cookie_init(&mv_chan->dmachan); + + list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels); + + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { + ret = mv_chan_memcpy_self_test(mv_chan); + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); + if (ret) + goto err_free_irq; + } + + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + ret = mv_chan_xor_self_test(mv_chan); + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); + if (ret) + goto err_free_irq; + } + + dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n", + mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode", + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_free_irq; + + return mv_chan; + +err_free_irq: + free_irq(mv_chan->irq, mv_chan); +err_free_dma: + dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE, + mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool); + return ERR_PTR(ret); +} + +static void +mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, + const struct mbus_dram_target_info *dram) +{ + void __iomem *base = xordev->xor_high_base; + u32 win_enable = 0; + int i; + + for (i = 0; i < 8; i++) { + writel(0, base + WINDOW_BASE(i)); + writel(0, base + WINDOW_SIZE(i)); + if (i < 4) + writel(0, base + WINDOW_REMAP_HIGH(i)); + } + + for (i = 0; i < dram->num_cs; i++) { + const struct mbus_dram_window *cs = dram->cs + i; + + writel((cs->base & 0xffff0000) | + (cs->mbus_attr << 8) | + dram->mbus_dram_target_id, base + WINDOW_BASE(i)); + writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i)); + + /* Fill the caching variables for later use */ + xordev->win_start[i] = cs->base; + xordev->win_end[i] = cs->base + cs->size - 1; + + win_enable |= (1 << i); + win_enable |= 3 << (16 + (2 * i)); + } + + writel(win_enable, base + WINDOW_BAR_ENABLE(0)); + writel(win_enable, base + WINDOW_BAR_ENABLE(1)); + writel(0, base + WINDOW_OVERRIDE_CTRL(0)); + writel(0, base + WINDOW_OVERRIDE_CTRL(1)); +} + +static void +mv_xor_conf_mbus_windows_a3700(struct mv_xor_device *xordev) +{ + void __iomem *base = xordev->xor_high_base; + u32 win_enable = 0; + int i; + + for (i = 0; i < 8; i++) { + writel(0, base + WINDOW_BASE(i)); + writel(0, base + WINDOW_SIZE(i)); + if (i < 4) + writel(0, base + WINDOW_REMAP_HIGH(i)); + } + /* + * For Armada3700 open default 4GB Mbus window. The dram + * related configuration are done at AXIS level. + */ + writel(0xffff0000, base + WINDOW_SIZE(0)); + win_enable |= 1; + win_enable |= 3 << 16; + + writel(win_enable, base + WINDOW_BAR_ENABLE(0)); + writel(win_enable, base + WINDOW_BAR_ENABLE(1)); + writel(0, base + WINDOW_OVERRIDE_CTRL(0)); + writel(0, base + WINDOW_OVERRIDE_CTRL(1)); +} + +/* + * Since this XOR driver is basically used only for RAID5, we don't + * need to care about synchronizing ->suspend with DMA activity, + * because the DMA engine will naturally be quiet due to the block + * devices being suspended. + */ +static int mv_xor_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct mv_xor_device *xordev = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { + struct mv_xor_chan *mv_chan = xordev->channels[i]; + + if (!mv_chan) + continue; + + mv_chan->saved_config_reg = + readl_relaxed(XOR_CONFIG(mv_chan)); + mv_chan->saved_int_mask_reg = + readl_relaxed(XOR_INTR_MASK(mv_chan)); + } + + return 0; +} + +static int mv_xor_resume(struct platform_device *dev) +{ + struct mv_xor_device *xordev = platform_get_drvdata(dev); + const struct mbus_dram_target_info *dram; + int i; + + for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { + struct mv_xor_chan *mv_chan = xordev->channels[i]; + + if (!mv_chan) + continue; + + writel_relaxed(mv_chan->saved_config_reg, + XOR_CONFIG(mv_chan)); + writel_relaxed(mv_chan->saved_int_mask_reg, + XOR_INTR_MASK(mv_chan)); + } + + if (xordev->xor_type == XOR_ARMADA_37XX) { + mv_xor_conf_mbus_windows_a3700(xordev); + return 0; + } + + dram = mv_mbus_dram_info(); + if (dram) + mv_xor_conf_mbus_windows(xordev, dram); + + return 0; +} + +static const struct of_device_id mv_xor_dt_ids[] = { + { .compatible = "marvell,orion-xor", .data = (void *)XOR_ORION }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_ARMADA_38X }, + { .compatible = "marvell,armada-3700-xor", .data = (void *)XOR_ARMADA_37XX }, + {}, +}; + +static unsigned int mv_xor_engine_count; + +static int mv_xor_probe(struct platform_device *pdev) +{ + const struct mbus_dram_target_info *dram; + struct mv_xor_device *xordev; + struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct resource *res; + unsigned int max_engines, max_channels; + int i, ret; + + dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); + + xordev = devm_kzalloc(&pdev->dev, sizeof(*xordev), GFP_KERNEL); + if (!xordev) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + xordev->xor_base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!xordev->xor_base) + return -EBUSY; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + return -ENODEV; + + xordev->xor_high_base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!xordev->xor_high_base) + return -EBUSY; + + platform_set_drvdata(pdev, xordev); + + + /* + * We need to know which type of XOR device we use before + * setting up. In non-dt case it can only be the legacy one. + */ + xordev->xor_type = XOR_ORION; + if (pdev->dev.of_node) { + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); + + xordev->xor_type = (uintptr_t)of_id->data; + } + + /* + * (Re-)program MBUS remapping windows if we are asked to. + */ + if (xordev->xor_type == XOR_ARMADA_37XX) { + mv_xor_conf_mbus_windows_a3700(xordev); + } else { + dram = mv_mbus_dram_info(); + if (dram) + mv_xor_conf_mbus_windows(xordev, dram); + } + + /* Not all platforms can gate the clock, so it is not + * an error if the clock does not exists. + */ + xordev->clk = clk_get(&pdev->dev, NULL); + if (!IS_ERR(xordev->clk)) + clk_prepare_enable(xordev->clk); + + /* + * We don't want to have more than one channel per CPU in + * order for async_tx to perform well. So we limit the number + * of engines and channels so that we take into account this + * constraint. Note that we also want to use channels from + * separate engines when possible. For dual-CPU Armada 3700 + * SoC with single XOR engine allow using its both channels. + */ + max_engines = num_present_cpus(); + if (xordev->xor_type == XOR_ARMADA_37XX) + max_channels = num_present_cpus(); + else + max_channels = min_t(unsigned int, + MV_XOR_MAX_CHANNELS, + DIV_ROUND_UP(num_present_cpus(), 2)); + + if (mv_xor_engine_count >= max_engines) + return 0; + + if (pdev->dev.of_node) { + struct device_node *np; + int i = 0; + + for_each_child_of_node(pdev->dev.of_node, np) { + struct mv_xor_chan *chan; + dma_cap_mask_t cap_mask; + int irq; + + if (i >= max_channels) + continue; + + dma_cap_zero(cap_mask); + dma_cap_set(DMA_MEMCPY, cap_mask); + dma_cap_set(DMA_XOR, cap_mask); + dma_cap_set(DMA_INTERRUPT, cap_mask); + + irq = irq_of_parse_and_map(np, 0); + if (!irq) { + ret = -ENODEV; + goto err_channel_add; + } + + chan = mv_xor_channel_add(xordev, pdev, i, + cap_mask, irq); + if (IS_ERR(chan)) { + ret = PTR_ERR(chan); + irq_dispose_mapping(irq); + goto err_channel_add; + } + + xordev->channels[i] = chan; + i++; + } + } else if (pdata && pdata->channels) { + for (i = 0; i < max_channels; i++) { + struct mv_xor_channel_data *cd; + struct mv_xor_chan *chan; + int irq; + + cd = &pdata->channels[i]; + irq = platform_get_irq(pdev, i); + if (irq < 0) { + ret = irq; + goto err_channel_add; + } + + chan = mv_xor_channel_add(xordev, pdev, i, + cd->cap_mask, irq); + if (IS_ERR(chan)) { + ret = PTR_ERR(chan); + goto err_channel_add; + } + + xordev->channels[i] = chan; + } + } + + return 0; + +err_channel_add: + for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) + if (xordev->channels[i]) { + mv_xor_channel_remove(xordev->channels[i]); + if (pdev->dev.of_node) + irq_dispose_mapping(xordev->channels[i]->irq); + } + + if (!IS_ERR(xordev->clk)) { + clk_disable_unprepare(xordev->clk); + clk_put(xordev->clk); + } + + return ret; +} + +static struct platform_driver mv_xor_driver = { + .probe = mv_xor_probe, + .suspend = mv_xor_suspend, + .resume = mv_xor_resume, + .driver = { + .name = MV_XOR_NAME, + .of_match_table = mv_xor_dt_ids, + }, +}; + +builtin_platform_driver(mv_xor_driver); + +/* +MODULE_AUTHOR("Saeed Bishara "); +MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine"); +MODULE_LICENSE("GPL"); +*/ diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h new file mode 100644 index 0000000000..d86086b05b --- /dev/null +++ b/drivers/dma/mv_xor.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2007, 2008, Marvell International Ltd. + */ + +#ifndef MV_XOR_H +#define MV_XOR_H + +#include +#include +#include +#include + +#define MV_XOR_POOL_SIZE (MV_XOR_SLOT_SIZE * 3072) +#define MV_XOR_SLOT_SIZE 64 +#define MV_XOR_THRESHOLD 1 +#define MV_XOR_MAX_CHANNELS 2 + +#define MV_XOR_MIN_BYTE_COUNT SZ_128 +#define MV_XOR_MAX_BYTE_COUNT (SZ_16M - 1) + +/* Values for the XOR_CONFIG register */ +#define XOR_OPERATION_MODE_XOR 0 +#define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_IN_DESC 7 +#define XOR_DESCRIPTOR_SWAP BIT(14) +#define XOR_DESC_SUCCESS 0x40000000 + +#define XOR_DESC_OPERATION_XOR (0 << 24) +#define XOR_DESC_OPERATION_CRC32C (1 << 24) +#define XOR_DESC_OPERATION_MEMCPY (2 << 24) + +#define XOR_DESC_DMA_OWNED BIT(31) +#define XOR_DESC_EOD_INT_EN BIT(31) + +#define XOR_CURR_DESC(chan) (chan->mmr_high_base + 0x10 + (chan->idx * 4)) +#define XOR_NEXT_DESC(chan) (chan->mmr_high_base + 0x00 + (chan->idx * 4)) +#define XOR_BYTE_COUNT(chan) (chan->mmr_high_base + 0x20 + (chan->idx * 4)) +#define XOR_DEST_POINTER(chan) (chan->mmr_high_base + 0xB0 + (chan->idx * 4)) +#define XOR_BLOCK_SIZE(chan) (chan->mmr_high_base + 0xC0 + (chan->idx * 4)) +#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_high_base + 0xE0) +#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_high_base + 0xE4) + +#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4)) +#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4)) +#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30) +#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40) +#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50) +#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60) + +#define XOR_INT_END_OF_DESC BIT(0) +#define XOR_INT_END_OF_CHAIN BIT(1) +#define XOR_INT_STOPPED BIT(2) +#define XOR_INT_PAUSED BIT(3) +#define XOR_INT_ERR_DECODE BIT(4) +#define XOR_INT_ERR_RDPROT BIT(5) +#define XOR_INT_ERR_WRPROT BIT(6) +#define XOR_INT_ERR_OWN BIT(7) +#define XOR_INT_ERR_PAR BIT(8) +#define XOR_INT_ERR_MBUS BIT(9) + +#define XOR_INTR_ERRORS (XOR_INT_ERR_DECODE | XOR_INT_ERR_RDPROT | \ + XOR_INT_ERR_WRPROT | XOR_INT_ERR_OWN | \ + XOR_INT_ERR_PAR | XOR_INT_ERR_MBUS) + +#define XOR_INTR_MASK_VALUE (XOR_INT_END_OF_DESC | XOR_INT_END_OF_CHAIN | \ + XOR_INT_STOPPED | XOR_INTR_ERRORS) + +#define WINDOW_BASE(w) (0x50 + ((w) << 2)) +#define WINDOW_SIZE(w) (0x70 + ((w) << 2)) +#define WINDOW_REMAP_HIGH(w) (0x90 + ((w) << 2)) +#define WINDOW_BAR_ENABLE(chan) (0x40 + ((chan) << 2)) +#define WINDOW_OVERRIDE_CTRL(chan) (0xA0 + ((chan) << 2)) + +#define WINDOW_COUNT 8 + +struct mv_xor_device { + void __iomem *xor_base; + void __iomem *xor_high_base; + struct clk *clk; + struct mv_xor_chan *channels[MV_XOR_MAX_CHANNELS]; + int xor_type; + + u32 win_start[WINDOW_COUNT]; + u32 win_end[WINDOW_COUNT]; +}; + +/** + * struct mv_xor_chan - internal representation of a XOR channel + * @pending: allows batching of hardware operations + * @lock: serializes enqueue/dequeue operations to the descriptors pool + * @mmr_base: memory mapped register base + * @idx: the index of the xor channel + * @chain: device chain view of the descriptors + * @free_slots: free slots usable by the channel + * @allocated_slots: slots allocated by the driver + * @completed_slots: slots completed by HW but still need to be acked + * @device: parent device + * @common: common dmaengine channel object members + * @slots_allocated: records the actual size of the descriptor slot pool + * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs + * @op_in_desc: new mode of driver, each op is writen to descriptor. + */ +struct mv_xor_chan { + int pending; + spinlock_t lock; /* protects the descriptor slot pool */ + void __iomem *mmr_base; + void __iomem *mmr_high_base; + unsigned int idx; + int irq; + struct list_head chain; + struct list_head free_slots; + struct list_head allocated_slots; + struct list_head completed_slots; + dma_addr_t dma_desc_pool; + void *dma_desc_pool_virt; + size_t pool_size; + struct dma_device dmadev; + struct dma_chan dmachan; + int slots_allocated; + struct tasklet_struct irq_tasklet; + int op_in_desc; + char dummy_src[MV_XOR_MIN_BYTE_COUNT]; + char dummy_dst[MV_XOR_MIN_BYTE_COUNT]; + dma_addr_t dummy_src_addr, dummy_dst_addr; + u32 saved_config_reg, saved_int_mask_reg; + + struct mv_xor_device *xordev; +}; + +/** + * struct mv_xor_desc_slot - software descriptor + * @node: node on the mv_xor_chan lists + * @hw_desc: virtual address of the hardware descriptor chain + * @phys: hardware address of the hardware descriptor chain + * @slot_used: slot in use or not + * @idx: pool index + * @tx_list: list of slots that make up a multi-descriptor transaction + * @async_tx: support for the async_tx api + */ +struct mv_xor_desc_slot { + struct list_head node; + struct list_head sg_tx_list; + enum dma_transaction_type type; + void *hw_desc; + u16 idx; + struct dma_async_tx_descriptor async_tx; +}; + +/* + * This structure describes XOR descriptor size 64bytes. The + * mv_phy_src_idx() macro must be used when indexing the values of the + * phy_src_addr[] array. This is due to the fact that the 'descriptor + * swap' feature, used on big endian systems, swaps descriptors data + * within blocks of 8 bytes. So two consecutive values of the + * phy_src_addr[] array are actually swapped in big-endian, which + * explains the different mv_phy_src_idx() implementation. + */ +#if defined(__LITTLE_ENDIAN) +struct mv_xor_desc { + u32 status; /* descriptor execution status */ + u32 crc32_result; /* result of CRC-32 calculation */ + u32 desc_command; /* type of operation to be carried out */ + u32 phy_next_desc; /* next descriptor address pointer */ + u32 byte_count; /* size of src/dst blocks in bytes */ + u32 phy_dest_addr; /* destination block address */ + u32 phy_src_addr[8]; /* source block addresses */ + u32 reserved0; + u32 reserved1; +}; +#define mv_phy_src_idx(src_idx) (src_idx) +#else +struct mv_xor_desc { + u32 crc32_result; /* result of CRC-32 calculation */ + u32 status; /* descriptor execution status */ + u32 phy_next_desc; /* next descriptor address pointer */ + u32 desc_command; /* type of operation to be carried out */ + u32 phy_dest_addr; /* destination block address */ + u32 byte_count; /* size of src/dst blocks in bytes */ + u32 phy_src_addr[8]; /* source block addresses */ + u32 reserved1; + u32 reserved0; +}; +#define mv_phy_src_idx(src_idx) (src_idx ^ 1) +#endif + +#define to_mv_sw_desc(addr_hw_desc) \ + container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc) + +#define mv_hw_desc_slot_idx(hw_desc, idx) \ + ((void *)(((unsigned long)hw_desc) + ((idx) << 5))) + +#endif diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c new file mode 100644 index 0000000000..0e1e9ca1c0 --- /dev/null +++ b/drivers/dma/mv_xor_v2.c @@ -0,0 +1,898 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2015-2016 Marvell International Ltd. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +/* DMA Engine Registers */ +#define MV_XOR_V2_DMA_DESQ_BALR_OFF 0x000 +#define MV_XOR_V2_DMA_DESQ_BAHR_OFF 0x004 +#define MV_XOR_V2_DMA_DESQ_SIZE_OFF 0x008 +#define MV_XOR_V2_DMA_DESQ_DONE_OFF 0x00C +#define MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK 0x7FFF +#define MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT 0 +#define MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_MASK 0x1FFF +#define MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_SHIFT 16 +#define MV_XOR_V2_DMA_DESQ_ARATTR_OFF 0x010 +#define MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK 0x3F3F +#define MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE 0x202 +#define MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE 0x3C3C +#define MV_XOR_V2_DMA_IMSG_CDAT_OFF 0x014 +#define MV_XOR_V2_DMA_IMSG_THRD_OFF 0x018 +#define MV_XOR_V2_DMA_IMSG_THRD_MASK 0x7FFF +#define MV_XOR_V2_DMA_IMSG_TIMER_EN BIT(18) +#define MV_XOR_V2_DMA_DESQ_AWATTR_OFF 0x01C + /* Same flags as MV_XOR_V2_DMA_DESQ_ARATTR_OFF */ +#define MV_XOR_V2_DMA_DESQ_ALLOC_OFF 0x04C +#define MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_MASK 0xFFFF +#define MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_SHIFT 16 +#define MV_XOR_V2_DMA_IMSG_BALR_OFF 0x050 +#define MV_XOR_V2_DMA_IMSG_BAHR_OFF 0x054 +#define MV_XOR_V2_DMA_DESQ_CTRL_OFF 0x100 +#define MV_XOR_V2_DMA_DESQ_CTRL_32B 1 +#define MV_XOR_V2_DMA_DESQ_CTRL_128B 7 +#define MV_XOR_V2_DMA_DESQ_STOP_OFF 0x800 +#define MV_XOR_V2_DMA_DESQ_DEALLOC_OFF 0x804 +#define MV_XOR_V2_DMA_DESQ_ADD_OFF 0x808 +#define MV_XOR_V2_DMA_IMSG_TMOT 0x810 +#define MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK 0x1FFF + +/* XOR Global registers */ +#define MV_XOR_V2_GLOB_BW_CTRL 0x4 +#define MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT 0 +#define MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_VAL 64 +#define MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT 8 +#define MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_VAL 8 +#define MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_SHIFT 12 +#define MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_VAL 4 +#define MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_SHIFT 16 +#define MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_VAL 4 +#define MV_XOR_V2_GLOB_PAUSE 0x014 +#define MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL 0x8 +#define MV_XOR_V2_GLOB_SYS_INT_CAUSE 0x200 +#define MV_XOR_V2_GLOB_SYS_INT_MASK 0x204 +#define MV_XOR_V2_GLOB_MEM_INT_CAUSE 0x220 +#define MV_XOR_V2_GLOB_MEM_INT_MASK 0x224 + +#define MV_XOR_V2_MIN_DESC_SIZE 32 +#define MV_XOR_V2_EXT_DESC_SIZE 128 + +#define MV_XOR_V2_DESC_RESERVED_SIZE 12 +#define MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE 12 + +#define MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF 8 + +/* + * Descriptors queue size. With 32 bytes descriptors, up to 2^14 + * descriptors are allowed, with 128 bytes descriptors, up to 2^12 + * descriptors are allowed. This driver uses 128 bytes descriptors, + * but experimentation has shown that a set of 1024 descriptors is + * sufficient to reach a good level of performance. + */ +#define MV_XOR_V2_DESC_NUM 1024 + +/* + * Threshold values for descriptors and timeout, determined by + * experimentation as giving a good level of performance. + */ +#define MV_XOR_V2_DONE_IMSG_THRD 0x14 +#define MV_XOR_V2_TIMER_THRD 0xB0 + +/** + * struct mv_xor_v2_descriptor - DMA HW descriptor + * @desc_id: used by S/W and is not affected by H/W. + * @flags: error and status flags + * @crc32_result: CRC32 calculation result + * @desc_ctrl: operation mode and control flags + * @buff_size: amount of bytes to be processed + * @fill_pattern_src_addr: Fill-Pattern or Source-Address and + * AW-Attributes + * @data_buff_addr: Source (and might be RAID6 destination) + * addresses of data buffers in RAID5 and RAID6 + * @reserved: reserved + */ +struct mv_xor_v2_descriptor { + u16 desc_id; + u16 flags; + u32 crc32_result; + u32 desc_ctrl; + + /* Definitions for desc_ctrl */ +#define DESC_NUM_ACTIVE_D_BUF_SHIFT 22 +#define DESC_OP_MODE_SHIFT 28 +#define DESC_OP_MODE_NOP 0 /* Idle operation */ +#define DESC_OP_MODE_MEMCPY 1 /* Pure-DMA operation */ +#define DESC_OP_MODE_MEMSET 2 /* Mem-Fill operation */ +#define DESC_OP_MODE_MEMINIT 3 /* Mem-Init operation */ +#define DESC_OP_MODE_MEM_COMPARE 4 /* Mem-Compare operation */ +#define DESC_OP_MODE_CRC32 5 /* CRC32 calculation */ +#define DESC_OP_MODE_XOR 6 /* RAID5 (XOR) operation */ +#define DESC_OP_MODE_RAID6 7 /* RAID6 P&Q-generation */ +#define DESC_OP_MODE_RAID6_REC 8 /* RAID6 Recovery */ +#define DESC_Q_BUFFER_ENABLE BIT(16) +#define DESC_P_BUFFER_ENABLE BIT(17) +#define DESC_IOD BIT(27) + + u32 buff_size; + u32 fill_pattern_src_addr[4]; + u32 data_buff_addr[MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE]; + u32 reserved[MV_XOR_V2_DESC_RESERVED_SIZE]; +}; + +/** + * struct mv_xor_v2_device - implements a xor device + * @lock: lock for the engine + * @clk: reference to the 'core' clock + * @reg_clk: reference to the 'reg' clock + * @dma_base: memory mapped DMA register base + * @glob_base: memory mapped global register base + * @irq_tasklet: tasklet used for IRQ handling call-backs + * @free_sw_desc: linked list of free SW descriptors + * @dmadev: dma device + * @dmachan: dma channel + * @hw_desq: HW descriptors queue + * @hw_desq_virt: virtual address of DESCQ + * @sw_desq: SW descriptors queue + * @desc_size: HW descriptor size + * @npendings: number of pending descriptors (for which tx_submit has + * @hw_queue_idx: HW queue index + * @irq: The Linux interrupt number + * been called, but not yet issue_pending) + */ +struct mv_xor_v2_device { + spinlock_t lock; + void __iomem *dma_base; + void __iomem *glob_base; + struct clk *clk; + struct clk *reg_clk; + struct tasklet_struct irq_tasklet; + struct list_head free_sw_desc; + struct dma_device dmadev; + struct dma_chan dmachan; + dma_addr_t hw_desq; + struct mv_xor_v2_descriptor *hw_desq_virt; + struct mv_xor_v2_sw_desc *sw_desq; + int desc_size; + unsigned int npendings; + unsigned int hw_queue_idx; + unsigned int irq; +}; + +/** + * struct mv_xor_v2_sw_desc - implements a xor SW descriptor + * @idx: descriptor index + * @async_tx: support for the async_tx api + * @hw_desc: assosiated HW descriptor + * @free_list: node of the free SW descriprots list +*/ +struct mv_xor_v2_sw_desc { + int idx; + struct dma_async_tx_descriptor async_tx; + struct mv_xor_v2_descriptor hw_desc; + struct list_head free_list; +}; + +/* + * Fill the data buffers to a HW descriptor + */ +static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev, + struct mv_xor_v2_descriptor *desc, + dma_addr_t src, int index) +{ + int arr_index = ((index >> 1) * 3); + + /* + * Fill the buffer's addresses to the descriptor. + * + * The format of the buffers address for 2 sequential buffers + * X and X + 1: + * + * First word: Buffer-DX-Address-Low[31:0] + * Second word: Buffer-DX+1-Address-Low[31:0] + * Third word: DX+1-Buffer-Address-High[47:32] [31:16] + * DX-Buffer-Address-High[47:32] [15:0] + */ + if ((index & 0x1) == 0) { + desc->data_buff_addr[arr_index] = lower_32_bits(src); + + desc->data_buff_addr[arr_index + 2] &= ~0xFFFF; + desc->data_buff_addr[arr_index + 2] |= + upper_32_bits(src) & 0xFFFF; + } else { + desc->data_buff_addr[arr_index + 1] = + lower_32_bits(src); + + desc->data_buff_addr[arr_index + 2] &= ~0xFFFF0000; + desc->data_buff_addr[arr_index + 2] |= + (upper_32_bits(src) & 0xFFFF) << 16; + } +} + +/* + * notify the engine of new descriptors, and update the available index. + */ +static void mv_xor_v2_add_desc_to_desq(struct mv_xor_v2_device *xor_dev, + int num_of_desc) +{ + /* write the number of new descriptors in the DESQ. */ + writel(num_of_desc, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ADD_OFF); +} + +/* + * free HW descriptors + */ +static void mv_xor_v2_free_desc_from_desq(struct mv_xor_v2_device *xor_dev, + int num_of_desc) +{ + /* write the number of new descriptors in the DESQ. */ + writel(num_of_desc, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DEALLOC_OFF); +} + +/* + * Set descriptor size + * Return the HW descriptor size in bytes + */ +static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev) +{ + writel(MV_XOR_V2_DMA_DESQ_CTRL_128B, + xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_CTRL_OFF); + + return MV_XOR_V2_EXT_DESC_SIZE; +} + +/* + * Set the IMSG threshold + */ +static inline +void mv_xor_v2_enable_imsg_thrd(struct mv_xor_v2_device *xor_dev) +{ + u32 reg; + + /* Configure threshold of number of descriptors, and enable timer */ + reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF); + reg &= ~MV_XOR_V2_DMA_IMSG_THRD_MASK; + reg |= MV_XOR_V2_DONE_IMSG_THRD; + reg |= MV_XOR_V2_DMA_IMSG_TIMER_EN; + writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF); + + /* Configure Timer Threshold */ + reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT); + reg &= ~MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK; + reg |= MV_XOR_V2_TIMER_THRD; + writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT); +} + +static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data) +{ + struct mv_xor_v2_device *xor_dev = data; + unsigned int ndescs; + u32 reg; + + reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DONE_OFF); + + ndescs = ((reg >> MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT) & + MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK); + + /* No descriptors to process */ + if (!ndescs) + return IRQ_NONE; + + /* schedule a tasklet to handle descriptors callbacks */ + tasklet_schedule(&xor_dev->irq_tasklet); + + return IRQ_HANDLED; +} + +/* + * submit a descriptor to the DMA engine + */ +static dma_cookie_t +mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx) +{ + void *dest_hw_desc; + dma_cookie_t cookie; + struct mv_xor_v2_sw_desc *sw_desc = + container_of(tx, struct mv_xor_v2_sw_desc, async_tx); + struct mv_xor_v2_device *xor_dev = + container_of(tx->chan, struct mv_xor_v2_device, dmachan); + + dev_dbg(xor_dev->dmadev.dev, + "%s sw_desc %p: async_tx %p\n", + __func__, sw_desc, &sw_desc->async_tx); + + /* assign cookie */ + spin_lock_bh(&xor_dev->lock); + cookie = dma_cookie_assign(tx); + + /* copy the HW descriptor from the SW descriptor to the DESQ */ + dest_hw_desc = xor_dev->hw_desq_virt + xor_dev->hw_queue_idx; + + memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size); + + xor_dev->npendings++; + xor_dev->hw_queue_idx++; + if (xor_dev->hw_queue_idx >= MV_XOR_V2_DESC_NUM) + xor_dev->hw_queue_idx = 0; + + spin_unlock_bh(&xor_dev->lock); + + return cookie; +} + +/* + * Prepare a SW descriptor + */ +static struct mv_xor_v2_sw_desc * +mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev) +{ + struct mv_xor_v2_sw_desc *sw_desc; + bool found = false; + + /* Lock the channel */ + spin_lock_bh(&xor_dev->lock); + + if (list_empty(&xor_dev->free_sw_desc)) { + spin_unlock_bh(&xor_dev->lock); + /* schedule tasklet to free some descriptors */ + tasklet_schedule(&xor_dev->irq_tasklet); + return NULL; + } + + list_for_each_entry(sw_desc, &xor_dev->free_sw_desc, free_list) { + if (async_tx_test_ack(&sw_desc->async_tx)) { + found = true; + break; + } + } + + if (!found) { + spin_unlock_bh(&xor_dev->lock); + return NULL; + } + + list_del(&sw_desc->free_list); + + /* Release the channel */ + spin_unlock_bh(&xor_dev->lock); + + return sw_desc; +} + +/* + * Prepare a HW descriptor for a memcpy operation + */ +static struct dma_async_tx_descriptor * +mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct mv_xor_v2_sw_desc *sw_desc; + struct mv_xor_v2_descriptor *hw_descriptor; + struct mv_xor_v2_device *xor_dev; + + xor_dev = container_of(chan, struct mv_xor_v2_device, dmachan); + + dev_dbg(xor_dev->dmadev.dev, + "%s len: %zu src %pad dest %pad flags: %ld\n", + __func__, len, &src, &dest, flags); + + sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; + + sw_desc->async_tx.flags = flags; + + /* set the HW descriptor */ + hw_descriptor = &sw_desc->hw_desc; + + /* save the SW descriptor ID to restore when operation is done */ + hw_descriptor->desc_id = sw_desc->idx; + + /* Set the MEMCPY control word */ + hw_descriptor->desc_ctrl = + DESC_OP_MODE_MEMCPY << DESC_OP_MODE_SHIFT; + + if (flags & DMA_PREP_INTERRUPT) + hw_descriptor->desc_ctrl |= DESC_IOD; + + /* Set source address */ + hw_descriptor->fill_pattern_src_addr[0] = lower_32_bits(src); + hw_descriptor->fill_pattern_src_addr[1] = + upper_32_bits(src) & 0xFFFF; + + /* Set Destination address */ + hw_descriptor->fill_pattern_src_addr[2] = lower_32_bits(dest); + hw_descriptor->fill_pattern_src_addr[3] = + upper_32_bits(dest) & 0xFFFF; + + /* Set buffers size */ + hw_descriptor->buff_size = len; + + /* return the async tx descriptor */ + return &sw_desc->async_tx; +} + +/* + * Prepare a HW descriptor for a XOR operation + */ +static struct dma_async_tx_descriptor * +mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct mv_xor_v2_sw_desc *sw_desc; + struct mv_xor_v2_descriptor *hw_descriptor; + struct mv_xor_v2_device *xor_dev = + container_of(chan, struct mv_xor_v2_device, dmachan); + int i; + + if (src_cnt > MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF || src_cnt < 1) + return NULL; + + dev_dbg(xor_dev->dmadev.dev, + "%s src_cnt: %d len: %zu dest %pad flags: %ld\n", + __func__, src_cnt, len, &dest, flags); + + sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; + + sw_desc->async_tx.flags = flags; + + /* set the HW descriptor */ + hw_descriptor = &sw_desc->hw_desc; + + /* save the SW descriptor ID to restore when operation is done */ + hw_descriptor->desc_id = sw_desc->idx; + + /* Set the XOR control word */ + hw_descriptor->desc_ctrl = + DESC_OP_MODE_XOR << DESC_OP_MODE_SHIFT; + hw_descriptor->desc_ctrl |= DESC_P_BUFFER_ENABLE; + + if (flags & DMA_PREP_INTERRUPT) + hw_descriptor->desc_ctrl |= DESC_IOD; + + /* Set the data buffers */ + for (i = 0; i < src_cnt; i++) + mv_xor_v2_set_data_buffers(xor_dev, hw_descriptor, src[i], i); + + hw_descriptor->desc_ctrl |= + src_cnt << DESC_NUM_ACTIVE_D_BUF_SHIFT; + + /* Set Destination address */ + hw_descriptor->fill_pattern_src_addr[2] = lower_32_bits(dest); + hw_descriptor->fill_pattern_src_addr[3] = + upper_32_bits(dest) & 0xFFFF; + + /* Set buffers size */ + hw_descriptor->buff_size = len; + + /* return the async tx descriptor */ + return &sw_desc->async_tx; +} + +/* + * Prepare a HW descriptor for interrupt operation. + */ +static struct dma_async_tx_descriptor * +mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) +{ + struct mv_xor_v2_sw_desc *sw_desc; + struct mv_xor_v2_descriptor *hw_descriptor; + struct mv_xor_v2_device *xor_dev = + container_of(chan, struct mv_xor_v2_device, dmachan); + + sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; + + /* set the HW descriptor */ + hw_descriptor = &sw_desc->hw_desc; + + /* save the SW descriptor ID to restore when operation is done */ + hw_descriptor->desc_id = sw_desc->idx; + + /* Set the INTERRUPT control word */ + hw_descriptor->desc_ctrl = + DESC_OP_MODE_NOP << DESC_OP_MODE_SHIFT; + hw_descriptor->desc_ctrl |= DESC_IOD; + + /* return the async tx descriptor */ + return &sw_desc->async_tx; +} + +/* + * push pending transactions to hardware + */ +static void mv_xor_v2_issue_pending(struct dma_chan *chan) +{ + struct mv_xor_v2_device *xor_dev = + container_of(chan, struct mv_xor_v2_device, dmachan); + + spin_lock_bh(&xor_dev->lock); + + /* + * update the engine with the number of descriptors to + * process + */ + mv_xor_v2_add_desc_to_desq(xor_dev, xor_dev->npendings); + xor_dev->npendings = 0; + + spin_unlock_bh(&xor_dev->lock); +} + +static inline +int mv_xor_v2_get_pending_params(struct mv_xor_v2_device *xor_dev, + int *pending_ptr) +{ + u32 reg; + + reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DONE_OFF); + + /* get the next pending descriptor index */ + *pending_ptr = ((reg >> MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_SHIFT) & + MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_MASK); + + /* get the number of descriptors pending handle */ + return ((reg >> MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT) & + MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK); +} + +/* + * handle the descriptors after HW process + */ +static void mv_xor_v2_tasklet(struct tasklet_struct *t) +{ + struct mv_xor_v2_device *xor_dev = from_tasklet(xor_dev, t, + irq_tasklet); + int pending_ptr, num_of_pending, i; + struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL; + + dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__); + + /* get the pending descriptors parameters */ + num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr); + + /* loop over free descriptors */ + for (i = 0; i < num_of_pending; i++) { + struct mv_xor_v2_descriptor *next_pending_hw_desc = + xor_dev->hw_desq_virt + pending_ptr; + + /* get the SW descriptor related to the HW descriptor */ + next_pending_sw_desc = + &xor_dev->sw_desq[next_pending_hw_desc->desc_id]; + + /* call the callback */ + if (next_pending_sw_desc->async_tx.cookie > 0) { + /* + * update the channel's completed cookie - no + * lock is required the IMSG threshold provide + * the locking + */ + dma_cookie_complete(&next_pending_sw_desc->async_tx); + + dma_descriptor_unmap(&next_pending_sw_desc->async_tx); + dmaengine_desc_get_callback_invoke( + &next_pending_sw_desc->async_tx, NULL); + } + + dma_run_dependencies(&next_pending_sw_desc->async_tx); + + /* Lock the channel */ + spin_lock(&xor_dev->lock); + + /* add the SW descriptor to the free descriptors list */ + list_add(&next_pending_sw_desc->free_list, + &xor_dev->free_sw_desc); + + /* Release the channel */ + spin_unlock(&xor_dev->lock); + + /* increment the next descriptor */ + pending_ptr++; + if (pending_ptr >= MV_XOR_V2_DESC_NUM) + pending_ptr = 0; + } + + if (num_of_pending != 0) { + /* free the descriptores */ + mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending); + } +} + +/* + * Set DMA Interrupt-message (IMSG) parameters + */ +static void mv_xor_v2_set_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + struct mv_xor_v2_device *xor_dev = dev_get_drvdata(desc->dev); + + writel(msg->address_lo, + xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_BALR_OFF); + writel(msg->address_hi & 0xFFFF, + xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_BAHR_OFF); + writel(msg->data, + xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_CDAT_OFF); +} + +static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev) +{ + u32 reg; + + /* write the DESQ size to the DMA engine */ + writel(MV_XOR_V2_DESC_NUM, + xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_SIZE_OFF); + + /* write the DESQ address to the DMA enngine*/ + writel(lower_32_bits(xor_dev->hw_desq), + xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BALR_OFF); + writel(upper_32_bits(xor_dev->hw_desq), + xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF); + + /* + * This is a temporary solution, until we activate the + * SMMU. Set the attributes for reading & writing data buffers + * & descriptors to: + * + * - OuterShareable - Snoops will be performed on CPU caches + * - Enable cacheable - Bufferable, Modifiable, Other Allocate + * and Allocate + */ + reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ARATTR_OFF); + reg &= ~MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK; + reg |= MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE | + MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE; + writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ARATTR_OFF); + + reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_AWATTR_OFF); + reg &= ~MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK; + reg |= MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE | + MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE; + writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_AWATTR_OFF); + + /* BW CTRL - set values to optimize the XOR performance: + * + * - Set WrBurstLen & RdBurstLen - the unit will issue + * maximum of 256B write/read transactions. + * - Limit the number of outstanding write & read data + * (OBB/IBB) requests to the maximal value. + */ + reg = ((MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_VAL << + MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT) | + (MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_VAL << + MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT) | + (MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_VAL << + MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_SHIFT) | + (MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_VAL << + MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_SHIFT)); + writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_BW_CTRL); + + /* Disable the AXI timer feature */ + reg = readl(xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE); + reg |= MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL; + writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE); + + /* enable the DMA engine */ + writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF); + + return 0; +} + +static int mv_xor_v2_suspend(struct platform_device *dev, pm_message_t state) +{ + struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev); + + /* Set this bit to disable to stop the XOR unit. */ + writel(0x1, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF); + + return 0; +} + +static int mv_xor_v2_resume(struct platform_device *dev) +{ + struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev); + + mv_xor_v2_set_desc_size(xor_dev); + mv_xor_v2_enable_imsg_thrd(xor_dev); + mv_xor_v2_descq_init(xor_dev); + + return 0; +} + +static int mv_xor_v2_probe(struct platform_device *pdev) +{ + struct mv_xor_v2_device *xor_dev; + int i, ret = 0; + struct dma_device *dma_dev; + struct mv_xor_v2_sw_desc *sw_desc; + + BUILD_BUG_ON(sizeof(struct mv_xor_v2_descriptor) != + MV_XOR_V2_EXT_DESC_SIZE); + + xor_dev = devm_kzalloc(&pdev->dev, sizeof(*xor_dev), GFP_KERNEL); + if (!xor_dev) + return -ENOMEM; + + xor_dev->dma_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(xor_dev->dma_base)) + return PTR_ERR(xor_dev->dma_base); + + xor_dev->glob_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(xor_dev->glob_base)) + return PTR_ERR(xor_dev->glob_base); + + platform_set_drvdata(pdev, xor_dev); + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; + + xor_dev->reg_clk = devm_clk_get_optional_enabled(&pdev->dev, "reg"); + if (IS_ERR(xor_dev->reg_clk)) + return PTR_ERR(xor_dev->reg_clk); + + xor_dev->clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(xor_dev->clk)) + return PTR_ERR(xor_dev->clk); + + ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1, + mv_xor_v2_set_msi_msg); + if (ret) + return ret; + + xor_dev->irq = msi_get_virq(&pdev->dev, 0); + + ret = devm_request_irq(&pdev->dev, xor_dev->irq, + mv_xor_v2_interrupt_handler, 0, + dev_name(&pdev->dev), xor_dev); + if (ret) + goto free_msi_irqs; + + tasklet_setup(&xor_dev->irq_tasklet, mv_xor_v2_tasklet); + + xor_dev->desc_size = mv_xor_v2_set_desc_size(xor_dev); + + dma_cookie_init(&xor_dev->dmachan); + + /* + * allocate coherent memory for hardware descriptors + * note: writecombine gives slightly better performance, but + * requires that we explicitly flush the writes + */ + xor_dev->hw_desq_virt = + dma_alloc_coherent(&pdev->dev, + xor_dev->desc_size * MV_XOR_V2_DESC_NUM, + &xor_dev->hw_desq, GFP_KERNEL); + if (!xor_dev->hw_desq_virt) { + ret = -ENOMEM; + goto free_msi_irqs; + } + + /* alloc memory for the SW descriptors */ + xor_dev->sw_desq = devm_kcalloc(&pdev->dev, + MV_XOR_V2_DESC_NUM, sizeof(*sw_desc), + GFP_KERNEL); + if (!xor_dev->sw_desq) { + ret = -ENOMEM; + goto free_hw_desq; + } + + spin_lock_init(&xor_dev->lock); + + /* init the free SW descriptors list */ + INIT_LIST_HEAD(&xor_dev->free_sw_desc); + + /* add all SW descriptors to the free list */ + for (i = 0; i < MV_XOR_V2_DESC_NUM; i++) { + struct mv_xor_v2_sw_desc *sw_desc = + xor_dev->sw_desq + i; + sw_desc->idx = i; + dma_async_tx_descriptor_init(&sw_desc->async_tx, + &xor_dev->dmachan); + sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit; + async_tx_ack(&sw_desc->async_tx); + + list_add(&sw_desc->free_list, + &xor_dev->free_sw_desc); + } + + dma_dev = &xor_dev->dmadev; + + /* set DMA capabilities */ + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_XOR, dma_dev->cap_mask); + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + + /* init dma link list */ + INIT_LIST_HEAD(&dma_dev->channels); + + /* set base routines */ + dma_dev->device_tx_status = dma_cookie_status; + dma_dev->device_issue_pending = mv_xor_v2_issue_pending; + dma_dev->dev = &pdev->dev; + + dma_dev->device_prep_dma_memcpy = mv_xor_v2_prep_dma_memcpy; + dma_dev->device_prep_dma_interrupt = mv_xor_v2_prep_dma_interrupt; + dma_dev->max_xor = 8; + dma_dev->device_prep_dma_xor = mv_xor_v2_prep_dma_xor; + + xor_dev->dmachan.device = dma_dev; + + list_add_tail(&xor_dev->dmachan.device_node, + &dma_dev->channels); + + mv_xor_v2_enable_imsg_thrd(xor_dev); + + mv_xor_v2_descq_init(xor_dev); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto free_hw_desq; + + dev_notice(&pdev->dev, "Marvell Version 2 XOR driver\n"); + + return 0; + +free_hw_desq: + dma_free_coherent(&pdev->dev, + xor_dev->desc_size * MV_XOR_V2_DESC_NUM, + xor_dev->hw_desq_virt, xor_dev->hw_desq); +free_msi_irqs: + platform_msi_domain_free_irqs(&pdev->dev); + return ret; +} + +static int mv_xor_v2_remove(struct platform_device *pdev) +{ + struct mv_xor_v2_device *xor_dev = platform_get_drvdata(pdev); + + dma_async_device_unregister(&xor_dev->dmadev); + + dma_free_coherent(&pdev->dev, + xor_dev->desc_size * MV_XOR_V2_DESC_NUM, + xor_dev->hw_desq_virt, xor_dev->hw_desq); + + devm_free_irq(&pdev->dev, xor_dev->irq, xor_dev); + + platform_msi_domain_free_irqs(&pdev->dev); + + tasklet_kill(&xor_dev->irq_tasklet); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id mv_xor_v2_dt_ids[] = { + { .compatible = "marvell,xor-v2", }, + {}, +}; +MODULE_DEVICE_TABLE(of, mv_xor_v2_dt_ids); +#endif + +static struct platform_driver mv_xor_v2_driver = { + .probe = mv_xor_v2_probe, + .suspend = mv_xor_v2_suspend, + .resume = mv_xor_v2_resume, + .remove = mv_xor_v2_remove, + .driver = { + .name = "mv_xor_v2", + .of_match_table = of_match_ptr(mv_xor_v2_dt_ids), + }, +}; + +module_platform_driver(mv_xor_v2_driver); + +MODULE_DESCRIPTION("DMA engine driver for Marvell's Version 2 of XOR engine"); diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c new file mode 100644 index 0000000000..cfb9962417 --- /dev/null +++ b/drivers/dma/mxs-dma.c @@ -0,0 +1,842 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved. +// +// Refer to drivers/dma/imx-sdma.c + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dmaengine.h" + +/* + * NOTE: The term "PIO" throughout the mxs-dma implementation means + * PIO mode of mxs apbh-dma and apbx-dma. With this working mode, + * dma can program the controller registers of peripheral devices. + */ + +#define dma_is_apbh(mxs_dma) ((mxs_dma)->type == MXS_DMA_APBH) +#define apbh_is_old(mxs_dma) ((mxs_dma)->dev_id == IMX23_DMA) + +#define HW_APBHX_CTRL0 0x000 +#define BM_APBH_CTRL0_APB_BURST8_EN (1 << 29) +#define BM_APBH_CTRL0_APB_BURST_EN (1 << 28) +#define BP_APBH_CTRL0_RESET_CHANNEL 16 +#define HW_APBHX_CTRL1 0x010 +#define HW_APBHX_CTRL2 0x020 +#define HW_APBHX_CHANNEL_CTRL 0x030 +#define BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL 16 +/* + * The offset of NXTCMDAR register is different per both dma type and version, + * while stride for each channel is all the same 0x70. + */ +#define HW_APBHX_CHn_NXTCMDAR(d, n) \ + (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x050 : 0x110) + (n) * 0x70) +#define HW_APBHX_CHn_SEMA(d, n) \ + (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x080 : 0x140) + (n) * 0x70) +#define HW_APBHX_CHn_BAR(d, n) \ + (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x070 : 0x130) + (n) * 0x70) +#define HW_APBX_CHn_DEBUG1(d, n) (0x150 + (n) * 0x70) + +/* + * ccw bits definitions + * + * COMMAND: 0..1 (2) + * CHAIN: 2 (1) + * IRQ: 3 (1) + * NAND_LOCK: 4 (1) - not implemented + * NAND_WAIT4READY: 5 (1) - not implemented + * DEC_SEM: 6 (1) + * WAIT4END: 7 (1) + * HALT_ON_TERMINATE: 8 (1) + * TERMINATE_FLUSH: 9 (1) + * RESERVED: 10..11 (2) + * PIO_NUM: 12..15 (4) + */ +#define BP_CCW_COMMAND 0 +#define BM_CCW_COMMAND (3 << 0) +#define CCW_CHAIN (1 << 2) +#define CCW_IRQ (1 << 3) +#define CCW_WAIT4RDY (1 << 5) +#define CCW_DEC_SEM (1 << 6) +#define CCW_WAIT4END (1 << 7) +#define CCW_HALT_ON_TERM (1 << 8) +#define CCW_TERM_FLUSH (1 << 9) +#define BP_CCW_PIO_NUM 12 +#define BM_CCW_PIO_NUM (0xf << 12) + +#define BF_CCW(value, field) (((value) << BP_CCW_##field) & BM_CCW_##field) + +#define MXS_DMA_CMD_NO_XFER 0 +#define MXS_DMA_CMD_WRITE 1 +#define MXS_DMA_CMD_READ 2 +#define MXS_DMA_CMD_DMA_SENSE 3 /* not implemented */ + +struct mxs_dma_ccw { + u32 next; + u16 bits; + u16 xfer_bytes; +#define MAX_XFER_BYTES 0xff00 + u32 bufaddr; +#define MXS_PIO_WORDS 16 + u32 pio_words[MXS_PIO_WORDS]; +}; + +#define CCW_BLOCK_SIZE (4 * PAGE_SIZE) +#define NUM_CCW (int)(CCW_BLOCK_SIZE / sizeof(struct mxs_dma_ccw)) + +struct mxs_dma_chan { + struct mxs_dma_engine *mxs_dma; + struct dma_chan chan; + struct dma_async_tx_descriptor desc; + struct tasklet_struct tasklet; + unsigned int chan_irq; + struct mxs_dma_ccw *ccw; + dma_addr_t ccw_phys; + int desc_count; + enum dma_status status; + unsigned int flags; + bool reset; +#define MXS_DMA_SG_LOOP (1 << 0) +#define MXS_DMA_USE_SEMAPHORE (1 << 1) +}; + +#define MXS_DMA_CHANNELS 16 +#define MXS_DMA_CHANNELS_MASK 0xffff + +enum mxs_dma_devtype { + MXS_DMA_APBH, + MXS_DMA_APBX, +}; + +enum mxs_dma_id { + IMX23_DMA, + IMX28_DMA, +}; + +struct mxs_dma_engine { + enum mxs_dma_id dev_id; + enum mxs_dma_devtype type; + void __iomem *base; + struct clk *clk; + struct dma_device dma_device; + struct mxs_dma_chan mxs_chans[MXS_DMA_CHANNELS]; + struct platform_device *pdev; + unsigned int nr_channels; +}; + +struct mxs_dma_type { + enum mxs_dma_id id; + enum mxs_dma_devtype type; +}; + +static struct mxs_dma_type mxs_dma_types[] = { + { + .id = IMX23_DMA, + .type = MXS_DMA_APBH, + }, { + .id = IMX23_DMA, + .type = MXS_DMA_APBX, + }, { + .id = IMX28_DMA, + .type = MXS_DMA_APBH, + }, { + .id = IMX28_DMA, + .type = MXS_DMA_APBX, + } +}; + +static const struct of_device_id mxs_dma_dt_ids[] = { + { .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_types[0], }, + { .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_types[1], }, + { .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_types[2], }, + { .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_types[3], }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mxs_dma_dt_ids); + +static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct mxs_dma_chan, chan); +} + +static void mxs_dma_reset_chan(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* + * mxs dma channel resets can cause a channel stall. To recover from a + * channel stall, we have to reset the whole DMA engine. To avoid this, + * we use cyclic DMA with semaphores, that are enhanced in + * mxs_dma_int_handler. To reset the channel, we can simply stop writing + * into the semaphore counter. + */ + if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE && + mxs_chan->flags & MXS_DMA_SG_LOOP) { + mxs_chan->reset = true; + } else if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) { + writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL), + mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET); + } else { + unsigned long elapsed = 0; + const unsigned long max_wait = 50000; /* 50ms */ + void __iomem *reg_dbg1 = mxs_dma->base + + HW_APBX_CHn_DEBUG1(mxs_dma, chan_id); + + /* + * On i.MX28 APBX, the DMA channel can stop working if we reset + * the channel while it is in READ_FLUSH (0x08) state. + * We wait here until we leave the state. Then we trigger the + * reset. Waiting a maximum of 50ms, the kernel shouldn't crash + * because of this. + */ + while ((readl(reg_dbg1) & 0xf) == 0x8 && elapsed < max_wait) { + udelay(100); + elapsed += 100; + } + + if (elapsed >= max_wait) + dev_err(&mxs_chan->mxs_dma->pdev->dev, + "Failed waiting for the DMA channel %d to leave state READ_FLUSH, trying to reset channel in READ_FLUSH state now\n", + chan_id); + + writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL), + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET); + } + + mxs_chan->status = DMA_COMPLETE; +} + +static void mxs_dma_enable_chan(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* set cmd_addr up */ + writel(mxs_chan->ccw_phys, + mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(mxs_dma, chan_id)); + + /* write 1 to SEMA to kick off the channel */ + if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE && + mxs_chan->flags & MXS_DMA_SG_LOOP) { + /* A cyclic DMA consists of at least 2 segments, so initialize + * the semaphore with 2 so we have enough time to add 1 to the + * semaphore if we need to */ + writel(2, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id)); + } else { + writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id)); + } + mxs_chan->reset = false; +} + +static void mxs_dma_disable_chan(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + + mxs_chan->status = DMA_COMPLETE; +} + +static int mxs_dma_pause_chan(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* freeze the channel */ + if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET); + else + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET); + + mxs_chan->status = DMA_PAUSED; + return 0; +} + +static int mxs_dma_resume_chan(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* unfreeze the channel */ + if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_CLR); + else + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_CLR); + + mxs_chan->status = DMA_IN_PROGRESS; + return 0; +} + +static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + return dma_cookie_assign(tx); +} + +static void mxs_dma_tasklet(struct tasklet_struct *t) +{ + struct mxs_dma_chan *mxs_chan = from_tasklet(mxs_chan, t, tasklet); + + dmaengine_desc_get_callback_invoke(&mxs_chan->desc, NULL); +} + +static int mxs_dma_irq_to_chan(struct mxs_dma_engine *mxs_dma, int irq) +{ + int i; + + for (i = 0; i != mxs_dma->nr_channels; ++i) + if (mxs_dma->mxs_chans[i].chan_irq == irq) + return i; + + return -EINVAL; +} + +static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id) +{ + struct mxs_dma_engine *mxs_dma = dev_id; + struct mxs_dma_chan *mxs_chan; + u32 completed; + u32 err; + int chan = mxs_dma_irq_to_chan(mxs_dma, irq); + + if (chan < 0) + return IRQ_NONE; + + /* completion status */ + completed = readl(mxs_dma->base + HW_APBHX_CTRL1); + completed = (completed >> chan) & 0x1; + + /* Clear interrupt */ + writel((1 << chan), + mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR); + + /* error status */ + err = readl(mxs_dma->base + HW_APBHX_CTRL2); + err &= (1 << (MXS_DMA_CHANNELS + chan)) | (1 << chan); + + /* + * error status bit is in the upper 16 bits, error irq bit in the lower + * 16 bits. We transform it into a simpler error code: + * err: 0x00 = no error, 0x01 = TERMINATION, 0x02 = BUS_ERROR + */ + err = (err >> (MXS_DMA_CHANNELS + chan)) + (err >> chan); + + /* Clear error irq */ + writel((1 << chan), + mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR); + + /* + * When both completion and error of termination bits set at the + * same time, we do not take it as an error. IOW, it only becomes + * an error we need to handle here in case of either it's a bus + * error or a termination error with no completion. 0x01 is termination + * error, so we can subtract err & completed to get the real error case. + */ + err -= err & completed; + + mxs_chan = &mxs_dma->mxs_chans[chan]; + + if (err) { + dev_dbg(mxs_dma->dma_device.dev, + "%s: error in channel %d\n", __func__, + chan); + mxs_chan->status = DMA_ERROR; + mxs_dma_reset_chan(&mxs_chan->chan); + } else if (mxs_chan->status != DMA_COMPLETE) { + if (mxs_chan->flags & MXS_DMA_SG_LOOP) { + mxs_chan->status = DMA_IN_PROGRESS; + if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE) + writel(1, mxs_dma->base + + HW_APBHX_CHn_SEMA(mxs_dma, chan)); + } else { + mxs_chan->status = DMA_COMPLETE; + } + } + + if (mxs_chan->status == DMA_COMPLETE) { + if (mxs_chan->reset) + return IRQ_HANDLED; + dma_cookie_complete(&mxs_chan->desc); + } + + /* schedule tasklet on this channel */ + tasklet_schedule(&mxs_chan->tasklet); + + return IRQ_HANDLED; +} + +static int mxs_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int ret; + + mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev, + CCW_BLOCK_SIZE, + &mxs_chan->ccw_phys, GFP_KERNEL); + if (!mxs_chan->ccw) { + ret = -ENOMEM; + goto err_alloc; + } + + ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, + 0, "mxs-dma", mxs_dma); + if (ret) + goto err_irq; + + ret = clk_prepare_enable(mxs_dma->clk); + if (ret) + goto err_clk; + + mxs_dma_reset_chan(chan); + + dma_async_tx_descriptor_init(&mxs_chan->desc, chan); + mxs_chan->desc.tx_submit = mxs_dma_tx_submit; + + /* the descriptor is ready */ + async_tx_ack(&mxs_chan->desc); + + return 0; + +err_clk: + free_irq(mxs_chan->chan_irq, mxs_dma); +err_irq: + dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE, + mxs_chan->ccw, mxs_chan->ccw_phys); +err_alloc: + return ret; +} + +static void mxs_dma_free_chan_resources(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + + mxs_dma_disable_chan(chan); + + free_irq(mxs_chan->chan_irq, mxs_dma); + + dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE, + mxs_chan->ccw, mxs_chan->ccw_phys); + + clk_disable_unprepare(mxs_dma->clk); +} + +/* + * How to use the flags for ->device_prep_slave_sg() : + * [1] If there is only one DMA command in the DMA chain, the code should be: + * ...... + * ->device_prep_slave_sg(DMA_CTRL_ACK); + * ...... + * [2] If there are two DMA commands in the DMA chain, the code should be + * ...... + * ->device_prep_slave_sg(0); + * ...... + * ->device_prep_slave_sg(DMA_CTRL_ACK); + * ...... + * [3] If there are more than two DMA commands in the DMA chain, the code + * should be: + * ...... + * ->device_prep_slave_sg(0); // First + * ...... + * ->device_prep_slave_sg(DMA_CTRL_ACK]); + * ...... + * ->device_prep_slave_sg(DMA_CTRL_ACK); // Last + * ...... + */ +static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + struct mxs_dma_ccw *ccw; + struct scatterlist *sg; + u32 i, j; + u32 *pio; + int idx = 0; + + if (mxs_chan->status == DMA_IN_PROGRESS) + idx = mxs_chan->desc_count; + + if (sg_len + idx > NUM_CCW) { + dev_err(mxs_dma->dma_device.dev, + "maximum number of sg exceeded: %d > %d\n", + sg_len, NUM_CCW); + goto err_out; + } + + mxs_chan->status = DMA_IN_PROGRESS; + mxs_chan->flags = 0; + + /* + * If the sg is prepared with append flag set, the sg + * will be appended to the last prepared sg. + */ + if (idx) { + BUG_ON(idx < 1); + ccw = &mxs_chan->ccw[idx - 1]; + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx; + ccw->bits |= CCW_CHAIN; + ccw->bits &= ~CCW_IRQ; + ccw->bits &= ~CCW_DEC_SEM; + } else { + idx = 0; + } + + if (direction == DMA_TRANS_NONE) { + ccw = &mxs_chan->ccw[idx++]; + pio = (u32 *) sgl; + + for (j = 0; j < sg_len;) + ccw->pio_words[j++] = *pio++; + + ccw->bits = 0; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_DEC_SEM; + if (flags & MXS_DMA_CTRL_WAIT4END) + ccw->bits |= CCW_WAIT4END; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= BF_CCW(sg_len, PIO_NUM); + ccw->bits |= BF_CCW(MXS_DMA_CMD_NO_XFER, COMMAND); + if (flags & MXS_DMA_CTRL_WAIT4RDY) + ccw->bits |= CCW_WAIT4RDY; + } else { + for_each_sg(sgl, sg, sg_len, i) { + if (sg_dma_len(sg) > MAX_XFER_BYTES) { + dev_err(mxs_dma->dma_device.dev, "maximum bytes for sg entry exceeded: %d > %d\n", + sg_dma_len(sg), MAX_XFER_BYTES); + goto err_out; + } + + ccw = &mxs_chan->ccw[idx++]; + + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx; + ccw->bufaddr = sg->dma_address; + ccw->xfer_bytes = sg_dma_len(sg); + + ccw->bits = 0; + ccw->bits |= CCW_CHAIN; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ? + MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, + COMMAND); + + if (i + 1 == sg_len) { + ccw->bits &= ~CCW_CHAIN; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_DEC_SEM; + if (flags & MXS_DMA_CTRL_WAIT4END) + ccw->bits |= CCW_WAIT4END; + } + } + } + mxs_chan->desc_count = idx; + + return &mxs_chan->desc; + +err_out: + mxs_chan->status = DMA_ERROR; + return NULL; +} + +static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + u32 num_periods = buf_len / period_len; + u32 i = 0, buf = 0; + + if (mxs_chan->status == DMA_IN_PROGRESS) + return NULL; + + mxs_chan->status = DMA_IN_PROGRESS; + mxs_chan->flags |= MXS_DMA_SG_LOOP; + mxs_chan->flags |= MXS_DMA_USE_SEMAPHORE; + + if (num_periods > NUM_CCW) { + dev_err(mxs_dma->dma_device.dev, + "maximum number of sg exceeded: %d > %d\n", + num_periods, NUM_CCW); + goto err_out; + } + + if (period_len > MAX_XFER_BYTES) { + dev_err(mxs_dma->dma_device.dev, + "maximum period size exceeded: %zu > %d\n", + period_len, MAX_XFER_BYTES); + goto err_out; + } + + while (buf < buf_len) { + struct mxs_dma_ccw *ccw = &mxs_chan->ccw[i]; + + if (i + 1 == num_periods) + ccw->next = mxs_chan->ccw_phys; + else + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * (i + 1); + + ccw->bufaddr = dma_addr; + ccw->xfer_bytes = period_len; + + ccw->bits = 0; + ccw->bits |= CCW_CHAIN; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= CCW_DEC_SEM; + ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ? + MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND); + + dma_addr += period_len; + buf += period_len; + + i++; + } + mxs_chan->desc_count = i; + + return &mxs_chan->desc; + +err_out: + mxs_chan->status = DMA_ERROR; + return NULL; +} + +static int mxs_dma_terminate_all(struct dma_chan *chan) +{ + mxs_dma_reset_chan(chan); + mxs_dma_disable_chan(chan); + + return 0; +} + +static enum dma_status mxs_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + u32 residue = 0; + + if (mxs_chan->status == DMA_IN_PROGRESS && + mxs_chan->flags & MXS_DMA_SG_LOOP) { + struct mxs_dma_ccw *last_ccw; + u32 bar; + + last_ccw = &mxs_chan->ccw[mxs_chan->desc_count - 1]; + residue = last_ccw->xfer_bytes + last_ccw->bufaddr; + + bar = readl(mxs_dma->base + + HW_APBHX_CHn_BAR(mxs_dma, chan->chan_id)); + residue -= bar; + } + + dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, + residue); + + return mxs_chan->status; +} + +static int mxs_dma_init(struct mxs_dma_engine *mxs_dma) +{ + int ret; + + ret = clk_prepare_enable(mxs_dma->clk); + if (ret) + return ret; + + ret = stmp_reset_block(mxs_dma->base); + if (ret) + goto err_out; + + /* enable apbh burst */ + if (dma_is_apbh(mxs_dma)) { + writel(BM_APBH_CTRL0_APB_BURST_EN, + mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET); + writel(BM_APBH_CTRL0_APB_BURST8_EN, + mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET); + } + + /* enable irq for all the channels */ + writel(MXS_DMA_CHANNELS_MASK << MXS_DMA_CHANNELS, + mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_SET); + +err_out: + clk_disable_unprepare(mxs_dma->clk); + return ret; +} + +struct mxs_dma_filter_param { + unsigned int chan_id; +}; + +static bool mxs_dma_filter_fn(struct dma_chan *chan, void *fn_param) +{ + struct mxs_dma_filter_param *param = fn_param; + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_irq; + + if (chan->chan_id != param->chan_id) + return false; + + chan_irq = platform_get_irq(mxs_dma->pdev, param->chan_id); + if (chan_irq < 0) + return false; + + mxs_chan->chan_irq = chan_irq; + + return true; +} + +static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct mxs_dma_engine *mxs_dma = ofdma->of_dma_data; + dma_cap_mask_t mask = mxs_dma->dma_device.cap_mask; + struct mxs_dma_filter_param param; + + if (dma_spec->args_count != 1) + return NULL; + + param.chan_id = dma_spec->args[0]; + + if (param.chan_id >= mxs_dma->nr_channels) + return NULL; + + return __dma_request_channel(&mask, mxs_dma_filter_fn, ¶m, + ofdma->of_node); +} + +static int mxs_dma_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + const struct mxs_dma_type *dma_type; + struct mxs_dma_engine *mxs_dma; + int ret, i; + + mxs_dma = devm_kzalloc(&pdev->dev, sizeof(*mxs_dma), GFP_KERNEL); + if (!mxs_dma) + return -ENOMEM; + + ret = of_property_read_u32(np, "dma-channels", &mxs_dma->nr_channels); + if (ret) { + dev_err(&pdev->dev, "failed to read dma-channels\n"); + return ret; + } + + dma_type = (struct mxs_dma_type *)of_device_get_match_data(&pdev->dev); + mxs_dma->type = dma_type->type; + mxs_dma->dev_id = dma_type->id; + + mxs_dma->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mxs_dma->base)) + return PTR_ERR(mxs_dma->base); + + mxs_dma->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(mxs_dma->clk)) + return PTR_ERR(mxs_dma->clk); + + dma_cap_set(DMA_SLAVE, mxs_dma->dma_device.cap_mask); + dma_cap_set(DMA_CYCLIC, mxs_dma->dma_device.cap_mask); + + INIT_LIST_HEAD(&mxs_dma->dma_device.channels); + + /* Initialize channel parameters */ + for (i = 0; i < MXS_DMA_CHANNELS; i++) { + struct mxs_dma_chan *mxs_chan = &mxs_dma->mxs_chans[i]; + + mxs_chan->mxs_dma = mxs_dma; + mxs_chan->chan.device = &mxs_dma->dma_device; + dma_cookie_init(&mxs_chan->chan); + + tasklet_setup(&mxs_chan->tasklet, mxs_dma_tasklet); + + + /* Add the channel to mxs_chan list */ + list_add_tail(&mxs_chan->chan.device_node, + &mxs_dma->dma_device.channels); + } + + ret = mxs_dma_init(mxs_dma); + if (ret) + return ret; + + mxs_dma->pdev = pdev; + mxs_dma->dma_device.dev = &pdev->dev; + + /* mxs_dma gets 65535 bytes maximum sg size */ + dma_set_max_seg_size(mxs_dma->dma_device.dev, MAX_XFER_BYTES); + + mxs_dma->dma_device.device_alloc_chan_resources = mxs_dma_alloc_chan_resources; + mxs_dma->dma_device.device_free_chan_resources = mxs_dma_free_chan_resources; + mxs_dma->dma_device.device_tx_status = mxs_dma_tx_status; + mxs_dma->dma_device.device_prep_slave_sg = mxs_dma_prep_slave_sg; + mxs_dma->dma_device.device_prep_dma_cyclic = mxs_dma_prep_dma_cyclic; + mxs_dma->dma_device.device_pause = mxs_dma_pause_chan; + mxs_dma->dma_device.device_resume = mxs_dma_resume_chan; + mxs_dma->dma_device.device_terminate_all = mxs_dma_terminate_all; + mxs_dma->dma_device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + mxs_dma->dma_device.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + mxs_dma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + mxs_dma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + mxs_dma->dma_device.device_issue_pending = mxs_dma_enable_chan; + + ret = dmaenginem_async_device_register(&mxs_dma->dma_device); + if (ret) { + dev_err(mxs_dma->dma_device.dev, "unable to register\n"); + return ret; + } + + ret = of_dma_controller_register(np, mxs_dma_xlate, mxs_dma); + if (ret) { + dev_err(mxs_dma->dma_device.dev, + "failed to register controller\n"); + } + + dev_info(mxs_dma->dma_device.dev, "initialized\n"); + + return 0; +} + +static struct platform_driver mxs_dma_driver = { + .driver = { + .name = "mxs-dma", + .of_match_table = mxs_dma_dt_ids, + }, + .probe = mxs_dma_probe, +}; + +builtin_platform_driver(mxs_dma_driver); diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c new file mode 100644 index 0000000000..0b2f96fd8b --- /dev/null +++ b/drivers/dma/nbpfaxi.c @@ -0,0 +1,1527 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013-2014 Renesas Electronics Europe Ltd. + * Author: Guennadi Liakhovetski + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dmaengine.h" + +#define NBPF_REG_CHAN_OFFSET 0 +#define NBPF_REG_CHAN_SIZE 0x40 + +/* Channel Current Transaction Byte register */ +#define NBPF_CHAN_CUR_TR_BYTE 0x20 + +/* Channel Status register */ +#define NBPF_CHAN_STAT 0x24 +#define NBPF_CHAN_STAT_EN 1 +#define NBPF_CHAN_STAT_TACT 4 +#define NBPF_CHAN_STAT_ERR 0x10 +#define NBPF_CHAN_STAT_END 0x20 +#define NBPF_CHAN_STAT_TC 0x40 +#define NBPF_CHAN_STAT_DER 0x400 + +/* Channel Control register */ +#define NBPF_CHAN_CTRL 0x28 +#define NBPF_CHAN_CTRL_SETEN 1 +#define NBPF_CHAN_CTRL_CLREN 2 +#define NBPF_CHAN_CTRL_STG 4 +#define NBPF_CHAN_CTRL_SWRST 8 +#define NBPF_CHAN_CTRL_CLRRQ 0x10 +#define NBPF_CHAN_CTRL_CLREND 0x20 +#define NBPF_CHAN_CTRL_CLRTC 0x40 +#define NBPF_CHAN_CTRL_SETSUS 0x100 +#define NBPF_CHAN_CTRL_CLRSUS 0x200 + +/* Channel Configuration register */ +#define NBPF_CHAN_CFG 0x2c +#define NBPF_CHAN_CFG_SEL 7 /* terminal SELect: 0..7 */ +#define NBPF_CHAN_CFG_REQD 8 /* REQuest Direction: DMAREQ is 0: input, 1: output */ +#define NBPF_CHAN_CFG_LOEN 0x10 /* LOw ENable: low DMA request line is: 0: inactive, 1: active */ +#define NBPF_CHAN_CFG_HIEN 0x20 /* HIgh ENable: high DMA request line is: 0: inactive, 1: active */ +#define NBPF_CHAN_CFG_LVL 0x40 /* LeVeL: DMA request line is sensed as 0: edge, 1: level */ +#define NBPF_CHAN_CFG_AM 0x700 /* ACK Mode: 0: Pulse mode, 1: Level mode, b'1x: Bus Cycle */ +#define NBPF_CHAN_CFG_SDS 0xf000 /* Source Data Size: 0: 8 bits,... , 7: 1024 bits */ +#define NBPF_CHAN_CFG_DDS 0xf0000 /* Destination Data Size: as above */ +#define NBPF_CHAN_CFG_SAD 0x100000 /* Source ADdress counting: 0: increment, 1: fixed */ +#define NBPF_CHAN_CFG_DAD 0x200000 /* Destination ADdress counting: 0: increment, 1: fixed */ +#define NBPF_CHAN_CFG_TM 0x400000 /* Transfer Mode: 0: single, 1: block TM */ +#define NBPF_CHAN_CFG_DEM 0x1000000 /* DMAEND interrupt Mask */ +#define NBPF_CHAN_CFG_TCM 0x2000000 /* DMATCO interrupt Mask */ +#define NBPF_CHAN_CFG_SBE 0x8000000 /* Sweep Buffer Enable */ +#define NBPF_CHAN_CFG_RSEL 0x10000000 /* RM: Register Set sELect */ +#define NBPF_CHAN_CFG_RSW 0x20000000 /* RM: Register Select sWitch */ +#define NBPF_CHAN_CFG_REN 0x40000000 /* RM: Register Set Enable */ +#define NBPF_CHAN_CFG_DMS 0x80000000 /* 0: register mode (RM), 1: link mode (LM) */ + +#define NBPF_CHAN_NXLA 0x38 +#define NBPF_CHAN_CRLA 0x3c + +/* Link Header field */ +#define NBPF_HEADER_LV 1 +#define NBPF_HEADER_LE 2 +#define NBPF_HEADER_WBD 4 +#define NBPF_HEADER_DIM 8 + +#define NBPF_CTRL 0x300 +#define NBPF_CTRL_PR 1 /* 0: fixed priority, 1: round robin */ +#define NBPF_CTRL_LVINT 2 /* DMAEND and DMAERR signalling: 0: pulse, 1: level */ + +#define NBPF_DSTAT_ER 0x314 +#define NBPF_DSTAT_END 0x318 + +#define NBPF_DMA_BUSWIDTHS \ + (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +struct nbpf_config { + int num_channels; + int buffer_size; +}; + +/* + * We've got 3 types of objects, used to describe DMA transfers: + * 1. high-level descriptor, containing a struct dma_async_tx_descriptor object + * in it, used to communicate with the user + * 2. hardware DMA link descriptors, that we pass to DMAC for DMA transfer + * queuing, these must be DMAable, using either the streaming DMA API or + * allocated from coherent memory - one per SG segment + * 3. one per SG segment descriptors, used to manage HW link descriptors from + * (2). They do not have to be DMAable. They can either be (a) allocated + * together with link descriptors as mixed (DMA / CPU) objects, or (b) + * separately. Even if allocated separately it would be best to link them + * to link descriptors once during channel resource allocation and always + * use them as a single object. + * Therefore for both cases (a) and (b) at run-time objects (2) and (3) shall be + * treated as a single SG segment descriptor. + */ + +struct nbpf_link_reg { + u32 header; + u32 src_addr; + u32 dst_addr; + u32 transaction_size; + u32 config; + u32 interval; + u32 extension; + u32 next; +} __packed; + +struct nbpf_device; +struct nbpf_channel; +struct nbpf_desc; + +struct nbpf_link_desc { + struct nbpf_link_reg *hwdesc; + dma_addr_t hwdesc_dma_addr; + struct nbpf_desc *desc; + struct list_head node; +}; + +/** + * struct nbpf_desc - DMA transfer descriptor + * @async_tx: dmaengine object + * @user_wait: waiting for a user ack + * @length: total transfer length + * @chan: associated DMAC channel + * @sg: list of hardware descriptors, represented by struct nbpf_link_desc + * @node: member in channel descriptor lists + */ +struct nbpf_desc { + struct dma_async_tx_descriptor async_tx; + bool user_wait; + size_t length; + struct nbpf_channel *chan; + struct list_head sg; + struct list_head node; +}; + +/* Take a wild guess: allocate 4 segments per descriptor */ +#define NBPF_SEGMENTS_PER_DESC 4 +#define NBPF_DESCS_PER_PAGE ((PAGE_SIZE - sizeof(struct list_head)) / \ + (sizeof(struct nbpf_desc) + \ + NBPF_SEGMENTS_PER_DESC * \ + (sizeof(struct nbpf_link_desc) + sizeof(struct nbpf_link_reg)))) +#define NBPF_SEGMENTS_PER_PAGE (NBPF_SEGMENTS_PER_DESC * NBPF_DESCS_PER_PAGE) + +struct nbpf_desc_page { + struct list_head node; + struct nbpf_desc desc[NBPF_DESCS_PER_PAGE]; + struct nbpf_link_desc ldesc[NBPF_SEGMENTS_PER_PAGE]; + struct nbpf_link_reg hwdesc[NBPF_SEGMENTS_PER_PAGE]; +}; + +/** + * struct nbpf_channel - one DMAC channel + * @dma_chan: standard dmaengine channel object + * @tasklet: channel specific tasklet used for callbacks + * @base: register address base + * @nbpf: DMAC + * @name: IRQ name + * @irq: IRQ number + * @slave_src_addr: source address for slave DMA + * @slave_src_width: source slave data size in bytes + * @slave_src_burst: maximum source slave burst size in bytes + * @slave_dst_addr: destination address for slave DMA + * @slave_dst_width: destination slave data size in bytes + * @slave_dst_burst: maximum destination slave burst size in bytes + * @terminal: DMA terminal, assigned to this channel + * @dmarq_cfg: DMA request line configuration - high / low, edge / level for NBPF_CHAN_CFG + * @flags: configuration flags from DT + * @lock: protect descriptor lists + * @free_links: list of free link descriptors + * @free: list of free descriptors + * @queued: list of queued descriptors + * @active: list of descriptors, scheduled for processing + * @done: list of completed descriptors, waiting post-processing + * @desc_page: list of additionally allocated descriptor pages - if any + * @running: linked descriptor of running transaction + * @paused: are translations on this channel paused? + */ +struct nbpf_channel { + struct dma_chan dma_chan; + struct tasklet_struct tasklet; + void __iomem *base; + struct nbpf_device *nbpf; + char name[16]; + int irq; + dma_addr_t slave_src_addr; + size_t slave_src_width; + size_t slave_src_burst; + dma_addr_t slave_dst_addr; + size_t slave_dst_width; + size_t slave_dst_burst; + unsigned int terminal; + u32 dmarq_cfg; + unsigned long flags; + spinlock_t lock; + struct list_head free_links; + struct list_head free; + struct list_head queued; + struct list_head active; + struct list_head done; + struct list_head desc_page; + struct nbpf_desc *running; + bool paused; +}; + +struct nbpf_device { + struct dma_device dma_dev; + void __iomem *base; + u32 max_burst_mem_read; + u32 max_burst_mem_write; + struct clk *clk; + const struct nbpf_config *config; + unsigned int eirq; + struct nbpf_channel chan[]; +}; + +enum nbpf_model { + NBPF1B4, + NBPF1B8, + NBPF1B16, + NBPF4B4, + NBPF4B8, + NBPF4B16, + NBPF8B4, + NBPF8B8, + NBPF8B16, +}; + +static struct nbpf_config nbpf_cfg[] = { + [NBPF1B4] = { + .num_channels = 1, + .buffer_size = 4, + }, + [NBPF1B8] = { + .num_channels = 1, + .buffer_size = 8, + }, + [NBPF1B16] = { + .num_channels = 1, + .buffer_size = 16, + }, + [NBPF4B4] = { + .num_channels = 4, + .buffer_size = 4, + }, + [NBPF4B8] = { + .num_channels = 4, + .buffer_size = 8, + }, + [NBPF4B16] = { + .num_channels = 4, + .buffer_size = 16, + }, + [NBPF8B4] = { + .num_channels = 8, + .buffer_size = 4, + }, + [NBPF8B8] = { + .num_channels = 8, + .buffer_size = 8, + }, + [NBPF8B16] = { + .num_channels = 8, + .buffer_size = 16, + }, +}; + +#define nbpf_to_chan(d) container_of(d, struct nbpf_channel, dma_chan) + +/* + * dmaengine drivers seem to have a lot in common and instead of sharing more + * code, they reimplement those common algorithms independently. In this driver + * we try to separate the hardware-specific part from the (largely) generic + * part. This improves code readability and makes it possible in the future to + * reuse the generic code in form of a helper library. That generic code should + * be suitable for various DMA controllers, using transfer descriptors in RAM + * and pushing one SG list at a time to the DMA controller. + */ + +/* Hardware-specific part */ + +static inline u32 nbpf_chan_read(struct nbpf_channel *chan, + unsigned int offset) +{ + u32 data = ioread32(chan->base + offset); + dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, chan->base, offset, data); + return data; +} + +static inline void nbpf_chan_write(struct nbpf_channel *chan, + unsigned int offset, u32 data) +{ + iowrite32(data, chan->base + offset); + dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, chan->base, offset, data); +} + +static inline u32 nbpf_read(struct nbpf_device *nbpf, + unsigned int offset) +{ + u32 data = ioread32(nbpf->base + offset); + dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, nbpf->base, offset, data); + return data; +} + +static inline void nbpf_write(struct nbpf_device *nbpf, + unsigned int offset, u32 data) +{ + iowrite32(data, nbpf->base + offset); + dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, nbpf->base, offset, data); +} + +static void nbpf_chan_halt(struct nbpf_channel *chan) +{ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN); +} + +static bool nbpf_status_get(struct nbpf_channel *chan) +{ + u32 status = nbpf_read(chan->nbpf, NBPF_DSTAT_END); + + return status & BIT(chan - chan->nbpf->chan); +} + +static void nbpf_status_ack(struct nbpf_channel *chan) +{ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREND); +} + +static u32 nbpf_error_get(struct nbpf_device *nbpf) +{ + return nbpf_read(nbpf, NBPF_DSTAT_ER); +} + +static struct nbpf_channel *nbpf_error_get_channel(struct nbpf_device *nbpf, u32 error) +{ + return nbpf->chan + __ffs(error); +} + +static void nbpf_error_clear(struct nbpf_channel *chan) +{ + u32 status; + int i; + + /* Stop the channel, make sure DMA has been aborted */ + nbpf_chan_halt(chan); + + for (i = 1000; i; i--) { + status = nbpf_chan_read(chan, NBPF_CHAN_STAT); + if (!(status & NBPF_CHAN_STAT_TACT)) + break; + cpu_relax(); + } + + if (!i) + dev_err(chan->dma_chan.device->dev, + "%s(): abort timeout, channel status 0x%x\n", __func__, status); + + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SWRST); +} + +static int nbpf_start(struct nbpf_desc *desc) +{ + struct nbpf_channel *chan = desc->chan; + struct nbpf_link_desc *ldesc = list_first_entry(&desc->sg, struct nbpf_link_desc, node); + + nbpf_chan_write(chan, NBPF_CHAN_NXLA, (u32)ldesc->hwdesc_dma_addr); + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETEN | NBPF_CHAN_CTRL_CLRSUS); + chan->paused = false; + + /* Software trigger MEMCPY - only MEMCPY uses the block mode */ + if (ldesc->hwdesc->config & NBPF_CHAN_CFG_TM) + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_STG); + + dev_dbg(chan->nbpf->dma_dev.dev, "%s(): next 0x%x, cur 0x%x\n", __func__, + nbpf_chan_read(chan, NBPF_CHAN_NXLA), nbpf_chan_read(chan, NBPF_CHAN_CRLA)); + + return 0; +} + +static void nbpf_chan_prepare(struct nbpf_channel *chan) +{ + chan->dmarq_cfg = (chan->flags & NBPF_SLAVE_RQ_HIGH ? NBPF_CHAN_CFG_HIEN : 0) | + (chan->flags & NBPF_SLAVE_RQ_LOW ? NBPF_CHAN_CFG_LOEN : 0) | + (chan->flags & NBPF_SLAVE_RQ_LEVEL ? + NBPF_CHAN_CFG_LVL | (NBPF_CHAN_CFG_AM & 0x200) : 0) | + chan->terminal; +} + +static void nbpf_chan_prepare_default(struct nbpf_channel *chan) +{ + /* Don't output DMAACK */ + chan->dmarq_cfg = NBPF_CHAN_CFG_AM & 0x400; + chan->terminal = 0; + chan->flags = 0; +} + +static void nbpf_chan_configure(struct nbpf_channel *chan) +{ + /* + * We assume, that only the link mode and DMA request line configuration + * have to be set in the configuration register manually. Dynamic + * per-transfer configuration will be loaded from transfer descriptors. + */ + nbpf_chan_write(chan, NBPF_CHAN_CFG, NBPF_CHAN_CFG_DMS | chan->dmarq_cfg); +} + +static u32 nbpf_xfer_ds(struct nbpf_device *nbpf, size_t size, + enum dma_transfer_direction direction) +{ + int max_burst = nbpf->config->buffer_size * 8; + + if (nbpf->max_burst_mem_read || nbpf->max_burst_mem_write) { + switch (direction) { + case DMA_MEM_TO_MEM: + max_burst = min_not_zero(nbpf->max_burst_mem_read, + nbpf->max_burst_mem_write); + break; + case DMA_MEM_TO_DEV: + if (nbpf->max_burst_mem_read) + max_burst = nbpf->max_burst_mem_read; + break; + case DMA_DEV_TO_MEM: + if (nbpf->max_burst_mem_write) + max_burst = nbpf->max_burst_mem_write; + break; + case DMA_DEV_TO_DEV: + default: + break; + } + } + + /* Maximum supported bursts depend on the buffer size */ + return min_t(int, __ffs(size), ilog2(max_burst)); +} + +static size_t nbpf_xfer_size(struct nbpf_device *nbpf, + enum dma_slave_buswidth width, u32 burst) +{ + size_t size; + + if (!burst) + burst = 1; + + switch (width) { + case DMA_SLAVE_BUSWIDTH_8_BYTES: + size = 8 * burst; + break; + + case DMA_SLAVE_BUSWIDTH_4_BYTES: + size = 4 * burst; + break; + + case DMA_SLAVE_BUSWIDTH_2_BYTES: + size = 2 * burst; + break; + + default: + pr_warn("%s(): invalid bus width %u\n", __func__, width); + fallthrough; + case DMA_SLAVE_BUSWIDTH_1_BYTE: + size = burst; + } + + return nbpf_xfer_ds(nbpf, size, DMA_TRANS_NONE); +} + +/* + * We need a way to recognise slaves, whose data is sent "raw" over the bus, + * i.e. it isn't known in advance how many bytes will be received. Therefore + * the slave driver has to provide a "large enough" buffer and either read the + * buffer, when it is full, or detect, that some data has arrived, then wait for + * a timeout, if no more data arrives - receive what's already there. We want to + * handle such slaves in a special way to allow an optimised mode for other + * users, for whom the amount of data is known in advance. So far there's no way + * to recognise such slaves. We use a data-width check to distinguish between + * the SD host and the PL011 UART. + */ + +static int nbpf_prep_one(struct nbpf_link_desc *ldesc, + enum dma_transfer_direction direction, + dma_addr_t src, dma_addr_t dst, size_t size, bool last) +{ + struct nbpf_link_reg *hwdesc = ldesc->hwdesc; + struct nbpf_desc *desc = ldesc->desc; + struct nbpf_channel *chan = desc->chan; + struct device *dev = chan->dma_chan.device->dev; + size_t mem_xfer, slave_xfer; + bool can_burst; + + hwdesc->header = NBPF_HEADER_WBD | NBPF_HEADER_LV | + (last ? NBPF_HEADER_LE : 0); + + hwdesc->src_addr = src; + hwdesc->dst_addr = dst; + hwdesc->transaction_size = size; + + /* + * set config: SAD, DAD, DDS, SDS, etc. + * Note on transfer sizes: the DMAC can perform unaligned DMA transfers, + * but it is important to have transaction size a multiple of both + * receiver and transmitter transfer sizes. It is also possible to use + * different RAM and device transfer sizes, and it does work well with + * some devices, e.g. with V08R07S01E SD host controllers, which can use + * 128 byte transfers. But this doesn't work with other devices, + * especially when the transaction size is unknown. This is the case, + * e.g. with serial drivers like amba-pl011.c. For reception it sets up + * the transaction size of 4K and if fewer bytes are received, it + * pauses DMA and reads out data received via DMA as well as those left + * in the Rx FIFO. For this to work with the RAM side using burst + * transfers we enable the SBE bit and terminate the transfer in our + * .device_pause handler. + */ + mem_xfer = nbpf_xfer_ds(chan->nbpf, size, direction); + + switch (direction) { + case DMA_DEV_TO_MEM: + can_burst = chan->slave_src_width >= 3; + slave_xfer = min(mem_xfer, can_burst ? + chan->slave_src_burst : chan->slave_src_width); + /* + * Is the slave narrower than 64 bits, i.e. isn't using the full + * bus width and cannot use bursts? + */ + if (mem_xfer > chan->slave_src_burst && !can_burst) + mem_xfer = chan->slave_src_burst; + /* Device-to-RAM DMA is unreliable without REQD set */ + hwdesc->config = NBPF_CHAN_CFG_SAD | (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)) | + (NBPF_CHAN_CFG_SDS & (slave_xfer << 12)) | NBPF_CHAN_CFG_REQD | + NBPF_CHAN_CFG_SBE; + break; + + case DMA_MEM_TO_DEV: + slave_xfer = min(mem_xfer, chan->slave_dst_width >= 3 ? + chan->slave_dst_burst : chan->slave_dst_width); + hwdesc->config = NBPF_CHAN_CFG_DAD | (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) | + (NBPF_CHAN_CFG_DDS & (slave_xfer << 16)) | NBPF_CHAN_CFG_REQD; + break; + + case DMA_MEM_TO_MEM: + hwdesc->config = NBPF_CHAN_CFG_TCM | NBPF_CHAN_CFG_TM | + (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) | + (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)); + break; + + default: + return -EINVAL; + } + + hwdesc->config |= chan->dmarq_cfg | (last ? 0 : NBPF_CHAN_CFG_DEM) | + NBPF_CHAN_CFG_DMS; + + dev_dbg(dev, "%s(): desc @ %pad: hdr 0x%x, cfg 0x%x, %zu @ %pad -> %pad\n", + __func__, &ldesc->hwdesc_dma_addr, hwdesc->header, + hwdesc->config, size, &src, &dst); + + dma_sync_single_for_device(dev, ldesc->hwdesc_dma_addr, sizeof(*hwdesc), + DMA_TO_DEVICE); + + return 0; +} + +static size_t nbpf_bytes_left(struct nbpf_channel *chan) +{ + return nbpf_chan_read(chan, NBPF_CHAN_CUR_TR_BYTE); +} + +static void nbpf_configure(struct nbpf_device *nbpf) +{ + nbpf_write(nbpf, NBPF_CTRL, NBPF_CTRL_LVINT); +} + +/* Generic part */ + +/* DMA ENGINE functions */ +static void nbpf_issue_pending(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + unsigned long flags; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + spin_lock_irqsave(&chan->lock, flags); + if (list_empty(&chan->queued)) + goto unlock; + + list_splice_tail_init(&chan->queued, &chan->active); + + if (!chan->running) { + struct nbpf_desc *desc = list_first_entry(&chan->active, + struct nbpf_desc, node); + if (!nbpf_start(desc)) + chan->running = desc; + } + +unlock: + spin_unlock_irqrestore(&chan->lock, flags); +} + +static enum dma_status nbpf_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + enum dma_status status = dma_cookie_status(dchan, cookie, state); + + if (state) { + dma_cookie_t running; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + running = chan->running ? chan->running->async_tx.cookie : -EINVAL; + + if (cookie == running) { + state->residue = nbpf_bytes_left(chan); + dev_dbg(dchan->device->dev, "%s(): residue %u\n", __func__, + state->residue); + } else if (status == DMA_IN_PROGRESS) { + struct nbpf_desc *desc; + bool found = false; + + list_for_each_entry(desc, &chan->active, node) + if (desc->async_tx.cookie == cookie) { + found = true; + break; + } + + if (!found) + list_for_each_entry(desc, &chan->queued, node) + if (desc->async_tx.cookie == cookie) { + found = true; + break; + + } + + state->residue = found ? desc->length : 0; + } + + spin_unlock_irqrestore(&chan->lock, flags); + } + + if (chan->paused) + status = DMA_PAUSED; + + return status; +} + +static dma_cookie_t nbpf_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct nbpf_desc *desc = container_of(tx, struct nbpf_desc, async_tx); + struct nbpf_channel *chan = desc->chan; + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&chan->lock, flags); + cookie = dma_cookie_assign(tx); + list_add_tail(&desc->node, &chan->queued); + spin_unlock_irqrestore(&chan->lock, flags); + + dev_dbg(chan->dma_chan.device->dev, "Entry %s(%d)\n", __func__, cookie); + + return cookie; +} + +static int nbpf_desc_page_alloc(struct nbpf_channel *chan) +{ + struct dma_chan *dchan = &chan->dma_chan; + struct nbpf_desc_page *dpage = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + struct nbpf_link_desc *ldesc; + struct nbpf_link_reg *hwdesc; + struct nbpf_desc *desc; + LIST_HEAD(head); + LIST_HEAD(lhead); + int i; + struct device *dev = dchan->device->dev; + + if (!dpage) + return -ENOMEM; + + dev_dbg(dev, "%s(): alloc %lu descriptors, %lu segments, total alloc %zu\n", + __func__, NBPF_DESCS_PER_PAGE, NBPF_SEGMENTS_PER_PAGE, sizeof(*dpage)); + + for (i = 0, ldesc = dpage->ldesc, hwdesc = dpage->hwdesc; + i < ARRAY_SIZE(dpage->ldesc); + i++, ldesc++, hwdesc++) { + ldesc->hwdesc = hwdesc; + list_add_tail(&ldesc->node, &lhead); + ldesc->hwdesc_dma_addr = dma_map_single(dchan->device->dev, + hwdesc, sizeof(*hwdesc), DMA_TO_DEVICE); + + dev_dbg(dev, "%s(): mapped 0x%p to %pad\n", __func__, + hwdesc, &ldesc->hwdesc_dma_addr); + } + + for (i = 0, desc = dpage->desc; + i < ARRAY_SIZE(dpage->desc); + i++, desc++) { + dma_async_tx_descriptor_init(&desc->async_tx, dchan); + desc->async_tx.tx_submit = nbpf_tx_submit; + desc->chan = chan; + INIT_LIST_HEAD(&desc->sg); + list_add_tail(&desc->node, &head); + } + + /* + * This function cannot be called from interrupt context, so, no need to + * save flags + */ + spin_lock_irq(&chan->lock); + list_splice_tail(&lhead, &chan->free_links); + list_splice_tail(&head, &chan->free); + list_add(&dpage->node, &chan->desc_page); + spin_unlock_irq(&chan->lock); + + return ARRAY_SIZE(dpage->desc); +} + +static void nbpf_desc_put(struct nbpf_desc *desc) +{ + struct nbpf_channel *chan = desc->chan; + struct nbpf_link_desc *ldesc, *tmp; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + list_for_each_entry_safe(ldesc, tmp, &desc->sg, node) + list_move(&ldesc->node, &chan->free_links); + + list_add(&desc->node, &chan->free); + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void nbpf_scan_acked(struct nbpf_channel *chan) +{ + struct nbpf_desc *desc, *tmp; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->lock, flags); + list_for_each_entry_safe(desc, tmp, &chan->done, node) + if (async_tx_test_ack(&desc->async_tx) && desc->user_wait) { + list_move(&desc->node, &head); + desc->user_wait = false; + } + spin_unlock_irqrestore(&chan->lock, flags); + + list_for_each_entry_safe(desc, tmp, &head, node) { + list_del(&desc->node); + nbpf_desc_put(desc); + } +} + +/* + * We have to allocate descriptors with the channel lock dropped. This means, + * before we re-acquire the lock buffers can be taken already, so we have to + * re-check after re-acquiring the lock and possibly retry, if buffers are gone + * again. + */ +static struct nbpf_desc *nbpf_desc_get(struct nbpf_channel *chan, size_t len) +{ + struct nbpf_desc *desc = NULL; + struct nbpf_link_desc *ldesc, *prev = NULL; + + nbpf_scan_acked(chan); + + spin_lock_irq(&chan->lock); + + do { + int i = 0, ret; + + if (list_empty(&chan->free)) { + /* No more free descriptors */ + spin_unlock_irq(&chan->lock); + ret = nbpf_desc_page_alloc(chan); + if (ret < 0) + return NULL; + spin_lock_irq(&chan->lock); + continue; + } + desc = list_first_entry(&chan->free, struct nbpf_desc, node); + list_del(&desc->node); + + do { + if (list_empty(&chan->free_links)) { + /* No more free link descriptors */ + spin_unlock_irq(&chan->lock); + ret = nbpf_desc_page_alloc(chan); + if (ret < 0) { + nbpf_desc_put(desc); + return NULL; + } + spin_lock_irq(&chan->lock); + continue; + } + + ldesc = list_first_entry(&chan->free_links, + struct nbpf_link_desc, node); + ldesc->desc = desc; + if (prev) + prev->hwdesc->next = (u32)ldesc->hwdesc_dma_addr; + + prev = ldesc; + list_move_tail(&ldesc->node, &desc->sg); + + i++; + } while (i < len); + } while (!desc); + + prev->hwdesc->next = 0; + + spin_unlock_irq(&chan->lock); + + return desc; +} + +static void nbpf_chan_idle(struct nbpf_channel *chan) +{ + struct nbpf_desc *desc, *tmp; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->lock, flags); + + list_splice_init(&chan->done, &head); + list_splice_init(&chan->active, &head); + list_splice_init(&chan->queued, &head); + + chan->running = NULL; + + spin_unlock_irqrestore(&chan->lock, flags); + + list_for_each_entry_safe(desc, tmp, &head, node) { + dev_dbg(chan->nbpf->dma_dev.dev, "%s(): force-free desc %p cookie %d\n", + __func__, desc, desc->async_tx.cookie); + list_del(&desc->node); + nbpf_desc_put(desc); + } +} + +static int nbpf_pause(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + + dev_dbg(dchan->device->dev, "Entry %s\n", __func__); + + chan->paused = true; + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETSUS); + /* See comment in nbpf_prep_one() */ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN); + + return 0; +} + +static int nbpf_terminate_all(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + + dev_dbg(dchan->device->dev, "Entry %s\n", __func__); + dev_dbg(dchan->device->dev, "Terminating\n"); + + nbpf_chan_halt(chan); + nbpf_chan_idle(chan); + + return 0; +} + +static int nbpf_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + + dev_dbg(dchan->device->dev, "Entry %s\n", __func__); + + /* + * We could check config->slave_id to match chan->terminal here, + * but with DT they would be coming from the same source, so + * such a check would be superflous + */ + + chan->slave_dst_addr = config->dst_addr; + chan->slave_dst_width = nbpf_xfer_size(chan->nbpf, + config->dst_addr_width, 1); + chan->slave_dst_burst = nbpf_xfer_size(chan->nbpf, + config->dst_addr_width, + config->dst_maxburst); + chan->slave_src_addr = config->src_addr; + chan->slave_src_width = nbpf_xfer_size(chan->nbpf, + config->src_addr_width, 1); + chan->slave_src_burst = nbpf_xfer_size(chan->nbpf, + config->src_addr_width, + config->src_maxburst); + + return 0; +} + +static struct dma_async_tx_descriptor *nbpf_prep_sg(struct nbpf_channel *chan, + struct scatterlist *src_sg, struct scatterlist *dst_sg, + size_t len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct nbpf_link_desc *ldesc; + struct scatterlist *mem_sg; + struct nbpf_desc *desc; + bool inc_src, inc_dst; + size_t data_len = 0; + int i = 0; + + switch (direction) { + case DMA_DEV_TO_MEM: + mem_sg = dst_sg; + inc_src = false; + inc_dst = true; + break; + + case DMA_MEM_TO_DEV: + mem_sg = src_sg; + inc_src = true; + inc_dst = false; + break; + + default: + case DMA_MEM_TO_MEM: + mem_sg = src_sg; + inc_src = true; + inc_dst = true; + } + + desc = nbpf_desc_get(chan, len); + if (!desc) + return NULL; + + desc->async_tx.flags = flags; + desc->async_tx.cookie = -EBUSY; + desc->user_wait = false; + + /* + * This is a private descriptor list, and we own the descriptor. No need + * to lock. + */ + list_for_each_entry(ldesc, &desc->sg, node) { + int ret = nbpf_prep_one(ldesc, direction, + sg_dma_address(src_sg), + sg_dma_address(dst_sg), + sg_dma_len(mem_sg), + i == len - 1); + if (ret < 0) { + nbpf_desc_put(desc); + return NULL; + } + data_len += sg_dma_len(mem_sg); + if (inc_src) + src_sg = sg_next(src_sg); + if (inc_dst) + dst_sg = sg_next(dst_sg); + mem_sg = direction == DMA_DEV_TO_MEM ? dst_sg : src_sg; + i++; + } + + desc->length = data_len; + + /* The user has to return the descriptor to us ASAP via .tx_submit() */ + return &desc->async_tx; +} + +static struct dma_async_tx_descriptor *nbpf_prep_memcpy( + struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct scatterlist dst_sg; + struct scatterlist src_sg; + + sg_init_table(&dst_sg, 1); + sg_init_table(&src_sg, 1); + + sg_dma_address(&dst_sg) = dst; + sg_dma_address(&src_sg) = src; + + sg_dma_len(&dst_sg) = len; + sg_dma_len(&src_sg) = len; + + dev_dbg(dchan->device->dev, "%s(): %zu @ %pad -> %pad\n", + __func__, len, &src, &dst); + + return nbpf_prep_sg(chan, &src_sg, &dst_sg, 1, + DMA_MEM_TO_MEM, flags); +} + +static struct dma_async_tx_descriptor *nbpf_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, void *context) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct scatterlist slave_sg; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + sg_init_table(&slave_sg, 1); + + switch (direction) { + case DMA_MEM_TO_DEV: + sg_dma_address(&slave_sg) = chan->slave_dst_addr; + return nbpf_prep_sg(chan, sgl, &slave_sg, sg_len, + direction, flags); + + case DMA_DEV_TO_MEM: + sg_dma_address(&slave_sg) = chan->slave_src_addr; + return nbpf_prep_sg(chan, &slave_sg, sgl, sg_len, + direction, flags); + + default: + return NULL; + } +} + +static int nbpf_alloc_chan_resources(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + int ret; + + INIT_LIST_HEAD(&chan->free); + INIT_LIST_HEAD(&chan->free_links); + INIT_LIST_HEAD(&chan->queued); + INIT_LIST_HEAD(&chan->active); + INIT_LIST_HEAD(&chan->done); + + ret = nbpf_desc_page_alloc(chan); + if (ret < 0) + return ret; + + dev_dbg(dchan->device->dev, "Entry %s(): terminal %u\n", __func__, + chan->terminal); + + nbpf_chan_configure(chan); + + return ret; +} + +static void nbpf_free_chan_resources(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct nbpf_desc_page *dpage, *tmp; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + nbpf_chan_halt(chan); + nbpf_chan_idle(chan); + /* Clean up for if a channel is re-used for MEMCPY after slave DMA */ + nbpf_chan_prepare_default(chan); + + list_for_each_entry_safe(dpage, tmp, &chan->desc_page, node) { + struct nbpf_link_desc *ldesc; + int i; + list_del(&dpage->node); + for (i = 0, ldesc = dpage->ldesc; + i < ARRAY_SIZE(dpage->ldesc); + i++, ldesc++) + dma_unmap_single(dchan->device->dev, ldesc->hwdesc_dma_addr, + sizeof(*ldesc->hwdesc), DMA_TO_DEVICE); + free_page((unsigned long)dpage); + } +} + +static struct dma_chan *nbpf_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct nbpf_device *nbpf = ofdma->of_dma_data; + struct dma_chan *dchan; + struct nbpf_channel *chan; + + if (dma_spec->args_count != 2) + return NULL; + + dchan = dma_get_any_slave_channel(&nbpf->dma_dev); + if (!dchan) + return NULL; + + dev_dbg(dchan->device->dev, "Entry %s(%pOFn)\n", __func__, + dma_spec->np); + + chan = nbpf_to_chan(dchan); + + chan->terminal = dma_spec->args[0]; + chan->flags = dma_spec->args[1]; + + nbpf_chan_prepare(chan); + nbpf_chan_configure(chan); + + return dchan; +} + +static void nbpf_chan_tasklet(struct tasklet_struct *t) +{ + struct nbpf_channel *chan = from_tasklet(chan, t, tasklet); + struct nbpf_desc *desc, *tmp; + struct dmaengine_desc_callback cb; + + while (!list_empty(&chan->done)) { + bool found = false, must_put, recycling = false; + + spin_lock_irq(&chan->lock); + + list_for_each_entry_safe(desc, tmp, &chan->done, node) { + if (!desc->user_wait) { + /* Newly completed descriptor, have to process */ + found = true; + break; + } else if (async_tx_test_ack(&desc->async_tx)) { + /* + * This descriptor was waiting for a user ACK, + * it can be recycled now. + */ + list_del(&desc->node); + spin_unlock_irq(&chan->lock); + nbpf_desc_put(desc); + recycling = true; + break; + } + } + + if (recycling) + continue; + + if (!found) { + /* This can happen if TERMINATE_ALL has been called */ + spin_unlock_irq(&chan->lock); + break; + } + + dma_cookie_complete(&desc->async_tx); + + /* + * With released lock we cannot dereference desc, maybe it's + * still on the "done" list + */ + if (async_tx_test_ack(&desc->async_tx)) { + list_del(&desc->node); + must_put = true; + } else { + desc->user_wait = true; + must_put = false; + } + + dmaengine_desc_get_callback(&desc->async_tx, &cb); + + /* ack and callback completed descriptor */ + spin_unlock_irq(&chan->lock); + + dmaengine_desc_callback_invoke(&cb, NULL); + + if (must_put) + nbpf_desc_put(desc); + } +} + +static irqreturn_t nbpf_chan_irq(int irq, void *dev) +{ + struct nbpf_channel *chan = dev; + bool done = nbpf_status_get(chan); + struct nbpf_desc *desc; + irqreturn_t ret; + bool bh = false; + + if (!done) + return IRQ_NONE; + + nbpf_status_ack(chan); + + dev_dbg(&chan->dma_chan.dev->device, "%s()\n", __func__); + + spin_lock(&chan->lock); + desc = chan->running; + if (WARN_ON(!desc)) { + ret = IRQ_NONE; + goto unlock; + } else { + ret = IRQ_HANDLED; + bh = true; + } + + list_move_tail(&desc->node, &chan->done); + chan->running = NULL; + + if (!list_empty(&chan->active)) { + desc = list_first_entry(&chan->active, + struct nbpf_desc, node); + if (!nbpf_start(desc)) + chan->running = desc; + } + +unlock: + spin_unlock(&chan->lock); + + if (bh) + tasklet_schedule(&chan->tasklet); + + return ret; +} + +static irqreturn_t nbpf_err_irq(int irq, void *dev) +{ + struct nbpf_device *nbpf = dev; + u32 error = nbpf_error_get(nbpf); + + dev_warn(nbpf->dma_dev.dev, "DMA error IRQ %u\n", irq); + + if (!error) + return IRQ_NONE; + + do { + struct nbpf_channel *chan = nbpf_error_get_channel(nbpf, error); + /* On error: abort all queued transfers, no callback */ + nbpf_error_clear(chan); + nbpf_chan_idle(chan); + error = nbpf_error_get(nbpf); + } while (error); + + return IRQ_HANDLED; +} + +static int nbpf_chan_probe(struct nbpf_device *nbpf, int n) +{ + struct dma_device *dma_dev = &nbpf->dma_dev; + struct nbpf_channel *chan = nbpf->chan + n; + int ret; + + chan->nbpf = nbpf; + chan->base = nbpf->base + NBPF_REG_CHAN_OFFSET + NBPF_REG_CHAN_SIZE * n; + INIT_LIST_HEAD(&chan->desc_page); + spin_lock_init(&chan->lock); + chan->dma_chan.device = dma_dev; + dma_cookie_init(&chan->dma_chan); + nbpf_chan_prepare_default(chan); + + dev_dbg(dma_dev->dev, "%s(): channel %d: -> %p\n", __func__, n, chan->base); + + snprintf(chan->name, sizeof(chan->name), "nbpf %d", n); + + tasklet_setup(&chan->tasklet, nbpf_chan_tasklet); + ret = devm_request_irq(dma_dev->dev, chan->irq, + nbpf_chan_irq, IRQF_SHARED, + chan->name, chan); + if (ret < 0) + return ret; + + /* Add the channel to DMA device channel list */ + list_add_tail(&chan->dma_chan.device_node, + &dma_dev->channels); + + return 0; +} + +static const struct of_device_id nbpf_match[] = { + {.compatible = "renesas,nbpfaxi64dmac1b4", .data = &nbpf_cfg[NBPF1B4]}, + {.compatible = "renesas,nbpfaxi64dmac1b8", .data = &nbpf_cfg[NBPF1B8]}, + {.compatible = "renesas,nbpfaxi64dmac1b16", .data = &nbpf_cfg[NBPF1B16]}, + {.compatible = "renesas,nbpfaxi64dmac4b4", .data = &nbpf_cfg[NBPF4B4]}, + {.compatible = "renesas,nbpfaxi64dmac4b8", .data = &nbpf_cfg[NBPF4B8]}, + {.compatible = "renesas,nbpfaxi64dmac4b16", .data = &nbpf_cfg[NBPF4B16]}, + {.compatible = "renesas,nbpfaxi64dmac8b4", .data = &nbpf_cfg[NBPF8B4]}, + {.compatible = "renesas,nbpfaxi64dmac8b8", .data = &nbpf_cfg[NBPF8B8]}, + {.compatible = "renesas,nbpfaxi64dmac8b16", .data = &nbpf_cfg[NBPF8B16]}, + {} +}; +MODULE_DEVICE_TABLE(of, nbpf_match); + +static int nbpf_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct nbpf_device *nbpf; + struct dma_device *dma_dev; + const struct nbpf_config *cfg; + int num_channels; + int ret, irq, eirq, i; + int irqbuf[9] /* maximum 8 channels + error IRQ */; + unsigned int irqs = 0; + + BUILD_BUG_ON(sizeof(struct nbpf_desc_page) > PAGE_SIZE); + + /* DT only */ + if (!np) + return -ENODEV; + + cfg = of_device_get_match_data(dev); + num_channels = cfg->num_channels; + + nbpf = devm_kzalloc(dev, struct_size(nbpf, chan, num_channels), + GFP_KERNEL); + if (!nbpf) + return -ENOMEM; + + dma_dev = &nbpf->dma_dev; + dma_dev->dev = dev; + + nbpf->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(nbpf->base)) + return PTR_ERR(nbpf->base); + + nbpf->clk = devm_clk_get(dev, NULL); + if (IS_ERR(nbpf->clk)) + return PTR_ERR(nbpf->clk); + + of_property_read_u32(np, "max-burst-mem-read", + &nbpf->max_burst_mem_read); + of_property_read_u32(np, "max-burst-mem-write", + &nbpf->max_burst_mem_write); + + nbpf->config = cfg; + + for (i = 0; irqs < ARRAY_SIZE(irqbuf); i++) { + irq = platform_get_irq_optional(pdev, i); + if (irq < 0 && irq != -ENXIO) + return irq; + if (irq > 0) + irqbuf[irqs++] = irq; + } + + /* + * 3 IRQ resource schemes are supported: + * 1. 1 shared IRQ for error and all channels + * 2. 2 IRQs: one for error and one shared for all channels + * 3. 1 IRQ for error and an own IRQ for each channel + */ + if (irqs != 1 && irqs != 2 && irqs != num_channels + 1) + return -ENXIO; + + if (irqs == 1) { + eirq = irqbuf[0]; + + for (i = 0; i <= num_channels; i++) + nbpf->chan[i].irq = irqbuf[0]; + } else { + eirq = platform_get_irq_byname(pdev, "error"); + if (eirq < 0) + return eirq; + + if (irqs == num_channels + 1) { + struct nbpf_channel *chan; + + for (i = 0, chan = nbpf->chan; i <= num_channels; + i++, chan++) { + /* Skip the error IRQ */ + if (irqbuf[i] == eirq) + i++; + chan->irq = irqbuf[i]; + } + + if (chan != nbpf->chan + num_channels) + return -EINVAL; + } else { + /* 2 IRQs and more than one channel */ + if (irqbuf[0] == eirq) + irq = irqbuf[1]; + else + irq = irqbuf[0]; + + for (i = 0; i <= num_channels; i++) + nbpf->chan[i].irq = irq; + } + } + + ret = devm_request_irq(dev, eirq, nbpf_err_irq, + IRQF_SHARED, "dma error", nbpf); + if (ret < 0) + return ret; + nbpf->eirq = eirq; + + INIT_LIST_HEAD(&dma_dev->channels); + + /* Create DMA Channel */ + for (i = 0; i < num_channels; i++) { + ret = nbpf_chan_probe(nbpf, i); + if (ret < 0) + return ret; + } + + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + + /* Common and MEMCPY operations */ + dma_dev->device_alloc_chan_resources + = nbpf_alloc_chan_resources; + dma_dev->device_free_chan_resources = nbpf_free_chan_resources; + dma_dev->device_prep_dma_memcpy = nbpf_prep_memcpy; + dma_dev->device_tx_status = nbpf_tx_status; + dma_dev->device_issue_pending = nbpf_issue_pending; + + /* + * If we drop support for unaligned MEMCPY buffer addresses and / or + * lengths by setting + * dma_dev->copy_align = 4; + * then we can set transfer length to 4 bytes in nbpf_prep_one() for + * DMA_MEM_TO_MEM + */ + + /* Compulsory for DMA_SLAVE fields */ + dma_dev->device_prep_slave_sg = nbpf_prep_slave_sg; + dma_dev->device_config = nbpf_config; + dma_dev->device_pause = nbpf_pause; + dma_dev->device_terminate_all = nbpf_terminate_all; + + dma_dev->src_addr_widths = NBPF_DMA_BUSWIDTHS; + dma_dev->dst_addr_widths = NBPF_DMA_BUSWIDTHS; + dma_dev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + + platform_set_drvdata(pdev, nbpf); + + ret = clk_prepare_enable(nbpf->clk); + if (ret < 0) + return ret; + + nbpf_configure(nbpf); + + ret = dma_async_device_register(dma_dev); + if (ret < 0) + goto e_clk_off; + + ret = of_dma_controller_register(np, nbpf_of_xlate, nbpf); + if (ret < 0) + goto e_dma_dev_unreg; + + return 0; + +e_dma_dev_unreg: + dma_async_device_unregister(dma_dev); +e_clk_off: + clk_disable_unprepare(nbpf->clk); + + return ret; +} + +static int nbpf_remove(struct platform_device *pdev) +{ + struct nbpf_device *nbpf = platform_get_drvdata(pdev); + int i; + + devm_free_irq(&pdev->dev, nbpf->eirq, nbpf); + + for (i = 0; i < nbpf->config->num_channels; i++) { + struct nbpf_channel *chan = nbpf->chan + i; + + devm_free_irq(&pdev->dev, chan->irq, chan); + + tasklet_kill(&chan->tasklet); + } + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&nbpf->dma_dev); + clk_disable_unprepare(nbpf->clk); + + return 0; +} + +static const struct platform_device_id nbpf_ids[] = { + {"nbpfaxi64dmac1b4", (kernel_ulong_t)&nbpf_cfg[NBPF1B4]}, + {"nbpfaxi64dmac1b8", (kernel_ulong_t)&nbpf_cfg[NBPF1B8]}, + {"nbpfaxi64dmac1b16", (kernel_ulong_t)&nbpf_cfg[NBPF1B16]}, + {"nbpfaxi64dmac4b4", (kernel_ulong_t)&nbpf_cfg[NBPF4B4]}, + {"nbpfaxi64dmac4b8", (kernel_ulong_t)&nbpf_cfg[NBPF4B8]}, + {"nbpfaxi64dmac4b16", (kernel_ulong_t)&nbpf_cfg[NBPF4B16]}, + {"nbpfaxi64dmac8b4", (kernel_ulong_t)&nbpf_cfg[NBPF8B4]}, + {"nbpfaxi64dmac8b8", (kernel_ulong_t)&nbpf_cfg[NBPF8B8]}, + {"nbpfaxi64dmac8b16", (kernel_ulong_t)&nbpf_cfg[NBPF8B16]}, + {}, +}; +MODULE_DEVICE_TABLE(platform, nbpf_ids); + +#ifdef CONFIG_PM +static int nbpf_runtime_suspend(struct device *dev) +{ + struct nbpf_device *nbpf = dev_get_drvdata(dev); + clk_disable_unprepare(nbpf->clk); + return 0; +} + +static int nbpf_runtime_resume(struct device *dev) +{ + struct nbpf_device *nbpf = dev_get_drvdata(dev); + return clk_prepare_enable(nbpf->clk); +} +#endif + +static const struct dev_pm_ops nbpf_pm_ops = { + SET_RUNTIME_PM_OPS(nbpf_runtime_suspend, nbpf_runtime_resume, NULL) +}; + +static struct platform_driver nbpf_driver = { + .driver = { + .name = "dma-nbpf", + .of_match_table = nbpf_match, + .pm = &nbpf_pm_ops, + }, + .id_table = nbpf_ids, + .probe = nbpf_probe, + .remove = nbpf_remove, +}; + +module_platform_driver(nbpf_driver); + +MODULE_AUTHOR("Guennadi Liakhovetski "); +MODULE_DESCRIPTION("dmaengine driver for NBPFAXI64* DMACs"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c new file mode 100644 index 0000000000..775a7f408b --- /dev/null +++ b/drivers/dma/of-dma.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Device tree helpers for DMA request / controller + * + * Based on of_gpio.c + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +static LIST_HEAD(of_dma_list); +static DEFINE_MUTEX(of_dma_lock); + +/** + * of_dma_find_controller - Get a DMA controller in DT DMA helpers list + * @dma_spec: pointer to DMA specifier as found in the device tree + * + * Finds a DMA controller with matching device node and number for dma cells + * in a list of registered DMA controllers. If a match is found a valid pointer + * to the DMA data stored is retuned. A NULL pointer is returned if no match is + * found. + */ +static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) +{ + struct of_dma *ofdma; + + list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers) + if (ofdma->of_node == dma_spec->np) + return ofdma; + + pr_debug("%s: can't find DMA controller %pOF\n", __func__, + dma_spec->np); + + return NULL; +} + +/** + * of_dma_router_xlate - translation function for router devices + * @dma_spec: pointer to DMA specifier as found in the device tree + * @ofdma: pointer to DMA controller data (router information) + * + * The function creates new dma_spec to be passed to the router driver's + * of_dma_route_allocate() function to prepare a dma_spec which will be used + * to request channel from the real DMA controller. + */ +static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_chan *chan; + struct of_dma *ofdma_target; + struct of_phandle_args dma_spec_target; + void *route_data; + + /* translate the request for the real DMA controller */ + memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target)); + route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma); + if (IS_ERR(route_data)) + return NULL; + + ofdma_target = of_dma_find_controller(&dma_spec_target); + if (!ofdma_target) { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + chan = ERR_PTR(-EPROBE_DEFER); + goto err; + } + + chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); + if (IS_ERR_OR_NULL(chan)) { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + } else { + int ret = 0; + + chan->router = ofdma->dma_router; + chan->route_data = route_data; + + if (chan->device->device_router_config) + ret = chan->device->device_router_config(chan); + + if (ret) { + dma_release_channel(chan); + chan = ERR_PTR(ret); + } + } + +err: + /* + * Need to put the node back since the ofdma->of_dma_route_allocate + * has taken it for generating the new, translated dma_spec + */ + of_node_put(dma_spec_target.np); + return chan; +} + +/** + * of_dma_controller_register - Register a DMA controller to DT DMA helpers + * @np: device node of DMA controller + * @of_dma_xlate: translation function which converts a phandle + * arguments list into a dma_chan structure + * @data: pointer to controller specific data to be used by + * translation function + * + * Returns 0 on success or appropriate errno value on error. + * + * Allocated memory should be freed with appropriate of_dma_controller_free() + * call. + */ +int of_dma_controller_register(struct device_node *np, + struct dma_chan *(*of_dma_xlate) + (struct of_phandle_args *, struct of_dma *), + void *data) +{ + struct of_dma *ofdma; + + if (!np || !of_dma_xlate) { + pr_err("%s: not enough information provided\n", __func__); + return -EINVAL; + } + + ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL); + if (!ofdma) + return -ENOMEM; + + ofdma->of_node = np; + ofdma->of_dma_xlate = of_dma_xlate; + ofdma->of_dma_data = data; + + /* Now queue of_dma controller structure in list */ + mutex_lock(&of_dma_lock); + list_add_tail(&ofdma->of_dma_controllers, &of_dma_list); + mutex_unlock(&of_dma_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(of_dma_controller_register); + +/** + * of_dma_controller_free - Remove a DMA controller from DT DMA helpers list + * @np: device node of DMA controller + * + * Memory allocated by of_dma_controller_register() is freed here. + */ +void of_dma_controller_free(struct device_node *np) +{ + struct of_dma *ofdma; + + mutex_lock(&of_dma_lock); + + list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers) + if (ofdma->of_node == np) { + list_del(&ofdma->of_dma_controllers); + kfree(ofdma); + break; + } + + mutex_unlock(&of_dma_lock); +} +EXPORT_SYMBOL_GPL(of_dma_controller_free); + +/** + * of_dma_router_register - Register a DMA router to DT DMA helpers as a + * controller + * @np: device node of DMA router + * @of_dma_route_allocate: setup function for the router which need to + * modify the dma_spec for the DMA controller to + * use and to set up the requested route. + * @dma_router: pointer to dma_router structure to be used when + * the route need to be free up. + * + * Returns 0 on success or appropriate errno value on error. + * + * Allocated memory should be freed with appropriate of_dma_controller_free() + * call. + */ +int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + struct of_dma *ofdma; + + if (!np || !of_dma_route_allocate || !dma_router) { + pr_err("%s: not enough information provided\n", __func__); + return -EINVAL; + } + + ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL); + if (!ofdma) + return -ENOMEM; + + ofdma->of_node = np; + ofdma->of_dma_xlate = of_dma_router_xlate; + ofdma->of_dma_route_allocate = of_dma_route_allocate; + ofdma->dma_router = dma_router; + + /* Now queue of_dma controller structure in list */ + mutex_lock(&of_dma_lock); + list_add_tail(&ofdma->of_dma_controllers, &of_dma_list); + mutex_unlock(&of_dma_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(of_dma_router_register); + +/** + * of_dma_match_channel - Check if a DMA specifier matches name + * @np: device node to look for DMA channels + * @name: channel name to be matched + * @index: index of DMA specifier in list of DMA specifiers + * @dma_spec: pointer to DMA specifier as found in the device tree + * + * Check if the DMA specifier pointed to by the index in a list of DMA + * specifiers, matches the name provided. Returns 0 if the name matches and + * a valid pointer to the DMA specifier is found. Otherwise returns -ENODEV. + */ +static int of_dma_match_channel(struct device_node *np, const char *name, + int index, struct of_phandle_args *dma_spec) +{ + const char *s; + + if (of_property_read_string_index(np, "dma-names", index, &s)) + return -ENODEV; + + if (strcmp(name, s)) + return -ENODEV; + + if (of_parse_phandle_with_args(np, "dmas", "#dma-cells", index, + dma_spec)) + return -ENODEV; + + return 0; +} + +/** + * of_dma_request_slave_channel - Get the DMA slave channel + * @np: device node to get DMA request from + * @name: name of desired channel + * + * Returns pointer to appropriate DMA channel on success or an error pointer. + */ +struct dma_chan *of_dma_request_slave_channel(struct device_node *np, + const char *name) +{ + struct of_phandle_args dma_spec; + struct of_dma *ofdma; + struct dma_chan *chan; + int count, i, start; + int ret_no_channel = -ENODEV; + static atomic_t last_index; + + if (!np || !name) { + pr_err("%s: not enough information provided\n", __func__); + return ERR_PTR(-ENODEV); + } + + /* Silently fail if there is not even the "dmas" property */ + if (!of_property_present(np, "dmas")) + return ERR_PTR(-ENODEV); + + count = of_property_count_strings(np, "dma-names"); + if (count < 0) { + pr_err("%s: dma-names property of node '%pOF' missing or empty\n", + __func__, np); + return ERR_PTR(-ENODEV); + } + + /* + * approximate an average distribution across multiple + * entries with the same name + */ + start = atomic_inc_return(&last_index); + for (i = 0; i < count; i++) { + if (of_dma_match_channel(np, name, + (i + start) % count, + &dma_spec)) + continue; + + mutex_lock(&of_dma_lock); + ofdma = of_dma_find_controller(&dma_spec); + + if (ofdma) { + chan = ofdma->of_dma_xlate(&dma_spec, ofdma); + } else { + ret_no_channel = -EPROBE_DEFER; + chan = NULL; + } + + mutex_unlock(&of_dma_lock); + + of_node_put(dma_spec.np); + + if (chan) + return chan; + } + + return ERR_PTR(ret_no_channel); +} +EXPORT_SYMBOL_GPL(of_dma_request_slave_channel); + +/** + * of_dma_simple_xlate - Simple DMA engine translation function + * @dma_spec: pointer to DMA specifier as found in the device tree + * @ofdma: pointer to DMA controller data + * + * A simple translation function for devices that use a 32-bit value for the + * filter_param when calling the DMA engine dma_request_channel() function. + * Note that this translation function requires that #dma-cells is equal to 1 + * and the argument of the dma specifier is the 32-bit filter_param. Returns + * pointer to appropriate dma channel on success or NULL on error. + */ +struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + int count = dma_spec->args_count; + struct of_dma_filter_info *info = ofdma->of_dma_data; + + if (!info || !info->filter_fn) + return NULL; + + if (count != 1) + return NULL; + + return __dma_request_channel(&info->dma_cap, info->filter_fn, + &dma_spec->args[0], dma_spec->np); +} +EXPORT_SYMBOL_GPL(of_dma_simple_xlate); + +/** + * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id + * @dma_spec: pointer to DMA specifier as found in the device tree + * @ofdma: pointer to DMA controller data + * + * This function can be used as the of xlate callback for DMA driver which wants + * to match the channel based on the channel id. When using this xlate function + * the #dma-cells propety of the DMA controller dt node needs to be set to 1. + * The data parameter of of_dma_controller_register must be a pointer to the + * dma_device struct the function should match upon. + * + * Returns pointer to appropriate dma channel on success or NULL on error. + */ +struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_device *dev = ofdma->of_dma_data; + struct dma_chan *chan, *candidate = NULL; + + if (!dev || dma_spec->args_count != 1) + return NULL; + + list_for_each_entry(chan, &dev->channels, device_node) + if (chan->chan_id == dma_spec->args[0]) { + candidate = chan; + break; + } + + if (!candidate) + return NULL; + + return dma_get_slave_channel(candidate); +} +EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id); diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c new file mode 100644 index 0000000000..384476757c --- /dev/null +++ b/drivers/dma/owl-dma.c @@ -0,0 +1,1279 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Actions Semi Owl SoCs DMA driver +// +// Copyright (c) 2014 Actions Semi Inc. +// Author: David Liu +// +// Copyright (c) 2018 Linaro Ltd. +// Author: Manivannan Sadhasivam + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "virt-dma.h" + +#define OWL_DMA_FRAME_MAX_LENGTH 0xfffff + +/* Global DMA Controller Registers */ +#define OWL_DMA_IRQ_PD0 0x00 +#define OWL_DMA_IRQ_PD1 0x04 +#define OWL_DMA_IRQ_PD2 0x08 +#define OWL_DMA_IRQ_PD3 0x0C +#define OWL_DMA_IRQ_EN0 0x10 +#define OWL_DMA_IRQ_EN1 0x14 +#define OWL_DMA_IRQ_EN2 0x18 +#define OWL_DMA_IRQ_EN3 0x1C +#define OWL_DMA_SECURE_ACCESS_CTL 0x20 +#define OWL_DMA_NIC_QOS 0x24 +#define OWL_DMA_DBGSEL 0x28 +#define OWL_DMA_IDLE_STAT 0x2C + +/* Channel Registers */ +#define OWL_DMA_CHAN_BASE(i) (0x100 + (i) * 0x100) +#define OWL_DMAX_MODE 0x00 +#define OWL_DMAX_SOURCE 0x04 +#define OWL_DMAX_DESTINATION 0x08 +#define OWL_DMAX_FRAME_LEN 0x0C +#define OWL_DMAX_FRAME_CNT 0x10 +#define OWL_DMAX_REMAIN_FRAME_CNT 0x14 +#define OWL_DMAX_REMAIN_CNT 0x18 +#define OWL_DMAX_SOURCE_STRIDE 0x1C +#define OWL_DMAX_DESTINATION_STRIDE 0x20 +#define OWL_DMAX_START 0x24 +#define OWL_DMAX_PAUSE 0x28 +#define OWL_DMAX_CHAINED_CTL 0x2C +#define OWL_DMAX_CONSTANT 0x30 +#define OWL_DMAX_LINKLIST_CTL 0x34 +#define OWL_DMAX_NEXT_DESCRIPTOR 0x38 +#define OWL_DMAX_CURRENT_DESCRIPTOR_NUM 0x3C +#define OWL_DMAX_INT_CTL 0x40 +#define OWL_DMAX_INT_STATUS 0x44 +#define OWL_DMAX_CURRENT_SOURCE_POINTER 0x48 +#define OWL_DMAX_CURRENT_DESTINATION_POINTER 0x4C + +/* OWL_DMAX_MODE Bits */ +#define OWL_DMA_MODE_TS(x) (((x) & GENMASK(5, 0)) << 0) +#define OWL_DMA_MODE_ST(x) (((x) & GENMASK(1, 0)) << 8) +#define OWL_DMA_MODE_ST_DEV OWL_DMA_MODE_ST(0) +#define OWL_DMA_MODE_ST_DCU OWL_DMA_MODE_ST(2) +#define OWL_DMA_MODE_ST_SRAM OWL_DMA_MODE_ST(3) +#define OWL_DMA_MODE_DT(x) (((x) & GENMASK(1, 0)) << 10) +#define OWL_DMA_MODE_DT_DEV OWL_DMA_MODE_DT(0) +#define OWL_DMA_MODE_DT_DCU OWL_DMA_MODE_DT(2) +#define OWL_DMA_MODE_DT_SRAM OWL_DMA_MODE_DT(3) +#define OWL_DMA_MODE_SAM(x) (((x) & GENMASK(1, 0)) << 16) +#define OWL_DMA_MODE_SAM_CONST OWL_DMA_MODE_SAM(0) +#define OWL_DMA_MODE_SAM_INC OWL_DMA_MODE_SAM(1) +#define OWL_DMA_MODE_SAM_STRIDE OWL_DMA_MODE_SAM(2) +#define OWL_DMA_MODE_DAM(x) (((x) & GENMASK(1, 0)) << 18) +#define OWL_DMA_MODE_DAM_CONST OWL_DMA_MODE_DAM(0) +#define OWL_DMA_MODE_DAM_INC OWL_DMA_MODE_DAM(1) +#define OWL_DMA_MODE_DAM_STRIDE OWL_DMA_MODE_DAM(2) +#define OWL_DMA_MODE_PW(x) (((x) & GENMASK(2, 0)) << 20) +#define OWL_DMA_MODE_CB BIT(23) +#define OWL_DMA_MODE_NDDBW(x) (((x) & 0x1) << 28) +#define OWL_DMA_MODE_NDDBW_32BIT OWL_DMA_MODE_NDDBW(0) +#define OWL_DMA_MODE_NDDBW_8BIT OWL_DMA_MODE_NDDBW(1) +#define OWL_DMA_MODE_CFE BIT(29) +#define OWL_DMA_MODE_LME BIT(30) +#define OWL_DMA_MODE_CME BIT(31) + +/* OWL_DMAX_LINKLIST_CTL Bits */ +#define OWL_DMA_LLC_SAV(x) (((x) & GENMASK(1, 0)) << 8) +#define OWL_DMA_LLC_SAV_INC OWL_DMA_LLC_SAV(0) +#define OWL_DMA_LLC_SAV_LOAD_NEXT OWL_DMA_LLC_SAV(1) +#define OWL_DMA_LLC_SAV_LOAD_PREV OWL_DMA_LLC_SAV(2) +#define OWL_DMA_LLC_DAV(x) (((x) & GENMASK(1, 0)) << 10) +#define OWL_DMA_LLC_DAV_INC OWL_DMA_LLC_DAV(0) +#define OWL_DMA_LLC_DAV_LOAD_NEXT OWL_DMA_LLC_DAV(1) +#define OWL_DMA_LLC_DAV_LOAD_PREV OWL_DMA_LLC_DAV(2) +#define OWL_DMA_LLC_SUSPEND BIT(16) + +/* OWL_DMAX_INT_CTL Bits */ +#define OWL_DMA_INTCTL_BLOCK BIT(0) +#define OWL_DMA_INTCTL_SUPER_BLOCK BIT(1) +#define OWL_DMA_INTCTL_FRAME BIT(2) +#define OWL_DMA_INTCTL_HALF_FRAME BIT(3) +#define OWL_DMA_INTCTL_LAST_FRAME BIT(4) + +/* OWL_DMAX_INT_STATUS Bits */ +#define OWL_DMA_INTSTAT_BLOCK BIT(0) +#define OWL_DMA_INTSTAT_SUPER_BLOCK BIT(1) +#define OWL_DMA_INTSTAT_FRAME BIT(2) +#define OWL_DMA_INTSTAT_HALF_FRAME BIT(3) +#define OWL_DMA_INTSTAT_LAST_FRAME BIT(4) + +/* Pack shift and newshift in a single word */ +#define BIT_FIELD(val, width, shift, newshift) \ + ((((val) >> (shift)) & ((BIT(width)) - 1)) << (newshift)) + +/* Frame count value is fixed as 1 */ +#define FCNT_VAL 0x1 + +/** + * enum owl_dmadesc_offsets - Describe DMA descriptor, hardware link + * list for dma transfer + * @OWL_DMADESC_NEXT_LLI: physical address of the next link list + * @OWL_DMADESC_SADDR: source physical address + * @OWL_DMADESC_DADDR: destination physical address + * @OWL_DMADESC_FLEN: frame length + * @OWL_DMADESC_SRC_STRIDE: source stride + * @OWL_DMADESC_DST_STRIDE: destination stride + * @OWL_DMADESC_CTRLA: dma_mode and linklist ctrl config + * @OWL_DMADESC_CTRLB: interrupt config + * @OWL_DMADESC_CONST_NUM: data for constant fill + * @OWL_DMADESC_SIZE: max size of this enum + */ +enum owl_dmadesc_offsets { + OWL_DMADESC_NEXT_LLI = 0, + OWL_DMADESC_SADDR, + OWL_DMADESC_DADDR, + OWL_DMADESC_FLEN, + OWL_DMADESC_SRC_STRIDE, + OWL_DMADESC_DST_STRIDE, + OWL_DMADESC_CTRLA, + OWL_DMADESC_CTRLB, + OWL_DMADESC_CONST_NUM, + OWL_DMADESC_SIZE +}; + +enum owl_dma_id { + S900_DMA, + S700_DMA, +}; + +/** + * struct owl_dma_lli - Link list for dma transfer + * @hw: hardware link list + * @phys: physical address of hardware link list + * @node: node for txd's lli_list + */ +struct owl_dma_lli { + u32 hw[OWL_DMADESC_SIZE]; + dma_addr_t phys; + struct list_head node; +}; + +/** + * struct owl_dma_txd - Wrapper for struct dma_async_tx_descriptor + * @vd: virtual DMA descriptor + * @lli_list: link list of lli nodes + * @cyclic: flag to indicate cyclic transfers + */ +struct owl_dma_txd { + struct virt_dma_desc vd; + struct list_head lli_list; + bool cyclic; +}; + +/** + * struct owl_dma_pchan - Holder for the physical channels + * @id: physical index to this channel + * @base: virtual memory base for the dma channel + * @vchan: the virtual channel currently being served by this physical channel + */ +struct owl_dma_pchan { + u32 id; + void __iomem *base; + struct owl_dma_vchan *vchan; +}; + +/** + * struct owl_dma_vchan - Wrapper for DMA ENGINE channel + * @vc: wrapped virtual channel + * @pchan: the physical channel utilized by this channel + * @txd: active transaction on this channel + * @cfg: slave configuration for this channel + * @drq: physical DMA request ID for this channel + */ +struct owl_dma_vchan { + struct virt_dma_chan vc; + struct owl_dma_pchan *pchan; + struct owl_dma_txd *txd; + struct dma_slave_config cfg; + u8 drq; +}; + +/** + * struct owl_dma - Holder for the Owl DMA controller + * @dma: dma engine for this instance + * @base: virtual memory base for the DMA controller + * @clk: clock for the DMA controller + * @lock: a lock to use when change DMA controller global register + * @lli_pool: a pool for the LLI descriptors + * @irq: interrupt ID for the DMA controller + * @nr_pchans: the number of physical channels + * @pchans: array of data for the physical channels + * @nr_vchans: the number of physical channels + * @vchans: array of data for the physical channels + * @devid: device id based on OWL SoC + */ +struct owl_dma { + struct dma_device dma; + void __iomem *base; + struct clk *clk; + spinlock_t lock; + struct dma_pool *lli_pool; + int irq; + + unsigned int nr_pchans; + struct owl_dma_pchan *pchans; + + unsigned int nr_vchans; + struct owl_dma_vchan *vchans; + enum owl_dma_id devid; +}; + +static void pchan_update(struct owl_dma_pchan *pchan, u32 reg, + u32 val, bool state) +{ + u32 regval; + + regval = readl(pchan->base + reg); + + if (state) + regval |= val; + else + regval &= ~val; + + writel(val, pchan->base + reg); +} + +static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data) +{ + writel(data, pchan->base + reg); +} + +static u32 pchan_readl(struct owl_dma_pchan *pchan, u32 reg) +{ + return readl(pchan->base + reg); +} + +static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state) +{ + u32 regval; + + regval = readl(od->base + reg); + + if (state) + regval |= val; + else + regval &= ~val; + + writel(val, od->base + reg); +} + +static void dma_writel(struct owl_dma *od, u32 reg, u32 data) +{ + writel(data, od->base + reg); +} + +static u32 dma_readl(struct owl_dma *od, u32 reg) +{ + return readl(od->base + reg); +} + +static inline struct owl_dma *to_owl_dma(struct dma_device *dd) +{ + return container_of(dd, struct owl_dma, dma); +} + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct owl_dma_vchan *to_owl_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct owl_dma_vchan, vc.chan); +} + +static inline struct owl_dma_txd *to_owl_txd(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct owl_dma_txd, vd.tx); +} + +static inline u32 llc_hw_ctrla(u32 mode, u32 llc_ctl) +{ + u32 ctl; + + ctl = BIT_FIELD(mode, 4, 28, 28) | + BIT_FIELD(mode, 8, 16, 20) | + BIT_FIELD(mode, 4, 8, 16) | + BIT_FIELD(mode, 6, 0, 10) | + BIT_FIELD(llc_ctl, 2, 10, 8) | + BIT_FIELD(llc_ctl, 2, 8, 6); + + return ctl; +} + +static inline u32 llc_hw_ctrlb(u32 int_ctl) +{ + u32 ctl; + + /* + * Irrespective of the SoC, ctrlb value starts filling from + * bit 18. + */ + ctl = BIT_FIELD(int_ctl, 7, 0, 18); + + return ctl; +} + +static u32 llc_hw_flen(struct owl_dma_lli *lli) +{ + return lli->hw[OWL_DMADESC_FLEN] & GENMASK(19, 0); +} + +static void owl_dma_free_lli(struct owl_dma *od, + struct owl_dma_lli *lli) +{ + list_del(&lli->node); + dma_pool_free(od->lli_pool, lli, lli->phys); +} + +static struct owl_dma_lli *owl_dma_alloc_lli(struct owl_dma *od) +{ + struct owl_dma_lli *lli; + dma_addr_t phys; + + lli = dma_pool_alloc(od->lli_pool, GFP_NOWAIT, &phys); + if (!lli) + return NULL; + + INIT_LIST_HEAD(&lli->node); + lli->phys = phys; + + return lli; +} + +static struct owl_dma_lli *owl_dma_add_lli(struct owl_dma_txd *txd, + struct owl_dma_lli *prev, + struct owl_dma_lli *next, + bool is_cyclic) +{ + if (!is_cyclic) + list_add_tail(&next->node, &txd->lli_list); + + if (prev) { + prev->hw[OWL_DMADESC_NEXT_LLI] = next->phys; + prev->hw[OWL_DMADESC_CTRLA] |= + llc_hw_ctrla(OWL_DMA_MODE_LME, 0); + } + + return next; +} + +static inline int owl_dma_cfg_lli(struct owl_dma_vchan *vchan, + struct owl_dma_lli *lli, + dma_addr_t src, dma_addr_t dst, + u32 len, enum dma_transfer_direction dir, + struct dma_slave_config *sconfig, + bool is_cyclic) +{ + struct owl_dma *od = to_owl_dma(vchan->vc.chan.device); + u32 mode, ctrlb; + + mode = OWL_DMA_MODE_PW(0); + + switch (dir) { + case DMA_MEM_TO_MEM: + mode |= OWL_DMA_MODE_TS(0) | OWL_DMA_MODE_ST_DCU | + OWL_DMA_MODE_DT_DCU | OWL_DMA_MODE_SAM_INC | + OWL_DMA_MODE_DAM_INC; + + break; + case DMA_MEM_TO_DEV: + mode |= OWL_DMA_MODE_TS(vchan->drq) + | OWL_DMA_MODE_ST_DCU | OWL_DMA_MODE_DT_DEV + | OWL_DMA_MODE_SAM_INC | OWL_DMA_MODE_DAM_CONST; + + /* + * Hardware only supports 32bit and 8bit buswidth. Since the + * default is 32bit, select 8bit only when requested. + */ + if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_1_BYTE) + mode |= OWL_DMA_MODE_NDDBW_8BIT; + + break; + case DMA_DEV_TO_MEM: + mode |= OWL_DMA_MODE_TS(vchan->drq) + | OWL_DMA_MODE_ST_DEV | OWL_DMA_MODE_DT_DCU + | OWL_DMA_MODE_SAM_CONST | OWL_DMA_MODE_DAM_INC; + + /* + * Hardware only supports 32bit and 8bit buswidth. Since the + * default is 32bit, select 8bit only when requested. + */ + if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_1_BYTE) + mode |= OWL_DMA_MODE_NDDBW_8BIT; + + break; + default: + return -EINVAL; + } + + lli->hw[OWL_DMADESC_CTRLA] = llc_hw_ctrla(mode, + OWL_DMA_LLC_SAV_LOAD_NEXT | + OWL_DMA_LLC_DAV_LOAD_NEXT); + + if (is_cyclic) + ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_BLOCK); + else + ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK); + + lli->hw[OWL_DMADESC_NEXT_LLI] = 0; /* One link list by default */ + lli->hw[OWL_DMADESC_SADDR] = src; + lli->hw[OWL_DMADESC_DADDR] = dst; + lli->hw[OWL_DMADESC_SRC_STRIDE] = 0; + lli->hw[OWL_DMADESC_DST_STRIDE] = 0; + + if (od->devid == S700_DMA) { + /* Max frame length is 1MB */ + lli->hw[OWL_DMADESC_FLEN] = len; + /* + * On S700, word starts from offset 0x1C is shared between + * frame count and ctrlb, where first 12 bits are for frame + * count and rest of 20 bits are for ctrlb. + */ + lli->hw[OWL_DMADESC_CTRLB] = FCNT_VAL | ctrlb; + } else { + /* + * On S900, word starts from offset 0xC is shared between + * frame length (max frame length is 1MB) and frame count, + * where first 20 bits are for frame length and rest of + * 12 bits are for frame count. + */ + lli->hw[OWL_DMADESC_FLEN] = len | FCNT_VAL << 20; + lli->hw[OWL_DMADESC_CTRLB] = ctrlb; + } + + return 0; +} + +static struct owl_dma_pchan *owl_dma_get_pchan(struct owl_dma *od, + struct owl_dma_vchan *vchan) +{ + struct owl_dma_pchan *pchan = NULL; + unsigned long flags; + int i; + + for (i = 0; i < od->nr_pchans; i++) { + pchan = &od->pchans[i]; + + spin_lock_irqsave(&od->lock, flags); + if (!pchan->vchan) { + pchan->vchan = vchan; + spin_unlock_irqrestore(&od->lock, flags); + break; + } + + spin_unlock_irqrestore(&od->lock, flags); + } + + return pchan; +} + +static int owl_dma_pchan_busy(struct owl_dma *od, struct owl_dma_pchan *pchan) +{ + unsigned int val; + + val = dma_readl(od, OWL_DMA_IDLE_STAT); + + return !(val & (1 << pchan->id)); +} + +static void owl_dma_terminate_pchan(struct owl_dma *od, + struct owl_dma_pchan *pchan) +{ + unsigned long flags; + u32 irq_pd; + + pchan_writel(pchan, OWL_DMAX_START, 0); + pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false); + + spin_lock_irqsave(&od->lock, flags); + dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), false); + + irq_pd = dma_readl(od, OWL_DMA_IRQ_PD0); + if (irq_pd & (1 << pchan->id)) { + dev_warn(od->dma.dev, + "terminating pchan %d that still has pending irq\n", + pchan->id); + dma_writel(od, OWL_DMA_IRQ_PD0, (1 << pchan->id)); + } + + pchan->vchan = NULL; + + spin_unlock_irqrestore(&od->lock, flags); +} + +static void owl_dma_pause_pchan(struct owl_dma_pchan *pchan) +{ + pchan_writel(pchan, 1, OWL_DMAX_PAUSE); +} + +static void owl_dma_resume_pchan(struct owl_dma_pchan *pchan) +{ + pchan_writel(pchan, 0, OWL_DMAX_PAUSE); +} + +static int owl_dma_start_next_txd(struct owl_dma_vchan *vchan) +{ + struct owl_dma *od = to_owl_dma(vchan->vc.chan.device); + struct virt_dma_desc *vd = vchan_next_desc(&vchan->vc); + struct owl_dma_pchan *pchan = vchan->pchan; + struct owl_dma_txd *txd = to_owl_txd(&vd->tx); + struct owl_dma_lli *lli; + unsigned long flags; + u32 int_ctl; + + list_del(&vd->node); + + vchan->txd = txd; + + /* Wait for channel inactive */ + while (owl_dma_pchan_busy(od, pchan)) + cpu_relax(); + + lli = list_first_entry(&txd->lli_list, + struct owl_dma_lli, node); + + if (txd->cyclic) + int_ctl = OWL_DMA_INTCTL_BLOCK; + else + int_ctl = OWL_DMA_INTCTL_SUPER_BLOCK; + + pchan_writel(pchan, OWL_DMAX_MODE, OWL_DMA_MODE_LME); + pchan_writel(pchan, OWL_DMAX_LINKLIST_CTL, + OWL_DMA_LLC_SAV_LOAD_NEXT | OWL_DMA_LLC_DAV_LOAD_NEXT); + pchan_writel(pchan, OWL_DMAX_NEXT_DESCRIPTOR, lli->phys); + pchan_writel(pchan, OWL_DMAX_INT_CTL, int_ctl); + + /* Clear IRQ status for this pchan */ + pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false); + + spin_lock_irqsave(&od->lock, flags); + + dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), true); + + spin_unlock_irqrestore(&od->lock, flags); + + dev_dbg(chan2dev(&vchan->vc.chan), "starting pchan %d\n", pchan->id); + + /* Start DMA transfer for this pchan */ + pchan_writel(pchan, OWL_DMAX_START, 0x1); + + return 0; +} + +static void owl_dma_phy_free(struct owl_dma *od, struct owl_dma_vchan *vchan) +{ + /* Ensure that the physical channel is stopped */ + owl_dma_terminate_pchan(od, vchan->pchan); + + vchan->pchan = NULL; +} + +static irqreturn_t owl_dma_interrupt(int irq, void *dev_id) +{ + struct owl_dma *od = dev_id; + struct owl_dma_vchan *vchan; + struct owl_dma_pchan *pchan; + unsigned long pending; + int i; + unsigned int global_irq_pending, chan_irq_pending; + + spin_lock(&od->lock); + + pending = dma_readl(od, OWL_DMA_IRQ_PD0); + + /* Clear IRQ status for each pchan */ + for_each_set_bit(i, &pending, od->nr_pchans) { + pchan = &od->pchans[i]; + pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false); + } + + /* Clear pending IRQ */ + dma_writel(od, OWL_DMA_IRQ_PD0, pending); + + /* Check missed pending IRQ */ + for (i = 0; i < od->nr_pchans; i++) { + pchan = &od->pchans[i]; + chan_irq_pending = pchan_readl(pchan, OWL_DMAX_INT_CTL) & + pchan_readl(pchan, OWL_DMAX_INT_STATUS); + + /* Dummy read to ensure OWL_DMA_IRQ_PD0 value is updated */ + dma_readl(od, OWL_DMA_IRQ_PD0); + + global_irq_pending = dma_readl(od, OWL_DMA_IRQ_PD0); + + if (chan_irq_pending && !(global_irq_pending & BIT(i))) { + dev_dbg(od->dma.dev, + "global and channel IRQ pending match err\n"); + + /* Clear IRQ status for this pchan */ + pchan_update(pchan, OWL_DMAX_INT_STATUS, + 0xff, false); + + /* Update global IRQ pending */ + pending |= BIT(i); + } + } + + spin_unlock(&od->lock); + + for_each_set_bit(i, &pending, od->nr_pchans) { + struct owl_dma_txd *txd; + + pchan = &od->pchans[i]; + + vchan = pchan->vchan; + if (!vchan) { + dev_warn(od->dma.dev, "no vchan attached on pchan %d\n", + pchan->id); + continue; + } + + spin_lock(&vchan->vc.lock); + + txd = vchan->txd; + if (txd) { + vchan->txd = NULL; + + vchan_cookie_complete(&txd->vd); + + /* + * Start the next descriptor (if any), + * otherwise free this channel. + */ + if (vchan_next_desc(&vchan->vc)) + owl_dma_start_next_txd(vchan); + else + owl_dma_phy_free(od, vchan); + } + + spin_unlock(&vchan->vc.lock); + } + + return IRQ_HANDLED; +} + +static void owl_dma_free_txd(struct owl_dma *od, struct owl_dma_txd *txd) +{ + struct owl_dma_lli *lli, *_lli; + + if (unlikely(!txd)) + return; + + list_for_each_entry_safe(lli, _lli, &txd->lli_list, node) + owl_dma_free_lli(od, lli); + + kfree(txd); +} + +static void owl_dma_desc_free(struct virt_dma_desc *vd) +{ + struct owl_dma *od = to_owl_dma(vd->tx.chan->device); + struct owl_dma_txd *txd = to_owl_txd(&vd->tx); + + owl_dma_free_txd(od, txd); +} + +static int owl_dma_terminate_all(struct dma_chan *chan) +{ + struct owl_dma *od = to_owl_dma(chan->device); + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&vchan->vc.lock, flags); + + if (vchan->pchan) + owl_dma_phy_free(od, vchan); + + if (vchan->txd) { + owl_dma_desc_free(&vchan->txd->vd); + vchan->txd = NULL; + } + + vchan_get_all_descriptors(&vchan->vc, &head); + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + vchan_dma_desc_free_list(&vchan->vc, &head); + + return 0; +} + +static int owl_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + + /* Reject definitely invalid configurations */ + if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES || + config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) + return -EINVAL; + + memcpy(&vchan->cfg, config, sizeof(struct dma_slave_config)); + + return 0; +} + +static int owl_dma_pause(struct dma_chan *chan) +{ + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + owl_dma_pause_pchan(vchan->pchan); + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return 0; +} + +static int owl_dma_resume(struct dma_chan *chan) +{ + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + unsigned long flags; + + if (!vchan->pchan && !vchan->txd) + return 0; + + dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc); + + spin_lock_irqsave(&vchan->vc.lock, flags); + + owl_dma_resume_pchan(vchan->pchan); + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return 0; +} + +static u32 owl_dma_getbytes_chan(struct owl_dma_vchan *vchan) +{ + struct owl_dma_pchan *pchan; + struct owl_dma_txd *txd; + struct owl_dma_lli *lli; + unsigned int next_lli_phy; + size_t bytes; + + pchan = vchan->pchan; + txd = vchan->txd; + + if (!pchan || !txd) + return 0; + + /* Get remain count of current node in link list */ + bytes = pchan_readl(pchan, OWL_DMAX_REMAIN_CNT); + + /* Loop through the preceding nodes to get total remaining bytes */ + if (pchan_readl(pchan, OWL_DMAX_MODE) & OWL_DMA_MODE_LME) { + next_lli_phy = pchan_readl(pchan, OWL_DMAX_NEXT_DESCRIPTOR); + list_for_each_entry(lli, &txd->lli_list, node) { + /* Start from the next active node */ + if (lli->phys == next_lli_phy) { + list_for_each_entry(lli, &txd->lli_list, node) + bytes += llc_hw_flen(lli); + break; + } + } + } + + return bytes; +} + +static enum dma_status owl_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + struct owl_dma_lli *lli; + struct virt_dma_desc *vd; + struct owl_dma_txd *txd; + enum dma_status ret; + unsigned long flags; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (ret == DMA_COMPLETE || !state) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + vd = vchan_find_desc(&vchan->vc, cookie); + if (vd) { + txd = to_owl_txd(&vd->tx); + list_for_each_entry(lli, &txd->lli_list, node) + bytes += llc_hw_flen(lli); + } else { + bytes = owl_dma_getbytes_chan(vchan); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + dma_set_residue(state, bytes); + + return ret; +} + +static void owl_dma_phy_alloc_and_start(struct owl_dma_vchan *vchan) +{ + struct owl_dma *od = to_owl_dma(vchan->vc.chan.device); + struct owl_dma_pchan *pchan; + + pchan = owl_dma_get_pchan(od, vchan); + if (!pchan) + return; + + dev_dbg(od->dma.dev, "allocated pchan %d\n", pchan->id); + + vchan->pchan = pchan; + owl_dma_start_next_txd(vchan); +} + +static void owl_dma_issue_pending(struct dma_chan *chan) +{ + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + if (vchan_issue_pending(&vchan->vc)) { + if (!vchan->pchan) + owl_dma_phy_alloc_and_start(vchan); + } + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static struct dma_async_tx_descriptor + *owl_dma_prep_memcpy(struct dma_chan *chan, + dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct owl_dma *od = to_owl_dma(chan->device); + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + struct owl_dma_txd *txd; + struct owl_dma_lli *lli, *prev = NULL; + size_t offset, bytes; + int ret; + + if (!len) + return NULL; + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + INIT_LIST_HEAD(&txd->lli_list); + + /* Process the transfer as frame by frame */ + for (offset = 0; offset < len; offset += bytes) { + lli = owl_dma_alloc_lli(od); + if (!lli) { + dev_warn(chan2dev(chan), "failed to allocate lli\n"); + goto err_txd_free; + } + + bytes = min_t(size_t, (len - offset), OWL_DMA_FRAME_MAX_LENGTH); + + ret = owl_dma_cfg_lli(vchan, lli, src + offset, dst + offset, + bytes, DMA_MEM_TO_MEM, + &vchan->cfg, txd->cyclic); + if (ret) { + dev_warn(chan2dev(chan), "failed to config lli\n"); + goto err_txd_free; + } + + prev = owl_dma_add_lli(txd, prev, lli, false); + } + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_txd_free: + owl_dma_free_txd(od, txd); + return NULL; +} + +static struct dma_async_tx_descriptor + *owl_dma_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct owl_dma *od = to_owl_dma(chan->device); + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct owl_dma_txd *txd; + struct owl_dma_lli *lli, *prev = NULL; + struct scatterlist *sg; + dma_addr_t addr, src = 0, dst = 0; + size_t len; + int ret, i; + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + INIT_LIST_HEAD(&txd->lli_list); + + for_each_sg(sgl, sg, sg_len, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + + if (len > OWL_DMA_FRAME_MAX_LENGTH) { + dev_err(od->dma.dev, + "frame length exceeds max supported length"); + goto err_txd_free; + } + + lli = owl_dma_alloc_lli(od); + if (!lli) { + dev_err(chan2dev(chan), "failed to allocate lli"); + goto err_txd_free; + } + + if (dir == DMA_MEM_TO_DEV) { + src = addr; + dst = sconfig->dst_addr; + } else { + src = sconfig->src_addr; + dst = addr; + } + + ret = owl_dma_cfg_lli(vchan, lli, src, dst, len, dir, sconfig, + txd->cyclic); + if (ret) { + dev_warn(chan2dev(chan), "failed to config lli"); + goto err_txd_free; + } + + prev = owl_dma_add_lli(txd, prev, lli, false); + } + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_txd_free: + owl_dma_free_txd(od, txd); + + return NULL; +} + +static struct dma_async_tx_descriptor + *owl_prep_dma_cyclic(struct dma_chan *chan, + dma_addr_t buf_addr, size_t buf_len, + size_t period_len, + enum dma_transfer_direction dir, + unsigned long flags) +{ + struct owl_dma *od = to_owl_dma(chan->device); + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct owl_dma_txd *txd; + struct owl_dma_lli *lli, *prev = NULL, *first = NULL; + dma_addr_t src = 0, dst = 0; + unsigned int periods = buf_len / period_len; + int ret, i; + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + INIT_LIST_HEAD(&txd->lli_list); + txd->cyclic = true; + + for (i = 0; i < periods; i++) { + lli = owl_dma_alloc_lli(od); + if (!lli) { + dev_warn(chan2dev(chan), "failed to allocate lli"); + goto err_txd_free; + } + + if (dir == DMA_MEM_TO_DEV) { + src = buf_addr + (period_len * i); + dst = sconfig->dst_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = sconfig->src_addr; + dst = buf_addr + (period_len * i); + } + + ret = owl_dma_cfg_lli(vchan, lli, src, dst, period_len, + dir, sconfig, txd->cyclic); + if (ret) { + dev_warn(chan2dev(chan), "failed to config lli"); + goto err_txd_free; + } + + if (!first) + first = lli; + + prev = owl_dma_add_lli(txd, prev, lli, false); + } + + /* close the cyclic list */ + owl_dma_add_lli(txd, prev, first, true); + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_txd_free: + owl_dma_free_txd(od, txd); + + return NULL; +} + +static void owl_dma_free_chan_resources(struct dma_chan *chan) +{ + struct owl_dma_vchan *vchan = to_owl_vchan(chan); + + /* Ensure all queued descriptors are freed */ + vchan_free_chan_resources(&vchan->vc); +} + +static inline void owl_dma_free(struct owl_dma *od) +{ + struct owl_dma_vchan *vchan = NULL; + struct owl_dma_vchan *next; + + list_for_each_entry_safe(vchan, + next, &od->dma.channels, vc.chan.device_node) { + list_del(&vchan->vc.chan.device_node); + tasklet_kill(&vchan->vc.task); + } +} + +static struct dma_chan *owl_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct owl_dma *od = ofdma->of_dma_data; + struct owl_dma_vchan *vchan; + struct dma_chan *chan; + u8 drq = dma_spec->args[0]; + + if (drq > od->nr_vchans) + return NULL; + + chan = dma_get_any_slave_channel(&od->dma); + if (!chan) + return NULL; + + vchan = to_owl_vchan(chan); + vchan->drq = drq; + + return chan; +} + +static const struct of_device_id owl_dma_match[] = { + { .compatible = "actions,s500-dma", .data = (void *)S900_DMA,}, + { .compatible = "actions,s700-dma", .data = (void *)S700_DMA,}, + { .compatible = "actions,s900-dma", .data = (void *)S900_DMA,}, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, owl_dma_match); + +static int owl_dma_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct owl_dma *od; + int ret, i, nr_channels, nr_requests; + + od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL); + if (!od) + return -ENOMEM; + + od->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(od->base)) + return PTR_ERR(od->base); + + ret = of_property_read_u32(np, "dma-channels", &nr_channels); + if (ret) { + dev_err(&pdev->dev, "can't get dma-channels\n"); + return ret; + } + + ret = of_property_read_u32(np, "dma-requests", &nr_requests); + if (ret) { + dev_err(&pdev->dev, "can't get dma-requests\n"); + return ret; + } + + dev_info(&pdev->dev, "dma-channels %d, dma-requests %d\n", + nr_channels, nr_requests); + + od->devid = (uintptr_t)of_device_get_match_data(&pdev->dev); + + od->nr_pchans = nr_channels; + od->nr_vchans = nr_requests; + + pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); + + platform_set_drvdata(pdev, od); + spin_lock_init(&od->lock); + + dma_cap_set(DMA_MEMCPY, od->dma.cap_mask); + dma_cap_set(DMA_SLAVE, od->dma.cap_mask); + dma_cap_set(DMA_CYCLIC, od->dma.cap_mask); + + od->dma.dev = &pdev->dev; + od->dma.device_free_chan_resources = owl_dma_free_chan_resources; + od->dma.device_tx_status = owl_dma_tx_status; + od->dma.device_issue_pending = owl_dma_issue_pending; + od->dma.device_prep_dma_memcpy = owl_dma_prep_memcpy; + od->dma.device_prep_slave_sg = owl_dma_prep_slave_sg; + od->dma.device_prep_dma_cyclic = owl_prep_dma_cyclic; + od->dma.device_config = owl_dma_config; + od->dma.device_pause = owl_dma_pause; + od->dma.device_resume = owl_dma_resume; + od->dma.device_terminate_all = owl_dma_terminate_all; + od->dma.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + od->dma.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + od->dma.directions = BIT(DMA_MEM_TO_MEM); + od->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + INIT_LIST_HEAD(&od->dma.channels); + + od->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(od->clk)) { + dev_err(&pdev->dev, "unable to get clock\n"); + return PTR_ERR(od->clk); + } + + /* + * Eventhough the DMA controller is capable of generating 4 + * IRQ's for DMA priority feature, we only use 1 IRQ for + * simplification. + */ + od->irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(&pdev->dev, od->irq, owl_dma_interrupt, 0, + dev_name(&pdev->dev), od); + if (ret) { + dev_err(&pdev->dev, "unable to request IRQ\n"); + return ret; + } + + /* Init physical channel */ + od->pchans = devm_kcalloc(&pdev->dev, od->nr_pchans, + sizeof(struct owl_dma_pchan), GFP_KERNEL); + if (!od->pchans) + return -ENOMEM; + + for (i = 0; i < od->nr_pchans; i++) { + struct owl_dma_pchan *pchan = &od->pchans[i]; + + pchan->id = i; + pchan->base = od->base + OWL_DMA_CHAN_BASE(i); + } + + /* Init virtual channel */ + od->vchans = devm_kcalloc(&pdev->dev, od->nr_vchans, + sizeof(struct owl_dma_vchan), GFP_KERNEL); + if (!od->vchans) + return -ENOMEM; + + for (i = 0; i < od->nr_vchans; i++) { + struct owl_dma_vchan *vchan = &od->vchans[i]; + + vchan->vc.desc_free = owl_dma_desc_free; + vchan_init(&vchan->vc, &od->dma); + } + + /* Create a pool of consistent memory blocks for hardware descriptors */ + od->lli_pool = dma_pool_create(dev_name(od->dma.dev), od->dma.dev, + sizeof(struct owl_dma_lli), + __alignof__(struct owl_dma_lli), + 0); + if (!od->lli_pool) { + dev_err(&pdev->dev, "unable to allocate DMA descriptor pool\n"); + return -ENOMEM; + } + + clk_prepare_enable(od->clk); + + ret = dma_async_device_register(&od->dma); + if (ret) { + dev_err(&pdev->dev, "failed to register DMA engine device\n"); + goto err_pool_free; + } + + /* Device-tree DMA controller registration */ + ret = of_dma_controller_register(pdev->dev.of_node, + owl_dma_of_xlate, od); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&od->dma); +err_pool_free: + clk_disable_unprepare(od->clk); + dma_pool_destroy(od->lli_pool); + + return ret; +} + +static int owl_dma_remove(struct platform_device *pdev) +{ + struct owl_dma *od = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&od->dma); + + /* Mask all interrupts for this execution environment */ + dma_writel(od, OWL_DMA_IRQ_EN0, 0x0); + + /* Make sure we won't have any further interrupts */ + devm_free_irq(od->dma.dev, od->irq, od); + + owl_dma_free(od); + + clk_disable_unprepare(od->clk); + dma_pool_destroy(od->lli_pool); + + return 0; +} + +static struct platform_driver owl_dma_driver = { + .probe = owl_dma_probe, + .remove = owl_dma_remove, + .driver = { + .name = "dma-owl", + .of_match_table = of_match_ptr(owl_dma_match), + }, +}; + +static int owl_dma_init(void) +{ + return platform_driver_register(&owl_dma_driver); +} +subsys_initcall(owl_dma_init); + +static void __exit owl_dma_exit(void) +{ + platform_driver_unregister(&owl_dma_driver); +} +module_exit(owl_dma_exit); + +MODULE_AUTHOR("David Liu "); +MODULE_AUTHOR("Manivannan Sadhasivam "); +MODULE_DESCRIPTION("Actions Semi Owl SoCs DMA driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c new file mode 100644 index 0000000000..c359decc07 --- /dev/null +++ b/drivers/dma/pch_dma.c @@ -0,0 +1,995 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Topcliff PCH DMA controller driver + * Copyright (c) 2010 Intel Corporation + * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +#define DRV_NAME "pch-dma" + +#define DMA_CTL0_DISABLE 0x0 +#define DMA_CTL0_SG 0x1 +#define DMA_CTL0_ONESHOT 0x2 +#define DMA_CTL0_MODE_MASK_BITS 0x3 +#define DMA_CTL0_DIR_SHIFT_BITS 2 +#define DMA_CTL0_BITS_PER_CH 4 + +#define DMA_CTL2_START_SHIFT_BITS 8 +#define DMA_CTL2_IRQ_ENABLE_MASK ((1UL << DMA_CTL2_START_SHIFT_BITS) - 1) + +#define DMA_STATUS_IDLE 0x0 +#define DMA_STATUS_DESC_READ 0x1 +#define DMA_STATUS_WAIT 0x2 +#define DMA_STATUS_ACCESS 0x3 +#define DMA_STATUS_BITS_PER_CH 2 +#define DMA_STATUS_MASK_BITS 0x3 +#define DMA_STATUS_SHIFT_BITS 16 +#define DMA_STATUS_IRQ(x) (0x1 << (x)) +#define DMA_STATUS0_ERR(x) (0x1 << ((x) + 8)) +#define DMA_STATUS2_ERR(x) (0x1 << (x)) + +#define DMA_DESC_WIDTH_SHIFT_BITS 12 +#define DMA_DESC_WIDTH_1_BYTE (0x3 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_WIDTH_2_BYTES (0x2 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_WIDTH_4_BYTES (0x0 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_MAX_COUNT_1_BYTE 0x3FF +#define DMA_DESC_MAX_COUNT_2_BYTES 0x3FF +#define DMA_DESC_MAX_COUNT_4_BYTES 0x7FF +#define DMA_DESC_END_WITHOUT_IRQ 0x0 +#define DMA_DESC_END_WITH_IRQ 0x1 +#define DMA_DESC_FOLLOW_WITHOUT_IRQ 0x2 +#define DMA_DESC_FOLLOW_WITH_IRQ 0x3 + +#define MAX_CHAN_NR 12 + +#define DMA_MASK_CTL0_MODE 0x33333333 +#define DMA_MASK_CTL2_MODE 0x00003333 + +static unsigned int init_nr_desc_per_channel = 64; +module_param(init_nr_desc_per_channel, uint, 0644); +MODULE_PARM_DESC(init_nr_desc_per_channel, + "initial descriptors per channel (default: 64)"); + +struct pch_dma_desc_regs { + u32 dev_addr; + u32 mem_addr; + u32 size; + u32 next; +}; + +struct pch_dma_regs { + u32 dma_ctl0; + u32 dma_ctl1; + u32 dma_ctl2; + u32 dma_ctl3; + u32 dma_sts0; + u32 dma_sts1; + u32 dma_sts2; + u32 reserved3; + struct pch_dma_desc_regs desc[MAX_CHAN_NR]; +}; + +struct pch_dma_desc { + struct pch_dma_desc_regs regs; + struct dma_async_tx_descriptor txd; + struct list_head desc_node; + struct list_head tx_list; +}; + +struct pch_dma_chan { + struct dma_chan chan; + void __iomem *membase; + enum dma_transfer_direction dir; + struct tasklet_struct tasklet; + unsigned long err_status; + + spinlock_t lock; + + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int descs_allocated; +}; + +#define PDC_DEV_ADDR 0x00 +#define PDC_MEM_ADDR 0x04 +#define PDC_SIZE 0x08 +#define PDC_NEXT 0x0C + +#define channel_readl(pdc, name) \ + readl((pdc)->membase + PDC_##name) +#define channel_writel(pdc, name, val) \ + writel((val), (pdc)->membase + PDC_##name) + +struct pch_dma { + struct dma_device dma; + void __iomem *membase; + struct dma_pool *pool; + struct pch_dma_regs regs; + struct pch_dma_desc_regs ch_regs[MAX_CHAN_NR]; + struct pch_dma_chan channels[MAX_CHAN_NR]; +}; + +#define PCH_DMA_CTL0 0x00 +#define PCH_DMA_CTL1 0x04 +#define PCH_DMA_CTL2 0x08 +#define PCH_DMA_CTL3 0x0C +#define PCH_DMA_STS0 0x10 +#define PCH_DMA_STS1 0x14 +#define PCH_DMA_STS2 0x18 + +#define dma_readl(pd, name) \ + readl((pd)->membase + PCH_DMA_##name) +#define dma_writel(pd, name, val) \ + writel((val), (pd)->membase + PCH_DMA_##name) + +static inline +struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct pch_dma_desc, txd); +} + +static inline struct pch_dma_chan *to_pd_chan(struct dma_chan *chan) +{ + return container_of(chan, struct pch_dma_chan, chan); +} + +static inline struct pch_dma *to_pd(struct dma_device *ddev) +{ + return container_of(ddev, struct pch_dma, dma); +} + +static inline struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct device *chan2parent(struct dma_chan *chan) +{ + return chan->dev->device.parent; +} + +static inline +struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan) +{ + return list_first_entry(&pd_chan->active_list, + struct pch_dma_desc, desc_node); +} + +static inline +struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan) +{ + return list_first_entry(&pd_chan->queue, + struct pch_dma_desc, desc_node); +} + +static void pdc_enable_irq(struct dma_chan *chan, int enable) +{ + struct pch_dma *pd = to_pd(chan->device); + u32 val; + int pos; + + if (chan->chan_id < 8) + pos = chan->chan_id; + else + pos = chan->chan_id + 8; + + val = dma_readl(pd, CTL2); + + if (enable) + val |= 0x1 << pos; + else + val &= ~(0x1 << pos); + + dma_writel(pd, CTL2, val); + + dev_dbg(chan2dev(chan), "pdc_enable_irq: chan %d -> %x\n", + chan->chan_id, val); +} + +static void pdc_set_dir(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma *pd = to_pd(chan->device); + u32 val; + u32 mask_mode; + u32 mask_ctl; + + if (chan->chan_id < 8) { + val = dma_readl(pd, CTL0); + + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id); + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val &= mask_mode; + if (pd_chan->dir == DMA_MEM_TO_DEV) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS)); + + val |= mask_ctl; + dma_writel(pd, CTL0, val); + } else { + int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ + val = dma_readl(pd, CTL3); + + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch); + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + val &= mask_mode; + if (pd_chan->dir == DMA_MEM_TO_DEV) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch + + DMA_CTL0_DIR_SHIFT_BITS)); + val |= mask_ctl; + dma_writel(pd, CTL3, val); + } + + dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n", + chan->chan_id, val); +} + +static void pdc_set_mode(struct dma_chan *chan, u32 mode) +{ + struct pch_dma *pd = to_pd(chan->device); + u32 val; + u32 mask_ctl; + u32 mask_dir; + + if (chan->chan_id < 8) { + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\ + DMA_CTL0_DIR_SHIFT_BITS); + val = dma_readl(pd, CTL0); + val &= mask_dir; + val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); + val |= mask_ctl; + dma_writel(pd, CTL0, val); + } else { + int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\ + DMA_CTL0_DIR_SHIFT_BITS); + val = dma_readl(pd, CTL3); + val &= mask_dir; + val |= mode << (DMA_CTL0_BITS_PER_CH * ch); + val |= mask_ctl; + dma_writel(pd, CTL3, val); + } + + dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", + chan->chan_id, val); +} + +static u32 pdc_get_status0(struct pch_dma_chan *pd_chan) +{ + struct pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + val = dma_readl(pd, STS0); + return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + + DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id)); +} + +static u32 pdc_get_status2(struct pch_dma_chan *pd_chan) +{ + struct pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + val = dma_readl(pd, STS2); + return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + + DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8))); +} + +static bool pdc_is_idle(struct pch_dma_chan *pd_chan) +{ + u32 sts; + + if (pd_chan->chan.chan_id < 8) + sts = pdc_get_status0(pd_chan); + else + sts = pdc_get_status2(pd_chan); + + + if (sts == DMA_STATUS_IDLE) + return true; + else + return false; +} + +static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc) +{ + if (!pdc_is_idle(pd_chan)) { + dev_err(chan2dev(&pd_chan->chan), + "BUG: Attempt to start non-idle channel\n"); + return; + } + + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> dev_addr: %x\n", + pd_chan->chan.chan_id, desc->regs.dev_addr); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> mem_addr: %x\n", + pd_chan->chan.chan_id, desc->regs.mem_addr); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> size: %x\n", + pd_chan->chan.chan_id, desc->regs.size); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> next: %x\n", + pd_chan->chan.chan_id, desc->regs.next); + + if (list_empty(&desc->tx_list)) { + channel_writel(pd_chan, DEV_ADDR, desc->regs.dev_addr); + channel_writel(pd_chan, MEM_ADDR, desc->regs.mem_addr); + channel_writel(pd_chan, SIZE, desc->regs.size); + channel_writel(pd_chan, NEXT, desc->regs.next); + pdc_set_mode(&pd_chan->chan, DMA_CTL0_ONESHOT); + } else { + channel_writel(pd_chan, NEXT, desc->txd.phys); + pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG); + } +} + +static void pdc_chain_complete(struct pch_dma_chan *pd_chan, + struct pch_dma_desc *desc) +{ + struct dma_async_tx_descriptor *txd = &desc->txd; + struct dmaengine_desc_callback cb; + + dmaengine_desc_get_callback(txd, &cb); + list_splice_init(&desc->tx_list, &pd_chan->free_list); + list_move(&desc->desc_node, &pd_chan->free_list); + + dmaengine_desc_callback_invoke(&cb, NULL); +} + +static void pdc_complete_all(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *desc, *_d; + LIST_HEAD(list); + + BUG_ON(!pdc_is_idle(pd_chan)); + + if (!list_empty(&pd_chan->queue)) + pdc_dostart(pd_chan, pdc_first_queued(pd_chan)); + + list_splice_init(&pd_chan->active_list, &list); + list_splice_init(&pd_chan->queue, &pd_chan->active_list); + + list_for_each_entry_safe(desc, _d, &list, desc_node) + pdc_chain_complete(pd_chan, desc); +} + +static void pdc_handle_error(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *bad_desc; + + bad_desc = pdc_first_active(pd_chan); + list_del(&bad_desc->desc_node); + + list_splice_init(&pd_chan->queue, pd_chan->active_list.prev); + + if (!list_empty(&pd_chan->active_list)) + pdc_dostart(pd_chan, pdc_first_active(pd_chan)); + + dev_crit(chan2dev(&pd_chan->chan), "Bad descriptor submitted\n"); + dev_crit(chan2dev(&pd_chan->chan), "descriptor cookie: %d\n", + bad_desc->txd.cookie); + + pdc_chain_complete(pd_chan, bad_desc); +} + +static void pdc_advance_work(struct pch_dma_chan *pd_chan) +{ + if (list_empty(&pd_chan->active_list) || + list_is_singular(&pd_chan->active_list)) { + pdc_complete_all(pd_chan); + } else { + pdc_chain_complete(pd_chan, pdc_first_active(pd_chan)); + pdc_dostart(pd_chan, pdc_first_active(pd_chan)); + } +} + +static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct pch_dma_desc *desc = to_pd_desc(txd); + struct pch_dma_chan *pd_chan = to_pd_chan(txd->chan); + + spin_lock(&pd_chan->lock); + + if (list_empty(&pd_chan->active_list)) { + list_add_tail(&desc->desc_node, &pd_chan->active_list); + pdc_dostart(pd_chan, desc); + } else { + list_add_tail(&desc->desc_node, &pd_chan->queue); + } + + spin_unlock(&pd_chan->lock); + return 0; +} + +static struct pch_dma_desc *pdc_alloc_desc(struct dma_chan *chan, gfp_t flags) +{ + struct pch_dma_desc *desc = NULL; + struct pch_dma *pd = to_pd(chan->device); + dma_addr_t addr; + + desc = dma_pool_zalloc(pd->pool, flags, &addr); + if (desc) { + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = pd_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = addr; + } + + return desc; +} + +static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *desc, *_d; + struct pch_dma_desc *ret = NULL; + int i = 0; + + spin_lock(&pd_chan->lock); + list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) { + i++; + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(chan2dev(&pd_chan->chan), "desc %p not ACKed\n", desc); + } + spin_unlock(&pd_chan->lock); + dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i); + + if (!ret) { + ret = pdc_alloc_desc(&pd_chan->chan, GFP_ATOMIC); + if (ret) { + spin_lock(&pd_chan->lock); + pd_chan->descs_allocated++; + spin_unlock(&pd_chan->lock); + } else { + dev_err(chan2dev(&pd_chan->chan), + "failed to alloc desc\n"); + } + } + + return ret; +} + +static void pdc_desc_put(struct pch_dma_chan *pd_chan, + struct pch_dma_desc *desc) +{ + if (desc) { + spin_lock(&pd_chan->lock); + list_splice_init(&desc->tx_list, &pd_chan->free_list); + list_add(&desc->desc_node, &pd_chan->free_list); + spin_unlock(&pd_chan->lock); + } +} + +static int pd_alloc_chan_resources(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_desc *desc; + LIST_HEAD(tmp_list); + int i; + + if (!pdc_is_idle(pd_chan)) { + dev_dbg(chan2dev(chan), "DMA channel not idle ?\n"); + return -EIO; + } + + if (!list_empty(&pd_chan->free_list)) + return pd_chan->descs_allocated; + + for (i = 0; i < init_nr_desc_per_channel; i++) { + desc = pdc_alloc_desc(chan, GFP_KERNEL); + + if (!desc) { + dev_warn(chan2dev(chan), + "Only allocated %d initial descriptors\n", i); + break; + } + + list_add_tail(&desc->desc_node, &tmp_list); + } + + spin_lock_irq(&pd_chan->lock); + list_splice(&tmp_list, &pd_chan->free_list); + pd_chan->descs_allocated = i; + dma_cookie_init(chan); + spin_unlock_irq(&pd_chan->lock); + + pdc_enable_irq(chan, 1); + + return pd_chan->descs_allocated; +} + +static void pd_free_chan_resources(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma *pd = to_pd(chan->device); + struct pch_dma_desc *desc, *_d; + LIST_HEAD(tmp_list); + + BUG_ON(!pdc_is_idle(pd_chan)); + BUG_ON(!list_empty(&pd_chan->active_list)); + BUG_ON(!list_empty(&pd_chan->queue)); + + spin_lock_irq(&pd_chan->lock); + list_splice_init(&pd_chan->free_list, &tmp_list); + pd_chan->descs_allocated = 0; + spin_unlock_irq(&pd_chan->lock); + + list_for_each_entry_safe(desc, _d, &tmp_list, desc_node) + dma_pool_free(pd->pool, desc, desc->txd.phys); + + pdc_enable_irq(chan, 0); +} + +static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(chan, cookie, txstate); +} + +static void pd_issue_pending(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + + if (pdc_is_idle(pd_chan)) { + spin_lock(&pd_chan->lock); + pdc_advance_work(pd_chan); + spin_unlock(&pd_chan->lock); + } +} + +static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_slave *pd_slave = chan->private; + struct pch_dma_desc *first = NULL; + struct pch_dma_desc *prev = NULL; + struct pch_dma_desc *desc = NULL; + struct scatterlist *sg; + dma_addr_t reg; + int i; + + if (unlikely(!sg_len)) { + dev_info(chan2dev(chan), "prep_slave_sg: length is zero!\n"); + return NULL; + } + + if (direction == DMA_DEV_TO_MEM) + reg = pd_slave->rx_reg; + else if (direction == DMA_MEM_TO_DEV) + reg = pd_slave->tx_reg; + else + return NULL; + + pd_chan->dir = direction; + pdc_set_dir(chan); + + for_each_sg(sgl, sg, sg_len, i) { + desc = pdc_desc_get(pd_chan); + + if (!desc) + goto err_desc_get; + + desc->regs.dev_addr = reg; + desc->regs.mem_addr = sg_dma_address(sg); + desc->regs.size = sg_dma_len(sg); + desc->regs.next = DMA_DESC_FOLLOW_WITHOUT_IRQ; + + switch (pd_slave->width) { + case PCH_DMA_WIDTH_1_BYTE: + if (desc->regs.size > DMA_DESC_MAX_COUNT_1_BYTE) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_1_BYTE; + break; + case PCH_DMA_WIDTH_2_BYTES: + if (desc->regs.size > DMA_DESC_MAX_COUNT_2_BYTES) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_2_BYTES; + break; + case PCH_DMA_WIDTH_4_BYTES: + if (desc->regs.size > DMA_DESC_MAX_COUNT_4_BYTES) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_4_BYTES; + break; + default: + goto err_desc_get; + } + + if (!first) { + first = desc; + } else { + prev->regs.next |= desc->txd.phys; + list_add_tail(&desc->desc_node, &first->tx_list); + } + + prev = desc; + } + + if (flags & DMA_PREP_INTERRUPT) + desc->regs.next = DMA_DESC_END_WITH_IRQ; + else + desc->regs.next = DMA_DESC_END_WITHOUT_IRQ; + + first->txd.cookie = -EBUSY; + desc->txd.flags = flags; + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), "failed to get desc or wrong parameters\n"); + pdc_desc_put(pd_chan, first); + return NULL; +} + +static int pd_device_terminate_all(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_desc *desc, *_d; + LIST_HEAD(list); + + spin_lock_irq(&pd_chan->lock); + + pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE); + + list_splice_init(&pd_chan->active_list, &list); + list_splice_init(&pd_chan->queue, &list); + + list_for_each_entry_safe(desc, _d, &list, desc_node) + pdc_chain_complete(pd_chan, desc); + + spin_unlock_irq(&pd_chan->lock); + + return 0; +} + +static void pdc_tasklet(struct tasklet_struct *t) +{ + struct pch_dma_chan *pd_chan = from_tasklet(pd_chan, t, tasklet); + unsigned long flags; + + if (!pdc_is_idle(pd_chan)) { + dev_err(chan2dev(&pd_chan->chan), + "BUG: handle non-idle channel in tasklet\n"); + return; + } + + spin_lock_irqsave(&pd_chan->lock, flags); + if (test_and_clear_bit(0, &pd_chan->err_status)) + pdc_handle_error(pd_chan); + else + pdc_advance_work(pd_chan); + spin_unlock_irqrestore(&pd_chan->lock, flags); +} + +static irqreturn_t pd_irq(int irq, void *devid) +{ + struct pch_dma *pd = (struct pch_dma *)devid; + struct pch_dma_chan *pd_chan; + u32 sts0; + u32 sts2; + int i; + int ret0 = IRQ_NONE; + int ret2 = IRQ_NONE; + + sts0 = dma_readl(pd, STS0); + sts2 = dma_readl(pd, STS2); + + dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0); + + for (i = 0; i < pd->dma.chancnt; i++) { + pd_chan = &pd->channels[i]; + + if (i < 8) { + if (sts0 & DMA_STATUS_IRQ(i)) { + if (sts0 & DMA_STATUS0_ERR(i)) + set_bit(0, &pd_chan->err_status); + + tasklet_schedule(&pd_chan->tasklet); + ret0 = IRQ_HANDLED; + } + } else { + if (sts2 & DMA_STATUS_IRQ(i - 8)) { + if (sts2 & DMA_STATUS2_ERR(i)) + set_bit(0, &pd_chan->err_status); + + tasklet_schedule(&pd_chan->tasklet); + ret2 = IRQ_HANDLED; + } + } + } + + /* clear interrupt bits in status register */ + if (ret0) + dma_writel(pd, STS0, sts0); + if (ret2) + dma_writel(pd, STS2, sts2); + + return ret0 | ret2; +} + +static void __maybe_unused pch_dma_save_regs(struct pch_dma *pd) +{ + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + int i = 0; + + pd->regs.dma_ctl0 = dma_readl(pd, CTL0); + pd->regs.dma_ctl1 = dma_readl(pd, CTL1); + pd->regs.dma_ctl2 = dma_readl(pd, CTL2); + pd->regs.dma_ctl3 = dma_readl(pd, CTL3); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { + pd_chan = to_pd_chan(chan); + + pd->ch_regs[i].dev_addr = channel_readl(pd_chan, DEV_ADDR); + pd->ch_regs[i].mem_addr = channel_readl(pd_chan, MEM_ADDR); + pd->ch_regs[i].size = channel_readl(pd_chan, SIZE); + pd->ch_regs[i].next = channel_readl(pd_chan, NEXT); + + i++; + } +} + +static void __maybe_unused pch_dma_restore_regs(struct pch_dma *pd) +{ + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + int i = 0; + + dma_writel(pd, CTL0, pd->regs.dma_ctl0); + dma_writel(pd, CTL1, pd->regs.dma_ctl1); + dma_writel(pd, CTL2, pd->regs.dma_ctl2); + dma_writel(pd, CTL3, pd->regs.dma_ctl3); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { + pd_chan = to_pd_chan(chan); + + channel_writel(pd_chan, DEV_ADDR, pd->ch_regs[i].dev_addr); + channel_writel(pd_chan, MEM_ADDR, pd->ch_regs[i].mem_addr); + channel_writel(pd_chan, SIZE, pd->ch_regs[i].size); + channel_writel(pd_chan, NEXT, pd->ch_regs[i].next); + + i++; + } +} + +static int __maybe_unused pch_dma_suspend(struct device *dev) +{ + struct pch_dma *pd = dev_get_drvdata(dev); + + if (pd) + pch_dma_save_regs(pd); + + return 0; +} + +static int __maybe_unused pch_dma_resume(struct device *dev) +{ + struct pch_dma *pd = dev_get_drvdata(dev); + + if (pd) + pch_dma_restore_regs(pd); + + return 0; +} + +static int pch_dma_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct pch_dma *pd; + struct pch_dma_regs *regs; + unsigned int nr_channels; + int err; + int i; + + nr_channels = id->driver_data; + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + pci_set_drvdata(pdev, pd); + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device\n"); + goto err_free_mem; + } + + if (!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Cannot find proper base address\n"); + err = -ENODEV; + goto err_disable_pdev; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "Cannot obtain PCI resources\n"); + goto err_disable_pdev; + } + + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Cannot set proper DMA config\n"); + goto err_free_res; + } + + regs = pd->membase = pci_iomap(pdev, 1, 0); + if (!pd->membase) { + dev_err(&pdev->dev, "Cannot map MMIO registers\n"); + err = -ENOMEM; + goto err_free_res; + } + + pci_set_master(pdev); + pd->dma.dev = &pdev->dev; + + err = request_irq(pdev->irq, pd_irq, IRQF_SHARED, DRV_NAME, pd); + if (err) { + dev_err(&pdev->dev, "Failed to request IRQ\n"); + goto err_iounmap; + } + + pd->pool = dma_pool_create("pch_dma_desc_pool", &pdev->dev, + sizeof(struct pch_dma_desc), 4, 0); + if (!pd->pool) { + dev_err(&pdev->dev, "Failed to alloc DMA descriptors\n"); + err = -ENOMEM; + goto err_free_irq; + } + + + INIT_LIST_HEAD(&pd->dma.channels); + + for (i = 0; i < nr_channels; i++) { + struct pch_dma_chan *pd_chan = &pd->channels[i]; + + pd_chan->chan.device = &pd->dma; + dma_cookie_init(&pd_chan->chan); + + pd_chan->membase = ®s->desc[i]; + + spin_lock_init(&pd_chan->lock); + + INIT_LIST_HEAD(&pd_chan->active_list); + INIT_LIST_HEAD(&pd_chan->queue); + INIT_LIST_HEAD(&pd_chan->free_list); + + tasklet_setup(&pd_chan->tasklet, pdc_tasklet); + list_add_tail(&pd_chan->chan.device_node, &pd->dma.channels); + } + + dma_cap_zero(pd->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, pd->dma.cap_mask); + dma_cap_set(DMA_SLAVE, pd->dma.cap_mask); + + pd->dma.device_alloc_chan_resources = pd_alloc_chan_resources; + pd->dma.device_free_chan_resources = pd_free_chan_resources; + pd->dma.device_tx_status = pd_tx_status; + pd->dma.device_issue_pending = pd_issue_pending; + pd->dma.device_prep_slave_sg = pd_prep_slave_sg; + pd->dma.device_terminate_all = pd_device_terminate_all; + + err = dma_async_device_register(&pd->dma); + if (err) { + dev_err(&pdev->dev, "Failed to register DMA device\n"); + goto err_free_pool; + } + + return 0; + +err_free_pool: + dma_pool_destroy(pd->pool); +err_free_irq: + free_irq(pdev->irq, pd); +err_iounmap: + pci_iounmap(pdev, pd->membase); +err_free_res: + pci_release_regions(pdev); +err_disable_pdev: + pci_disable_device(pdev); +err_free_mem: + kfree(pd); + return err; +} + +static void pch_dma_remove(struct pci_dev *pdev) +{ + struct pch_dma *pd = pci_get_drvdata(pdev); + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + + if (pd) { + dma_async_device_unregister(&pd->dma); + + free_irq(pdev->irq, pd); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, + device_node) { + pd_chan = to_pd_chan(chan); + + tasklet_kill(&pd_chan->tasklet); + } + + dma_pool_destroy(pd->pool); + pci_iounmap(pdev, pd->membase); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(pd); + } +} + +/* PCI Device ID of DMA device */ +#define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH 0x8810 +#define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH 0x8815 +#define PCI_DEVICE_ID_ML7213_DMA1_8CH 0x8026 +#define PCI_DEVICE_ID_ML7213_DMA2_8CH 0x802B +#define PCI_DEVICE_ID_ML7213_DMA3_4CH 0x8034 +#define PCI_DEVICE_ID_ML7213_DMA4_12CH 0x8032 +#define PCI_DEVICE_ID_ML7223_DMA1_4CH 0x800B +#define PCI_DEVICE_ID_ML7223_DMA2_4CH 0x800E +#define PCI_DEVICE_ID_ML7223_DMA3_4CH 0x8017 +#define PCI_DEVICE_ID_ML7223_DMA4_4CH 0x803B +#define PCI_DEVICE_ID_ML7831_DMA1_8CH 0x8810 +#define PCI_DEVICE_ID_ML7831_DMA2_4CH 0x8815 + +static const struct pci_device_id pch_dma_id_table[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA1_4CH), 4}, /* UART */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA1_8CH), 8}, /* UART */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA2_4CH), 4}, /* SPI */ + { 0, }, +}; + +static SIMPLE_DEV_PM_OPS(pch_dma_pm_ops, pch_dma_suspend, pch_dma_resume); + +static struct pci_driver pch_dma_driver = { + .name = DRV_NAME, + .id_table = pch_dma_id_table, + .probe = pch_dma_probe, + .remove = pch_dma_remove, + .driver.pm = &pch_dma_pm_ops, +}; + +module_pci_driver(pch_dma_driver); + +MODULE_DESCRIPTION("Intel EG20T PCH / LAPIS Semicon ML7213/ML7223/ML7831 IOH " + "DMA controller driver"); +MODULE_AUTHOR("Yong Wang "); +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(pci, pch_dma_id_table); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c new file mode 100644 index 0000000000..3cf0b38387 --- /dev/null +++ b/drivers/dma/pl330.c @@ -0,0 +1,3277 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Copyright (C) 2010 Samsung Electronics Co. Ltd. + * Jaswinder Singh + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#define PL330_MAX_CHAN 8 +#define PL330_MAX_IRQS 32 +#define PL330_MAX_PERI 32 +#define PL330_MAX_BURST 16 + +#define PL330_QUIRK_BROKEN_NO_FLUSHP BIT(0) +#define PL330_QUIRK_PERIPH_BURST BIT(1) + +enum pl330_cachectrl { + CCTRL0, /* Noncacheable and nonbufferable */ + CCTRL1, /* Bufferable only */ + CCTRL2, /* Cacheable, but do not allocate */ + CCTRL3, /* Cacheable and bufferable, but do not allocate */ + INVALID1, /* AWCACHE = 0x1000 */ + INVALID2, + CCTRL6, /* Cacheable write-through, allocate on writes only */ + CCTRL7, /* Cacheable write-back, allocate on writes only */ +}; + +enum pl330_byteswap { + SWAP_NO, + SWAP_2, + SWAP_4, + SWAP_8, + SWAP_16, +}; + +/* Register and Bit field Definitions */ +#define DS 0x0 +#define DS_ST_STOP 0x0 +#define DS_ST_EXEC 0x1 +#define DS_ST_CMISS 0x2 +#define DS_ST_UPDTPC 0x3 +#define DS_ST_WFE 0x4 +#define DS_ST_ATBRR 0x5 +#define DS_ST_QBUSY 0x6 +#define DS_ST_WFP 0x7 +#define DS_ST_KILL 0x8 +#define DS_ST_CMPLT 0x9 +#define DS_ST_FLTCMP 0xe +#define DS_ST_FAULT 0xf + +#define DPC 0x4 +#define INTEN 0x20 +#define ES 0x24 +#define INTSTATUS 0x28 +#define INTCLR 0x2c +#define FSM 0x30 +#define FSC 0x34 +#define FTM 0x38 + +#define _FTC 0x40 +#define FTC(n) (_FTC + (n)*0x4) + +#define _CS 0x100 +#define CS(n) (_CS + (n)*0x8) +#define CS_CNS (1 << 21) + +#define _CPC 0x104 +#define CPC(n) (_CPC + (n)*0x8) + +#define _SA 0x400 +#define SA(n) (_SA + (n)*0x20) + +#define _DA 0x404 +#define DA(n) (_DA + (n)*0x20) + +#define _CC 0x408 +#define CC(n) (_CC + (n)*0x20) + +#define CC_SRCINC (1 << 0) +#define CC_DSTINC (1 << 14) +#define CC_SRCPRI (1 << 8) +#define CC_DSTPRI (1 << 22) +#define CC_SRCNS (1 << 9) +#define CC_DSTNS (1 << 23) +#define CC_SRCIA (1 << 10) +#define CC_DSTIA (1 << 24) +#define CC_SRCBRSTLEN_SHFT 4 +#define CC_DSTBRSTLEN_SHFT 18 +#define CC_SRCBRSTSIZE_SHFT 1 +#define CC_DSTBRSTSIZE_SHFT 15 +#define CC_SRCCCTRL_SHFT 11 +#define CC_SRCCCTRL_MASK 0x7 +#define CC_DSTCCTRL_SHFT 25 +#define CC_DRCCCTRL_MASK 0x7 +#define CC_SWAP_SHFT 28 + +#define _LC0 0x40c +#define LC0(n) (_LC0 + (n)*0x20) + +#define _LC1 0x410 +#define LC1(n) (_LC1 + (n)*0x20) + +#define DBGSTATUS 0xd00 +#define DBG_BUSY (1 << 0) + +#define DBGCMD 0xd04 +#define DBGINST0 0xd08 +#define DBGINST1 0xd0c + +#define CR0 0xe00 +#define CR1 0xe04 +#define CR2 0xe08 +#define CR3 0xe0c +#define CR4 0xe10 +#define CRD 0xe14 + +#define PERIPH_ID 0xfe0 +#define PERIPH_REV_SHIFT 20 +#define PERIPH_REV_MASK 0xf +#define PERIPH_REV_R0P0 0 +#define PERIPH_REV_R1P0 1 +#define PERIPH_REV_R1P1 2 + +#define CR0_PERIPH_REQ_SET (1 << 0) +#define CR0_BOOT_EN_SET (1 << 1) +#define CR0_BOOT_MAN_NS (1 << 2) +#define CR0_NUM_CHANS_SHIFT 4 +#define CR0_NUM_CHANS_MASK 0x7 +#define CR0_NUM_PERIPH_SHIFT 12 +#define CR0_NUM_PERIPH_MASK 0x1f +#define CR0_NUM_EVENTS_SHIFT 17 +#define CR0_NUM_EVENTS_MASK 0x1f + +#define CR1_ICACHE_LEN_SHIFT 0 +#define CR1_ICACHE_LEN_MASK 0x7 +#define CR1_NUM_ICACHELINES_SHIFT 4 +#define CR1_NUM_ICACHELINES_MASK 0xf + +#define CRD_DATA_WIDTH_SHIFT 0 +#define CRD_DATA_WIDTH_MASK 0x7 +#define CRD_WR_CAP_SHIFT 4 +#define CRD_WR_CAP_MASK 0x7 +#define CRD_WR_Q_DEP_SHIFT 8 +#define CRD_WR_Q_DEP_MASK 0xf +#define CRD_RD_CAP_SHIFT 12 +#define CRD_RD_CAP_MASK 0x7 +#define CRD_RD_Q_DEP_SHIFT 16 +#define CRD_RD_Q_DEP_MASK 0xf +#define CRD_DATA_BUFF_SHIFT 20 +#define CRD_DATA_BUFF_MASK 0x3ff + +#define PART 0x330 +#define DESIGNER 0x41 +#define REVISION 0x0 +#define INTEG_CFG 0x0 +#define PERIPH_ID_VAL ((PART << 0) | (DESIGNER << 12)) + +#define PL330_STATE_STOPPED (1 << 0) +#define PL330_STATE_EXECUTING (1 << 1) +#define PL330_STATE_WFE (1 << 2) +#define PL330_STATE_FAULTING (1 << 3) +#define PL330_STATE_COMPLETING (1 << 4) +#define PL330_STATE_WFP (1 << 5) +#define PL330_STATE_KILLING (1 << 6) +#define PL330_STATE_FAULT_COMPLETING (1 << 7) +#define PL330_STATE_CACHEMISS (1 << 8) +#define PL330_STATE_UPDTPC (1 << 9) +#define PL330_STATE_ATBARRIER (1 << 10) +#define PL330_STATE_QUEUEBUSY (1 << 11) +#define PL330_STATE_INVALID (1 << 15) + +#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \ + | PL330_STATE_WFE | PL330_STATE_FAULTING) + +#define CMD_DMAADDH 0x54 +#define CMD_DMAEND 0x00 +#define CMD_DMAFLUSHP 0x35 +#define CMD_DMAGO 0xa0 +#define CMD_DMALD 0x04 +#define CMD_DMALDP 0x25 +#define CMD_DMALP 0x20 +#define CMD_DMALPEND 0x28 +#define CMD_DMAKILL 0x01 +#define CMD_DMAMOV 0xbc +#define CMD_DMANOP 0x18 +#define CMD_DMARMB 0x12 +#define CMD_DMASEV 0x34 +#define CMD_DMAST 0x08 +#define CMD_DMASTP 0x29 +#define CMD_DMASTZ 0x0c +#define CMD_DMAWFE 0x36 +#define CMD_DMAWFP 0x30 +#define CMD_DMAWMB 0x13 + +#define SZ_DMAADDH 3 +#define SZ_DMAEND 1 +#define SZ_DMAFLUSHP 2 +#define SZ_DMALD 1 +#define SZ_DMALDP 2 +#define SZ_DMALP 2 +#define SZ_DMALPEND 2 +#define SZ_DMAKILL 1 +#define SZ_DMAMOV 6 +#define SZ_DMANOP 1 +#define SZ_DMARMB 1 +#define SZ_DMASEV 2 +#define SZ_DMAST 1 +#define SZ_DMASTP 2 +#define SZ_DMASTZ 1 +#define SZ_DMAWFE 2 +#define SZ_DMAWFP 2 +#define SZ_DMAWMB 1 +#define SZ_DMAGO 6 + +#define BRST_LEN(ccr) ((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1) +#define BRST_SIZE(ccr) (1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7)) + +#define BYTE_TO_BURST(b, ccr) ((b) / BRST_SIZE(ccr) / BRST_LEN(ccr)) +#define BURST_TO_BYTE(c, ccr) ((c) * BRST_SIZE(ccr) * BRST_LEN(ccr)) + +/* + * With 256 bytes, we can do more than 2.5MB and 5MB xfers per req + * at 1byte/burst for P<->M and M<->M respectively. + * For typical scenario, at 1word/burst, 10MB and 20MB xfers per req + * should be enough for P<->M and M<->M respectively. + */ +#define MCODE_BUFF_PER_REQ 256 + +/* Use this _only_ to wait on transient states */ +#define UNTIL(t, s) while (!(_state(t) & (s))) cpu_relax(); + +#ifdef PL330_DEBUG_MCGEN +static unsigned cmd_line; +#define PL330_DBGCMD_DUMP(off, x...) do { \ + printk("%x:", cmd_line); \ + printk(KERN_CONT x); \ + cmd_line += off; \ + } while (0) +#define PL330_DBGMC_START(addr) (cmd_line = addr) +#else +#define PL330_DBGCMD_DUMP(off, x...) do {} while (0) +#define PL330_DBGMC_START(addr) do {} while (0) +#endif + +/* The number of default descriptors */ + +#define NR_DEFAULT_DESC 16 + +/* Delay for runtime PM autosuspend, ms */ +#define PL330_AUTOSUSPEND_DELAY 20 + +/* Populated by the PL330 core driver for DMA API driver's info */ +struct pl330_config { + u32 periph_id; +#define DMAC_MODE_NS (1 << 0) + unsigned int mode; + unsigned int data_bus_width:10; /* In number of bits */ + unsigned int data_buf_dep:11; + unsigned int num_chan:4; + unsigned int num_peri:6; + u32 peri_ns; + unsigned int num_events:6; + u32 irq_ns; +}; + +/* + * Request Configuration. + * The PL330 core does not modify this and uses the last + * working configuration if the request doesn't provide any. + * + * The Client may want to provide this info only for the + * first request and a request with new settings. + */ +struct pl330_reqcfg { + /* Address Incrementing */ + unsigned dst_inc:1; + unsigned src_inc:1; + + /* + * For now, the SRC & DST protection levels + * and burst size/length are assumed same. + */ + bool nonsecure; + bool privileged; + bool insnaccess; + unsigned brst_len:5; + unsigned brst_size:3; /* in power of 2 */ + + enum pl330_cachectrl dcctl; + enum pl330_cachectrl scctl; + enum pl330_byteswap swap; + struct pl330_config *pcfg; +}; + +/* + * One cycle of DMAC operation. + * There may be more than one xfer in a request. + */ +struct pl330_xfer { + u32 src_addr; + u32 dst_addr; + /* Size to xfer */ + u32 bytes; +}; + +/* The xfer callbacks are made with one of these arguments. */ +enum pl330_op_err { + /* The all xfers in the request were success. */ + PL330_ERR_NONE, + /* If req aborted due to global error. */ + PL330_ERR_ABORT, + /* If req failed due to problem with Channel. */ + PL330_ERR_FAIL, +}; + +enum dmamov_dst { + SAR = 0, + CCR, + DAR, +}; + +enum pl330_dst { + SRC = 0, + DST, +}; + +enum pl330_cond { + SINGLE, + BURST, + ALWAYS, +}; + +struct dma_pl330_desc; + +struct _pl330_req { + u32 mc_bus; + void *mc_cpu; + struct dma_pl330_desc *desc; +}; + +/* ToBeDone for tasklet */ +struct _pl330_tbd { + bool reset_dmac; + bool reset_mngr; + u8 reset_chan; +}; + +/* A DMAC Thread */ +struct pl330_thread { + u8 id; + int ev; + /* If the channel is not yet acquired by any client */ + bool free; + /* Parent DMAC */ + struct pl330_dmac *dmac; + /* Only two at a time */ + struct _pl330_req req[2]; + /* Index of the last enqueued request */ + unsigned lstenq; + /* Index of the last submitted request or -1 if the DMA is stopped */ + int req_running; +}; + +enum pl330_dmac_state { + UNINIT, + INIT, + DYING, +}; + +enum desc_status { + /* In the DMAC pool */ + FREE, + /* + * Allocated to some channel during prep_xxx + * Also may be sitting on the work_list. + */ + PREP, + /* + * Sitting on the work_list and already submitted + * to the PL330 core. Not more than two descriptors + * of a channel can be BUSY at any time. + */ + BUSY, + /* + * Pause was called while descriptor was BUSY. Due to hardware + * limitations, only termination is possible for descriptors + * that have been paused. + */ + PAUSED, + /* + * Sitting on the channel work_list but xfer done + * by PL330 core + */ + DONE, +}; + +struct dma_pl330_chan { + /* Schedule desc completion */ + struct tasklet_struct task; + + /* DMA-Engine Channel */ + struct dma_chan chan; + + /* List of submitted descriptors */ + struct list_head submitted_list; + /* List of issued descriptors */ + struct list_head work_list; + /* List of completed descriptors */ + struct list_head completed_list; + + /* Pointer to the DMAC that manages this channel, + * NULL if the channel is available to be acquired. + * As the parent, this DMAC also provides descriptors + * to the channel. + */ + struct pl330_dmac *dmac; + + /* To protect channel manipulation */ + spinlock_t lock; + + /* + * Hardware channel thread of PL330 DMAC. NULL if the channel is + * available. + */ + struct pl330_thread *thread; + + /* For D-to-M and M-to-D channels */ + int burst_sz; /* the peripheral fifo width */ + int burst_len; /* the number of burst */ + phys_addr_t fifo_addr; + /* DMA-mapped view of the FIFO; may differ if an IOMMU is present */ + dma_addr_t fifo_dma; + enum dma_data_direction dir; + struct dma_slave_config slave_config; + + /* for cyclic capability */ + bool cyclic; + + /* for runtime pm tracking */ + bool active; +}; + +struct pl330_dmac { + /* DMA-Engine Device */ + struct dma_device ddma; + + /* Pool of descriptors available for the DMAC's channels */ + struct list_head desc_pool; + /* To protect desc_pool manipulation */ + spinlock_t pool_lock; + + /* Size of MicroCode buffers for each channel. */ + unsigned mcbufsz; + /* ioremap'ed address of PL330 registers. */ + void __iomem *base; + /* Populated by the PL330 core driver during pl330_add */ + struct pl330_config pcfg; + + spinlock_t lock; + /* Maximum possible events/irqs */ + int events[32]; + /* BUS address of MicroCode buffer */ + dma_addr_t mcode_bus; + /* CPU address of MicroCode buffer */ + void *mcode_cpu; + /* List of all Channel threads */ + struct pl330_thread *channels; + /* Pointer to the MANAGER thread */ + struct pl330_thread *manager; + /* To handle bad news in interrupt */ + struct tasklet_struct tasks; + struct _pl330_tbd dmac_tbd; + /* State of DMAC operation */ + enum pl330_dmac_state state; + /* Holds list of reqs with due callbacks */ + struct list_head req_done; + + /* Peripheral channels connected to this DMAC */ + unsigned int num_peripherals; + struct dma_pl330_chan *peripherals; /* keep at end */ + int quirks; + + struct reset_control *rstc; + struct reset_control *rstc_ocp; +}; + +static struct pl330_of_quirks { + char *quirk; + int id; +} of_quirks[] = { + { + .quirk = "arm,pl330-broken-no-flushp", + .id = PL330_QUIRK_BROKEN_NO_FLUSHP, + }, + { + .quirk = "arm,pl330-periph-burst", + .id = PL330_QUIRK_PERIPH_BURST, + } +}; + +struct dma_pl330_desc { + /* To attach to a queue as child */ + struct list_head node; + + /* Descriptor for the DMA Engine API */ + struct dma_async_tx_descriptor txd; + + /* Xfer for PL330 core */ + struct pl330_xfer px; + + struct pl330_reqcfg rqcfg; + + enum desc_status status; + + int bytes_requested; + bool last; + + /* The channel which currently holds this desc */ + struct dma_pl330_chan *pchan; + + enum dma_transfer_direction rqtype; + /* Index of peripheral for the xfer. */ + unsigned peri:5; + /* Hook to attach to DMAC's list of reqs with due callback */ + struct list_head rqd; +}; + +struct _xfer_spec { + u32 ccr; + struct dma_pl330_desc *desc; +}; + +static int pl330_config_write(struct dma_chan *chan, + struct dma_slave_config *slave_config, + enum dma_transfer_direction direction); + +static inline bool _queue_full(struct pl330_thread *thrd) +{ + return thrd->req[0].desc != NULL && thrd->req[1].desc != NULL; +} + +static inline bool is_manager(struct pl330_thread *thrd) +{ + return thrd->dmac->manager == thrd; +} + +/* If manager of the thread is in Non-Secure mode */ +static inline bool _manager_ns(struct pl330_thread *thrd) +{ + return (thrd->dmac->pcfg.mode & DMAC_MODE_NS) ? true : false; +} + +static inline u32 get_revision(u32 periph_id) +{ + return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK; +} + +static inline u32 _emit_END(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMAEND; + + buf[0] = CMD_DMAEND; + + PL330_DBGCMD_DUMP(SZ_DMAEND, "\tDMAEND\n"); + + return SZ_DMAEND; +} + +static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri) +{ + if (dry_run) + return SZ_DMAFLUSHP; + + buf[0] = CMD_DMAFLUSHP; + + peri &= 0x1f; + peri <<= 3; + buf[1] = peri; + + PL330_DBGCMD_DUMP(SZ_DMAFLUSHP, "\tDMAFLUSHP %u\n", peri >> 3); + + return SZ_DMAFLUSHP; +} + +static inline u32 _emit_LD(unsigned dry_run, u8 buf[], enum pl330_cond cond) +{ + if (dry_run) + return SZ_DMALD; + + buf[0] = CMD_DMALD; + + if (cond == SINGLE) + buf[0] |= (0 << 1) | (1 << 0); + else if (cond == BURST) + buf[0] |= (1 << 1) | (1 << 0); + + PL330_DBGCMD_DUMP(SZ_DMALD, "\tDMALD%c\n", + cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A')); + + return SZ_DMALD; +} + +static inline u32 _emit_LDP(unsigned dry_run, u8 buf[], + enum pl330_cond cond, u8 peri) +{ + if (dry_run) + return SZ_DMALDP; + + buf[0] = CMD_DMALDP; + + if (cond == BURST) + buf[0] |= (1 << 1); + + peri &= 0x1f; + peri <<= 3; + buf[1] = peri; + + PL330_DBGCMD_DUMP(SZ_DMALDP, "\tDMALDP%c %u\n", + cond == SINGLE ? 'S' : 'B', peri >> 3); + + return SZ_DMALDP; +} + +static inline u32 _emit_LP(unsigned dry_run, u8 buf[], + unsigned loop, u8 cnt) +{ + if (dry_run) + return SZ_DMALP; + + buf[0] = CMD_DMALP; + + if (loop) + buf[0] |= (1 << 1); + + cnt--; /* DMAC increments by 1 internally */ + buf[1] = cnt; + + PL330_DBGCMD_DUMP(SZ_DMALP, "\tDMALP_%c %u\n", loop ? '1' : '0', cnt); + + return SZ_DMALP; +} + +struct _arg_LPEND { + enum pl330_cond cond; + bool forever; + unsigned loop; + u8 bjump; +}; + +static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[], + const struct _arg_LPEND *arg) +{ + enum pl330_cond cond = arg->cond; + bool forever = arg->forever; + unsigned loop = arg->loop; + u8 bjump = arg->bjump; + + if (dry_run) + return SZ_DMALPEND; + + buf[0] = CMD_DMALPEND; + + if (loop) + buf[0] |= (1 << 2); + + if (!forever) + buf[0] |= (1 << 4); + + if (cond == SINGLE) + buf[0] |= (0 << 1) | (1 << 0); + else if (cond == BURST) + buf[0] |= (1 << 1) | (1 << 0); + + buf[1] = bjump; + + PL330_DBGCMD_DUMP(SZ_DMALPEND, "\tDMALP%s%c_%c bjmpto_%x\n", + forever ? "FE" : "END", + cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'), + loop ? '1' : '0', + bjump); + + return SZ_DMALPEND; +} + +static inline u32 _emit_KILL(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMAKILL; + + buf[0] = CMD_DMAKILL; + + return SZ_DMAKILL; +} + +static inline u32 _emit_MOV(unsigned dry_run, u8 buf[], + enum dmamov_dst dst, u32 val) +{ + if (dry_run) + return SZ_DMAMOV; + + buf[0] = CMD_DMAMOV; + buf[1] = dst; + buf[2] = val; + buf[3] = val >> 8; + buf[4] = val >> 16; + buf[5] = val >> 24; + + PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n", + dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val); + + return SZ_DMAMOV; +} + +static inline u32 _emit_RMB(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMARMB; + + buf[0] = CMD_DMARMB; + + PL330_DBGCMD_DUMP(SZ_DMARMB, "\tDMARMB\n"); + + return SZ_DMARMB; +} + +static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev) +{ + if (dry_run) + return SZ_DMASEV; + + buf[0] = CMD_DMASEV; + + ev &= 0x1f; + ev <<= 3; + buf[1] = ev; + + PL330_DBGCMD_DUMP(SZ_DMASEV, "\tDMASEV %u\n", ev >> 3); + + return SZ_DMASEV; +} + +static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond) +{ + if (dry_run) + return SZ_DMAST; + + buf[0] = CMD_DMAST; + + if (cond == SINGLE) + buf[0] |= (0 << 1) | (1 << 0); + else if (cond == BURST) + buf[0] |= (1 << 1) | (1 << 0); + + PL330_DBGCMD_DUMP(SZ_DMAST, "\tDMAST%c\n", + cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A')); + + return SZ_DMAST; +} + +static inline u32 _emit_STP(unsigned dry_run, u8 buf[], + enum pl330_cond cond, u8 peri) +{ + if (dry_run) + return SZ_DMASTP; + + buf[0] = CMD_DMASTP; + + if (cond == BURST) + buf[0] |= (1 << 1); + + peri &= 0x1f; + peri <<= 3; + buf[1] = peri; + + PL330_DBGCMD_DUMP(SZ_DMASTP, "\tDMASTP%c %u\n", + cond == SINGLE ? 'S' : 'B', peri >> 3); + + return SZ_DMASTP; +} + +static inline u32 _emit_WFP(unsigned dry_run, u8 buf[], + enum pl330_cond cond, u8 peri) +{ + if (dry_run) + return SZ_DMAWFP; + + buf[0] = CMD_DMAWFP; + + if (cond == SINGLE) + buf[0] |= (0 << 1) | (0 << 0); + else if (cond == BURST) + buf[0] |= (1 << 1) | (0 << 0); + else + buf[0] |= (0 << 1) | (1 << 0); + + peri &= 0x1f; + peri <<= 3; + buf[1] = peri; + + PL330_DBGCMD_DUMP(SZ_DMAWFP, "\tDMAWFP%c %u\n", + cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'P'), peri >> 3); + + return SZ_DMAWFP; +} + +static inline u32 _emit_WMB(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMAWMB; + + buf[0] = CMD_DMAWMB; + + PL330_DBGCMD_DUMP(SZ_DMAWMB, "\tDMAWMB\n"); + + return SZ_DMAWMB; +} + +struct _arg_GO { + u8 chan; + u32 addr; + unsigned ns; +}; + +static inline u32 _emit_GO(unsigned dry_run, u8 buf[], + const struct _arg_GO *arg) +{ + u8 chan = arg->chan; + u32 addr = arg->addr; + unsigned ns = arg->ns; + + if (dry_run) + return SZ_DMAGO; + + buf[0] = CMD_DMAGO; + buf[0] |= (ns << 1); + buf[1] = chan & 0x7; + buf[2] = addr; + buf[3] = addr >> 8; + buf[4] = addr >> 16; + buf[5] = addr >> 24; + + return SZ_DMAGO; +} + +#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t) + +/* Returns Time-Out */ +static bool _until_dmac_idle(struct pl330_thread *thrd) +{ + void __iomem *regs = thrd->dmac->base; + unsigned long loops = msecs_to_loops(5); + + do { + /* Until Manager is Idle */ + if (!(readl(regs + DBGSTATUS) & DBG_BUSY)) + break; + + cpu_relax(); + } while (--loops); + + if (!loops) + return true; + + return false; +} + +static inline void _execute_DBGINSN(struct pl330_thread *thrd, + u8 insn[], bool as_manager) +{ + void __iomem *regs = thrd->dmac->base; + u32 val; + + /* If timed out due to halted state-machine */ + if (_until_dmac_idle(thrd)) { + dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n"); + return; + } + + val = (insn[0] << 16) | (insn[1] << 24); + if (!as_manager) { + val |= (1 << 0); + val |= (thrd->id << 8); /* Channel Number */ + } + writel(val, regs + DBGINST0); + + val = le32_to_cpu(*((__le32 *)&insn[2])); + writel(val, regs + DBGINST1); + + /* Get going */ + writel(0, regs + DBGCMD); +} + +static inline u32 _state(struct pl330_thread *thrd) +{ + void __iomem *regs = thrd->dmac->base; + u32 val; + + if (is_manager(thrd)) + val = readl(regs + DS) & 0xf; + else + val = readl(regs + CS(thrd->id)) & 0xf; + + switch (val) { + case DS_ST_STOP: + return PL330_STATE_STOPPED; + case DS_ST_EXEC: + return PL330_STATE_EXECUTING; + case DS_ST_CMISS: + return PL330_STATE_CACHEMISS; + case DS_ST_UPDTPC: + return PL330_STATE_UPDTPC; + case DS_ST_WFE: + return PL330_STATE_WFE; + case DS_ST_FAULT: + return PL330_STATE_FAULTING; + case DS_ST_ATBRR: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_ATBARRIER; + case DS_ST_QBUSY: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_QUEUEBUSY; + case DS_ST_WFP: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_WFP; + case DS_ST_KILL: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_KILLING; + case DS_ST_CMPLT: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_COMPLETING; + case DS_ST_FLTCMP: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_FAULT_COMPLETING; + default: + return PL330_STATE_INVALID; + } +} + +static void _stop(struct pl330_thread *thrd) +{ + void __iomem *regs = thrd->dmac->base; + u8 insn[6] = {0, 0, 0, 0, 0, 0}; + u32 inten = readl(regs + INTEN); + + if (_state(thrd) == PL330_STATE_FAULT_COMPLETING) + UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING); + + /* Return if nothing needs to be done */ + if (_state(thrd) == PL330_STATE_COMPLETING + || _state(thrd) == PL330_STATE_KILLING + || _state(thrd) == PL330_STATE_STOPPED) + return; + + _emit_KILL(0, insn); + + _execute_DBGINSN(thrd, insn, is_manager(thrd)); + + /* clear the event */ + if (inten & (1 << thrd->ev)) + writel(1 << thrd->ev, regs + INTCLR); + /* Stop generating interrupts for SEV */ + writel(inten & ~(1 << thrd->ev), regs + INTEN); +} + +/* Start doing req 'idx' of thread 'thrd' */ +static bool _trigger(struct pl330_thread *thrd) +{ + void __iomem *regs = thrd->dmac->base; + struct _pl330_req *req; + struct dma_pl330_desc *desc; + struct _arg_GO go; + unsigned ns; + u8 insn[6] = {0, 0, 0, 0, 0, 0}; + int idx; + + /* Return if already ACTIVE */ + if (_state(thrd) != PL330_STATE_STOPPED) + return true; + + idx = 1 - thrd->lstenq; + if (thrd->req[idx].desc != NULL) { + req = &thrd->req[idx]; + } else { + idx = thrd->lstenq; + if (thrd->req[idx].desc != NULL) + req = &thrd->req[idx]; + else + req = NULL; + } + + /* Return if no request */ + if (!req) + return true; + + /* Return if req is running */ + if (idx == thrd->req_running) + return true; + + desc = req->desc; + + ns = desc->rqcfg.nonsecure ? 1 : 0; + + /* See 'Abort Sources' point-4 at Page 2-25 */ + if (_manager_ns(thrd) && !ns) + dev_info(thrd->dmac->ddma.dev, "%s:%d Recipe for ABORT!\n", + __func__, __LINE__); + + go.chan = thrd->id; + go.addr = req->mc_bus; + go.ns = ns; + _emit_GO(0, insn, &go); + + /* Set to generate interrupts for SEV */ + writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN); + + /* Only manager can execute GO */ + _execute_DBGINSN(thrd, insn, true); + + thrd->req_running = idx; + + return true; +} + +static bool pl330_start_thread(struct pl330_thread *thrd) +{ + switch (_state(thrd)) { + case PL330_STATE_FAULT_COMPLETING: + UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING); + + if (_state(thrd) == PL330_STATE_KILLING) + UNTIL(thrd, PL330_STATE_STOPPED) + fallthrough; + + case PL330_STATE_FAULTING: + _stop(thrd); + fallthrough; + + case PL330_STATE_KILLING: + case PL330_STATE_COMPLETING: + UNTIL(thrd, PL330_STATE_STOPPED) + fallthrough; + + case PL330_STATE_STOPPED: + return _trigger(thrd); + + case PL330_STATE_WFP: + case PL330_STATE_QUEUEBUSY: + case PL330_STATE_ATBARRIER: + case PL330_STATE_UPDTPC: + case PL330_STATE_CACHEMISS: + case PL330_STATE_EXECUTING: + return true; + + case PL330_STATE_WFE: /* For RESUME, nothing yet */ + default: + return false; + } +} + +static inline int _ldst_memtomem(unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs, int cyc) +{ + int off = 0; + struct pl330_config *pcfg = pxs->desc->rqcfg.pcfg; + + /* check lock-up free version */ + if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) { + while (cyc--) { + off += _emit_LD(dry_run, &buf[off], ALWAYS); + off += _emit_ST(dry_run, &buf[off], ALWAYS); + } + } else { + while (cyc--) { + off += _emit_LD(dry_run, &buf[off], ALWAYS); + off += _emit_RMB(dry_run, &buf[off]); + off += _emit_ST(dry_run, &buf[off], ALWAYS); + off += _emit_WMB(dry_run, &buf[off]); + } + } + + return off; +} + +static u32 _emit_load(unsigned int dry_run, u8 buf[], + enum pl330_cond cond, enum dma_transfer_direction direction, + u8 peri) +{ + int off = 0; + + switch (direction) { + case DMA_MEM_TO_MEM: + case DMA_MEM_TO_DEV: + off += _emit_LD(dry_run, &buf[off], cond); + break; + + case DMA_DEV_TO_MEM: + if (cond == ALWAYS) { + off += _emit_LDP(dry_run, &buf[off], SINGLE, + peri); + off += _emit_LDP(dry_run, &buf[off], BURST, + peri); + } else { + off += _emit_LDP(dry_run, &buf[off], cond, + peri); + } + break; + + default: + /* this code should be unreachable */ + WARN_ON(1); + break; + } + + return off; +} + +static inline u32 _emit_store(unsigned int dry_run, u8 buf[], + enum pl330_cond cond, enum dma_transfer_direction direction, + u8 peri) +{ + int off = 0; + + switch (direction) { + case DMA_MEM_TO_MEM: + case DMA_DEV_TO_MEM: + off += _emit_ST(dry_run, &buf[off], cond); + break; + + case DMA_MEM_TO_DEV: + if (cond == ALWAYS) { + off += _emit_STP(dry_run, &buf[off], SINGLE, + peri); + off += _emit_STP(dry_run, &buf[off], BURST, + peri); + } else { + off += _emit_STP(dry_run, &buf[off], cond, + peri); + } + break; + + default: + /* this code should be unreachable */ + WARN_ON(1); + break; + } + + return off; +} + +static inline int _ldst_peripheral(struct pl330_dmac *pl330, + unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs, int cyc, + enum pl330_cond cond) +{ + int off = 0; + + /* + * do FLUSHP at beginning to clear any stale dma requests before the + * first WFP. + */ + if (!(pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)) + off += _emit_FLUSHP(dry_run, &buf[off], pxs->desc->peri); + while (cyc--) { + off += _emit_WFP(dry_run, &buf[off], cond, pxs->desc->peri); + off += _emit_load(dry_run, &buf[off], cond, pxs->desc->rqtype, + pxs->desc->peri); + off += _emit_store(dry_run, &buf[off], cond, pxs->desc->rqtype, + pxs->desc->peri); + } + + return off; +} + +static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs, int cyc) +{ + int off = 0; + enum pl330_cond cond = BRST_LEN(pxs->ccr) > 1 ? BURST : SINGLE; + + if (pl330->quirks & PL330_QUIRK_PERIPH_BURST) + cond = BURST; + + switch (pxs->desc->rqtype) { + case DMA_MEM_TO_DEV: + case DMA_DEV_TO_MEM: + off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, cyc, + cond); + break; + + case DMA_MEM_TO_MEM: + off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc); + break; + + default: + /* this code should be unreachable */ + WARN_ON(1); + break; + } + + return off; +} + +/* + * only the unaligned burst transfers have the dregs. + * so, still transfer dregs with a reduced size burst + * for mem-to-mem, mem-to-dev or dev-to-mem. + */ +static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[], + const struct _xfer_spec *pxs, int transfer_length) +{ + int off = 0; + int dregs_ccr; + + if (transfer_length == 0) + return off; + + /* + * dregs_len = (total bytes - BURST_TO_BYTE(bursts, ccr)) / + * BRST_SIZE(ccr) + * the dregs len must be smaller than burst len, + * so, for higher efficiency, we can modify CCR + * to use a reduced size burst len for the dregs. + */ + dregs_ccr = pxs->ccr; + dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) | + (0xf << CC_DSTBRSTLEN_SHFT)); + dregs_ccr |= (((transfer_length - 1) & 0xf) << + CC_SRCBRSTLEN_SHFT); + dregs_ccr |= (((transfer_length - 1) & 0xf) << + CC_DSTBRSTLEN_SHFT); + + switch (pxs->desc->rqtype) { + case DMA_MEM_TO_DEV: + case DMA_DEV_TO_MEM: + off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr); + off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, 1, + BURST); + break; + + case DMA_MEM_TO_MEM: + off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr); + off += _ldst_memtomem(dry_run, &buf[off], pxs, 1); + break; + + default: + /* this code should be unreachable */ + WARN_ON(1); + break; + } + + return off; +} + +/* Returns bytes consumed and updates bursts */ +static inline int _loop(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[], + unsigned long *bursts, const struct _xfer_spec *pxs) +{ + int cyc, cycmax, szlp, szlpend, szbrst, off; + unsigned lcnt0, lcnt1, ljmp0, ljmp1; + struct _arg_LPEND lpend; + + if (*bursts == 1) + return _bursts(pl330, dry_run, buf, pxs, 1); + + /* Max iterations possible in DMALP is 256 */ + if (*bursts >= 256*256) { + lcnt1 = 256; + lcnt0 = 256; + cyc = *bursts / lcnt1 / lcnt0; + } else if (*bursts > 256) { + lcnt1 = 256; + lcnt0 = *bursts / lcnt1; + cyc = 1; + } else { + lcnt1 = *bursts; + lcnt0 = 0; + cyc = 1; + } + + szlp = _emit_LP(1, buf, 0, 0); + szbrst = _bursts(pl330, 1, buf, pxs, 1); + + lpend.cond = ALWAYS; + lpend.forever = false; + lpend.loop = 0; + lpend.bjump = 0; + szlpend = _emit_LPEND(1, buf, &lpend); + + if (lcnt0) { + szlp *= 2; + szlpend *= 2; + } + + /* + * Max bursts that we can unroll due to limit on the + * size of backward jump that can be encoded in DMALPEND + * which is 8-bits and hence 255 + */ + cycmax = (255 - (szlp + szlpend)) / szbrst; + + cyc = (cycmax < cyc) ? cycmax : cyc; + + off = 0; + + if (lcnt0) { + off += _emit_LP(dry_run, &buf[off], 0, lcnt0); + ljmp0 = off; + } + + off += _emit_LP(dry_run, &buf[off], 1, lcnt1); + ljmp1 = off; + + off += _bursts(pl330, dry_run, &buf[off], pxs, cyc); + + lpend.cond = ALWAYS; + lpend.forever = false; + lpend.loop = 1; + lpend.bjump = off - ljmp1; + off += _emit_LPEND(dry_run, &buf[off], &lpend); + + if (lcnt0) { + lpend.cond = ALWAYS; + lpend.forever = false; + lpend.loop = 0; + lpend.bjump = off - ljmp0; + off += _emit_LPEND(dry_run, &buf[off], &lpend); + } + + *bursts = lcnt1 * cyc; + if (lcnt0) + *bursts *= lcnt0; + + return off; +} + +static inline int _setup_loops(struct pl330_dmac *pl330, + unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs) +{ + struct pl330_xfer *x = &pxs->desc->px; + u32 ccr = pxs->ccr; + unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr); + int num_dregs = (x->bytes - BURST_TO_BYTE(bursts, ccr)) / + BRST_SIZE(ccr); + int off = 0; + + while (bursts) { + c = bursts; + off += _loop(pl330, dry_run, &buf[off], &c, pxs); + bursts -= c; + } + off += _dregs(pl330, dry_run, &buf[off], pxs, num_dregs); + + return off; +} + +static inline int _setup_xfer(struct pl330_dmac *pl330, + unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs) +{ + struct pl330_xfer *x = &pxs->desc->px; + int off = 0; + + /* DMAMOV SAR, x->src_addr */ + off += _emit_MOV(dry_run, &buf[off], SAR, x->src_addr); + /* DMAMOV DAR, x->dst_addr */ + off += _emit_MOV(dry_run, &buf[off], DAR, x->dst_addr); + + /* Setup Loop(s) */ + off += _setup_loops(pl330, dry_run, &buf[off], pxs); + + return off; +} + +/* + * A req is a sequence of one or more xfer units. + * Returns the number of bytes taken to setup the MC for the req. + */ +static int _setup_req(struct pl330_dmac *pl330, unsigned dry_run, + struct pl330_thread *thrd, unsigned index, + struct _xfer_spec *pxs) +{ + struct _pl330_req *req = &thrd->req[index]; + u8 *buf = req->mc_cpu; + int off = 0; + + PL330_DBGMC_START(req->mc_bus); + + /* DMAMOV CCR, ccr */ + off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr); + + off += _setup_xfer(pl330, dry_run, &buf[off], pxs); + + /* DMASEV peripheral/event */ + off += _emit_SEV(dry_run, &buf[off], thrd->ev); + /* DMAEND */ + off += _emit_END(dry_run, &buf[off]); + + return off; +} + +static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc) +{ + u32 ccr = 0; + + if (rqc->src_inc) + ccr |= CC_SRCINC; + + if (rqc->dst_inc) + ccr |= CC_DSTINC; + + /* We set same protection levels for Src and DST for now */ + if (rqc->privileged) + ccr |= CC_SRCPRI | CC_DSTPRI; + if (rqc->nonsecure) + ccr |= CC_SRCNS | CC_DSTNS; + if (rqc->insnaccess) + ccr |= CC_SRCIA | CC_DSTIA; + + ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT); + ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT); + + ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT); + ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT); + + ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT); + ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT); + + ccr |= (rqc->swap << CC_SWAP_SHFT); + + return ccr; +} + +/* + * Submit a list of xfers after which the client wants notification. + * Client is not notified after each xfer unit, just once after all + * xfer units are done or some error occurs. + */ +static int pl330_submit_req(struct pl330_thread *thrd, + struct dma_pl330_desc *desc) +{ + struct pl330_dmac *pl330 = thrd->dmac; + struct _xfer_spec xs; + unsigned long flags; + unsigned idx; + u32 ccr; + int ret = 0; + + switch (desc->rqtype) { + case DMA_MEM_TO_DEV: + break; + + case DMA_DEV_TO_MEM: + break; + + case DMA_MEM_TO_MEM: + break; + + default: + return -ENOTSUPP; + } + + if (pl330->state == DYING + || pl330->dmac_tbd.reset_chan & (1 << thrd->id)) { + dev_info(thrd->dmac->ddma.dev, "%s:%d\n", + __func__, __LINE__); + return -EAGAIN; + } + + /* If request for non-existing peripheral */ + if (desc->rqtype != DMA_MEM_TO_MEM && + desc->peri >= pl330->pcfg.num_peri) { + dev_info(thrd->dmac->ddma.dev, + "%s:%d Invalid peripheral(%u)!\n", + __func__, __LINE__, desc->peri); + return -EINVAL; + } + + spin_lock_irqsave(&pl330->lock, flags); + + if (_queue_full(thrd)) { + ret = -EAGAIN; + goto xfer_exit; + } + + /* Prefer Secure Channel */ + if (!_manager_ns(thrd)) + desc->rqcfg.nonsecure = 0; + else + desc->rqcfg.nonsecure = 1; + + ccr = _prepare_ccr(&desc->rqcfg); + + idx = thrd->req[0].desc == NULL ? 0 : 1; + + xs.ccr = ccr; + xs.desc = desc; + + /* First dry run to check if req is acceptable */ + ret = _setup_req(pl330, 1, thrd, idx, &xs); + + if (ret > pl330->mcbufsz / 2) { + dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n", + __func__, __LINE__, ret, pl330->mcbufsz / 2); + ret = -ENOMEM; + goto xfer_exit; + } + + /* Hook the request */ + thrd->lstenq = idx; + thrd->req[idx].desc = desc; + _setup_req(pl330, 0, thrd, idx, &xs); + + ret = 0; + +xfer_exit: + spin_unlock_irqrestore(&pl330->lock, flags); + + return ret; +} + +static void dma_pl330_rqcb(struct dma_pl330_desc *desc, enum pl330_op_err err) +{ + struct dma_pl330_chan *pch; + unsigned long flags; + + if (!desc) + return; + + pch = desc->pchan; + + /* If desc aborted */ + if (!pch) + return; + + spin_lock_irqsave(&pch->lock, flags); + + desc->status = DONE; + + spin_unlock_irqrestore(&pch->lock, flags); + + tasklet_schedule(&pch->task); +} + +static void pl330_dotask(struct tasklet_struct *t) +{ + struct pl330_dmac *pl330 = from_tasklet(pl330, t, tasks); + unsigned long flags; + int i; + + spin_lock_irqsave(&pl330->lock, flags); + + /* The DMAC itself gone nuts */ + if (pl330->dmac_tbd.reset_dmac) { + pl330->state = DYING; + /* Reset the manager too */ + pl330->dmac_tbd.reset_mngr = true; + /* Clear the reset flag */ + pl330->dmac_tbd.reset_dmac = false; + } + + if (pl330->dmac_tbd.reset_mngr) { + _stop(pl330->manager); + /* Reset all channels */ + pl330->dmac_tbd.reset_chan = (1 << pl330->pcfg.num_chan) - 1; + /* Clear the reset flag */ + pl330->dmac_tbd.reset_mngr = false; + } + + for (i = 0; i < pl330->pcfg.num_chan; i++) { + + if (pl330->dmac_tbd.reset_chan & (1 << i)) { + struct pl330_thread *thrd = &pl330->channels[i]; + void __iomem *regs = pl330->base; + enum pl330_op_err err; + + _stop(thrd); + + if (readl(regs + FSC) & (1 << thrd->id)) + err = PL330_ERR_FAIL; + else + err = PL330_ERR_ABORT; + + spin_unlock_irqrestore(&pl330->lock, flags); + dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, err); + dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, err); + spin_lock_irqsave(&pl330->lock, flags); + + thrd->req[0].desc = NULL; + thrd->req[1].desc = NULL; + thrd->req_running = -1; + + /* Clear the reset flag */ + pl330->dmac_tbd.reset_chan &= ~(1 << i); + } + } + + spin_unlock_irqrestore(&pl330->lock, flags); + + return; +} + +/* Returns 1 if state was updated, 0 otherwise */ +static int pl330_update(struct pl330_dmac *pl330) +{ + struct dma_pl330_desc *descdone; + unsigned long flags; + void __iomem *regs; + u32 val; + int id, ev, ret = 0; + + regs = pl330->base; + + spin_lock_irqsave(&pl330->lock, flags); + + val = readl(regs + FSM) & 0x1; + if (val) + pl330->dmac_tbd.reset_mngr = true; + else + pl330->dmac_tbd.reset_mngr = false; + + val = readl(regs + FSC) & ((1 << pl330->pcfg.num_chan) - 1); + pl330->dmac_tbd.reset_chan |= val; + if (val) { + int i = 0; + while (i < pl330->pcfg.num_chan) { + if (val & (1 << i)) { + dev_info(pl330->ddma.dev, + "Reset Channel-%d\t CS-%x FTC-%x\n", + i, readl(regs + CS(i)), + readl(regs + FTC(i))); + _stop(&pl330->channels[i]); + } + i++; + } + } + + /* Check which event happened i.e, thread notified */ + val = readl(regs + ES); + if (pl330->pcfg.num_events < 32 + && val & ~((1 << pl330->pcfg.num_events) - 1)) { + pl330->dmac_tbd.reset_dmac = true; + dev_err(pl330->ddma.dev, "%s:%d Unexpected!\n", __func__, + __LINE__); + ret = 1; + goto updt_exit; + } + + for (ev = 0; ev < pl330->pcfg.num_events; ev++) { + if (val & (1 << ev)) { /* Event occurred */ + struct pl330_thread *thrd; + u32 inten = readl(regs + INTEN); + int active; + + /* Clear the event */ + if (inten & (1 << ev)) + writel(1 << ev, regs + INTCLR); + + ret = 1; + + id = pl330->events[ev]; + + thrd = &pl330->channels[id]; + + active = thrd->req_running; + if (active == -1) /* Aborted */ + continue; + + /* Detach the req */ + descdone = thrd->req[active].desc; + thrd->req[active].desc = NULL; + + thrd->req_running = -1; + + /* Get going again ASAP */ + pl330_start_thread(thrd); + + /* For now, just make a list of callbacks to be done */ + list_add_tail(&descdone->rqd, &pl330->req_done); + } + } + + /* Now that we are in no hurry, do the callbacks */ + while (!list_empty(&pl330->req_done)) { + descdone = list_first_entry(&pl330->req_done, + struct dma_pl330_desc, rqd); + list_del(&descdone->rqd); + spin_unlock_irqrestore(&pl330->lock, flags); + dma_pl330_rqcb(descdone, PL330_ERR_NONE); + spin_lock_irqsave(&pl330->lock, flags); + } + +updt_exit: + spin_unlock_irqrestore(&pl330->lock, flags); + + if (pl330->dmac_tbd.reset_dmac + || pl330->dmac_tbd.reset_mngr + || pl330->dmac_tbd.reset_chan) { + ret = 1; + tasklet_schedule(&pl330->tasks); + } + + return ret; +} + +/* Reserve an event */ +static inline int _alloc_event(struct pl330_thread *thrd) +{ + struct pl330_dmac *pl330 = thrd->dmac; + int ev; + + for (ev = 0; ev < pl330->pcfg.num_events; ev++) + if (pl330->events[ev] == -1) { + pl330->events[ev] = thrd->id; + return ev; + } + + return -1; +} + +static bool _chan_ns(const struct pl330_dmac *pl330, int i) +{ + return pl330->pcfg.irq_ns & (1 << i); +} + +/* Upon success, returns IdentityToken for the + * allocated channel, NULL otherwise. + */ +static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) +{ + struct pl330_thread *thrd = NULL; + int chans, i; + + if (pl330->state == DYING) + return NULL; + + chans = pl330->pcfg.num_chan; + + for (i = 0; i < chans; i++) { + thrd = &pl330->channels[i]; + if ((thrd->free) && (!_manager_ns(thrd) || + _chan_ns(pl330, i))) { + thrd->ev = _alloc_event(thrd); + if (thrd->ev >= 0) { + thrd->free = false; + thrd->lstenq = 1; + thrd->req[0].desc = NULL; + thrd->req[1].desc = NULL; + thrd->req_running = -1; + break; + } + } + thrd = NULL; + } + + return thrd; +} + +/* Release an event */ +static inline void _free_event(struct pl330_thread *thrd, int ev) +{ + struct pl330_dmac *pl330 = thrd->dmac; + + /* If the event is valid and was held by the thread */ + if (ev >= 0 && ev < pl330->pcfg.num_events + && pl330->events[ev] == thrd->id) + pl330->events[ev] = -1; +} + +static void pl330_release_channel(struct pl330_thread *thrd) +{ + if (!thrd || thrd->free) + return; + + _stop(thrd); + + dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, PL330_ERR_ABORT); + dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, PL330_ERR_ABORT); + + _free_event(thrd, thrd->ev); + thrd->free = true; +} + +/* Initialize the structure for PL330 configuration, that can be used + * by the client driver the make best use of the DMAC + */ +static void read_dmac_config(struct pl330_dmac *pl330) +{ + void __iomem *regs = pl330->base; + u32 val; + + val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT; + val &= CRD_DATA_WIDTH_MASK; + pl330->pcfg.data_bus_width = 8 * (1 << val); + + val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT; + val &= CRD_DATA_BUFF_MASK; + pl330->pcfg.data_buf_dep = val + 1; + + val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT; + val &= CR0_NUM_CHANS_MASK; + val += 1; + pl330->pcfg.num_chan = val; + + val = readl(regs + CR0); + if (val & CR0_PERIPH_REQ_SET) { + val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK; + val += 1; + pl330->pcfg.num_peri = val; + pl330->pcfg.peri_ns = readl(regs + CR4); + } else { + pl330->pcfg.num_peri = 0; + } + + val = readl(regs + CR0); + if (val & CR0_BOOT_MAN_NS) + pl330->pcfg.mode |= DMAC_MODE_NS; + else + pl330->pcfg.mode &= ~DMAC_MODE_NS; + + val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT; + val &= CR0_NUM_EVENTS_MASK; + val += 1; + pl330->pcfg.num_events = val; + + pl330->pcfg.irq_ns = readl(regs + CR3); +} + +static inline void _reset_thread(struct pl330_thread *thrd) +{ + struct pl330_dmac *pl330 = thrd->dmac; + + thrd->req[0].mc_cpu = pl330->mcode_cpu + + (thrd->id * pl330->mcbufsz); + thrd->req[0].mc_bus = pl330->mcode_bus + + (thrd->id * pl330->mcbufsz); + thrd->req[0].desc = NULL; + + thrd->req[1].mc_cpu = thrd->req[0].mc_cpu + + pl330->mcbufsz / 2; + thrd->req[1].mc_bus = thrd->req[0].mc_bus + + pl330->mcbufsz / 2; + thrd->req[1].desc = NULL; + + thrd->req_running = -1; +} + +static int dmac_alloc_threads(struct pl330_dmac *pl330) +{ + int chans = pl330->pcfg.num_chan; + struct pl330_thread *thrd; + int i; + + /* Allocate 1 Manager and 'chans' Channel threads */ + pl330->channels = kcalloc(1 + chans, sizeof(*thrd), + GFP_KERNEL); + if (!pl330->channels) + return -ENOMEM; + + /* Init Channel threads */ + for (i = 0; i < chans; i++) { + thrd = &pl330->channels[i]; + thrd->id = i; + thrd->dmac = pl330; + _reset_thread(thrd); + thrd->free = true; + } + + /* MANAGER is indexed at the end */ + thrd = &pl330->channels[chans]; + thrd->id = chans; + thrd->dmac = pl330; + thrd->free = false; + pl330->manager = thrd; + + return 0; +} + +static int dmac_alloc_resources(struct pl330_dmac *pl330) +{ + int chans = pl330->pcfg.num_chan; + int ret; + + /* + * Alloc MicroCode buffer for 'chans' Channel threads. + * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN) + */ + pl330->mcode_cpu = dma_alloc_attrs(pl330->ddma.dev, + chans * pl330->mcbufsz, + &pl330->mcode_bus, GFP_KERNEL, + DMA_ATTR_PRIVILEGED); + if (!pl330->mcode_cpu) { + dev_err(pl330->ddma.dev, "%s:%d Can't allocate memory!\n", + __func__, __LINE__); + return -ENOMEM; + } + + ret = dmac_alloc_threads(pl330); + if (ret) { + dev_err(pl330->ddma.dev, "%s:%d Can't to create channels for DMAC!\n", + __func__, __LINE__); + dma_free_attrs(pl330->ddma.dev, + chans * pl330->mcbufsz, + pl330->mcode_cpu, pl330->mcode_bus, + DMA_ATTR_PRIVILEGED); + return ret; + } + + return 0; +} + +static int pl330_add(struct pl330_dmac *pl330) +{ + int i, ret; + + /* Check if we can handle this DMAC */ + if ((pl330->pcfg.periph_id & 0xfffff) != PERIPH_ID_VAL) { + dev_err(pl330->ddma.dev, "PERIPH_ID 0x%x !\n", + pl330->pcfg.periph_id); + return -EINVAL; + } + + /* Read the configuration of the DMAC */ + read_dmac_config(pl330); + + if (pl330->pcfg.num_events == 0) { + dev_err(pl330->ddma.dev, "%s:%d Can't work without events!\n", + __func__, __LINE__); + return -EINVAL; + } + + spin_lock_init(&pl330->lock); + + INIT_LIST_HEAD(&pl330->req_done); + + /* Use default MC buffer size if not provided */ + if (!pl330->mcbufsz) + pl330->mcbufsz = MCODE_BUFF_PER_REQ * 2; + + /* Mark all events as free */ + for (i = 0; i < pl330->pcfg.num_events; i++) + pl330->events[i] = -1; + + /* Allocate resources needed by the DMAC */ + ret = dmac_alloc_resources(pl330); + if (ret) { + dev_err(pl330->ddma.dev, "Unable to create channels for DMAC\n"); + return ret; + } + + tasklet_setup(&pl330->tasks, pl330_dotask); + + pl330->state = INIT; + + return 0; +} + +static int dmac_free_threads(struct pl330_dmac *pl330) +{ + struct pl330_thread *thrd; + int i; + + /* Release Channel threads */ + for (i = 0; i < pl330->pcfg.num_chan; i++) { + thrd = &pl330->channels[i]; + pl330_release_channel(thrd); + } + + /* Free memory */ + kfree(pl330->channels); + + return 0; +} + +static void pl330_del(struct pl330_dmac *pl330) +{ + pl330->state = UNINIT; + + tasklet_kill(&pl330->tasks); + + /* Free DMAC resources */ + dmac_free_threads(pl330); + + dma_free_attrs(pl330->ddma.dev, + pl330->pcfg.num_chan * pl330->mcbufsz, pl330->mcode_cpu, + pl330->mcode_bus, DMA_ATTR_PRIVILEGED); +} + +/* forward declaration */ +static struct amba_driver pl330_driver; + +static inline struct dma_pl330_chan * +to_pchan(struct dma_chan *ch) +{ + if (!ch) + return NULL; + + return container_of(ch, struct dma_pl330_chan, chan); +} + +static inline struct dma_pl330_desc * +to_desc(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct dma_pl330_desc, txd); +} + +static inline void fill_queue(struct dma_pl330_chan *pch) +{ + struct dma_pl330_desc *desc; + int ret; + + list_for_each_entry(desc, &pch->work_list, node) { + + /* If already submitted */ + if (desc->status == BUSY || desc->status == PAUSED) + continue; + + ret = pl330_submit_req(pch->thread, desc); + if (!ret) { + desc->status = BUSY; + } else if (ret == -EAGAIN) { + /* QFull or DMAC Dying */ + break; + } else { + /* Unacceptable request */ + desc->status = DONE; + dev_err(pch->dmac->ddma.dev, "%s:%d Bad Desc(%d)\n", + __func__, __LINE__, desc->txd.cookie); + tasklet_schedule(&pch->task); + } + } +} + +static void pl330_tasklet(struct tasklet_struct *t) +{ + struct dma_pl330_chan *pch = from_tasklet(pch, t, task); + struct dma_pl330_desc *desc, *_dt; + unsigned long flags; + bool power_down = false; + + spin_lock_irqsave(&pch->lock, flags); + + /* Pick up ripe tomatoes */ + list_for_each_entry_safe(desc, _dt, &pch->work_list, node) + if (desc->status == DONE) { + if (!pch->cyclic) + dma_cookie_complete(&desc->txd); + list_move_tail(&desc->node, &pch->completed_list); + } + + /* Try to submit a req imm. next to the last completed cookie */ + fill_queue(pch); + + if (list_empty(&pch->work_list)) { + spin_lock(&pch->thread->dmac->lock); + _stop(pch->thread); + spin_unlock(&pch->thread->dmac->lock); + power_down = true; + pch->active = false; + } else { + /* Make sure the PL330 Channel thread is active */ + spin_lock(&pch->thread->dmac->lock); + pl330_start_thread(pch->thread); + spin_unlock(&pch->thread->dmac->lock); + } + + while (!list_empty(&pch->completed_list)) { + struct dmaengine_desc_callback cb; + + desc = list_first_entry(&pch->completed_list, + struct dma_pl330_desc, node); + + dmaengine_desc_get_callback(&desc->txd, &cb); + + if (pch->cyclic) { + desc->status = PREP; + list_move_tail(&desc->node, &pch->work_list); + if (power_down) { + pch->active = true; + spin_lock(&pch->thread->dmac->lock); + pl330_start_thread(pch->thread); + spin_unlock(&pch->thread->dmac->lock); + power_down = false; + } + } else { + desc->status = FREE; + list_move_tail(&desc->node, &pch->dmac->desc_pool); + } + + dma_descriptor_unmap(&desc->txd); + + if (dmaengine_desc_callback_valid(&cb)) { + spin_unlock_irqrestore(&pch->lock, flags); + dmaengine_desc_callback_invoke(&cb, NULL); + spin_lock_irqsave(&pch->lock, flags); + } + } + spin_unlock_irqrestore(&pch->lock, flags); + + /* If work list empty, power down */ + if (power_down) { + pm_runtime_mark_last_busy(pch->dmac->ddma.dev); + pm_runtime_put_autosuspend(pch->dmac->ddma.dev); + } +} + +static struct dma_chan *of_dma_pl330_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + int count = dma_spec->args_count; + struct pl330_dmac *pl330 = ofdma->of_dma_data; + unsigned int chan_id; + + if (!pl330) + return NULL; + + if (count != 1) + return NULL; + + chan_id = dma_spec->args[0]; + if (chan_id >= pl330->num_peripherals) + return NULL; + + return dma_get_slave_channel(&pl330->peripherals[chan_id].chan); +} + +static int pl330_alloc_chan_resources(struct dma_chan *chan) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_dmac *pl330 = pch->dmac; + unsigned long flags; + + spin_lock_irqsave(&pl330->lock, flags); + + dma_cookie_init(chan); + pch->cyclic = false; + + pch->thread = pl330_request_channel(pl330); + if (!pch->thread) { + spin_unlock_irqrestore(&pl330->lock, flags); + return -ENOMEM; + } + + tasklet_setup(&pch->task, pl330_tasklet); + + spin_unlock_irqrestore(&pl330->lock, flags); + + return 1; +} + +/* + * We need the data direction between the DMAC (the dma-mapping "device") and + * the FIFO (the dmaengine "dev"), from the FIFO's point of view. Confusing! + */ +static enum dma_data_direction +pl330_dma_slave_map_dir(enum dma_transfer_direction dir) +{ + switch (dir) { + case DMA_MEM_TO_DEV: + return DMA_FROM_DEVICE; + case DMA_DEV_TO_MEM: + return DMA_TO_DEVICE; + case DMA_DEV_TO_DEV: + return DMA_BIDIRECTIONAL; + default: + return DMA_NONE; + } +} + +static void pl330_unprep_slave_fifo(struct dma_pl330_chan *pch) +{ + if (pch->dir != DMA_NONE) + dma_unmap_resource(pch->chan.device->dev, pch->fifo_dma, + 1 << pch->burst_sz, pch->dir, 0); + pch->dir = DMA_NONE; +} + + +static bool pl330_prep_slave_fifo(struct dma_pl330_chan *pch, + enum dma_transfer_direction dir) +{ + struct device *dev = pch->chan.device->dev; + enum dma_data_direction dma_dir = pl330_dma_slave_map_dir(dir); + + /* Already mapped for this config? */ + if (pch->dir == dma_dir) + return true; + + pl330_unprep_slave_fifo(pch); + pch->fifo_dma = dma_map_resource(dev, pch->fifo_addr, + 1 << pch->burst_sz, dma_dir, 0); + if (dma_mapping_error(dev, pch->fifo_dma)) + return false; + + pch->dir = dma_dir; + return true; +} + +static int fixup_burst_len(int max_burst_len, int quirks) +{ + if (max_burst_len > PL330_MAX_BURST) + return PL330_MAX_BURST; + else if (max_burst_len < 1) + return 1; + else + return max_burst_len; +} + +static int pl330_config_write(struct dma_chan *chan, + struct dma_slave_config *slave_config, + enum dma_transfer_direction direction) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + + pl330_unprep_slave_fifo(pch); + if (direction == DMA_MEM_TO_DEV) { + if (slave_config->dst_addr) + pch->fifo_addr = slave_config->dst_addr; + if (slave_config->dst_addr_width) + pch->burst_sz = __ffs(slave_config->dst_addr_width); + pch->burst_len = fixup_burst_len(slave_config->dst_maxburst, + pch->dmac->quirks); + } else if (direction == DMA_DEV_TO_MEM) { + if (slave_config->src_addr) + pch->fifo_addr = slave_config->src_addr; + if (slave_config->src_addr_width) + pch->burst_sz = __ffs(slave_config->src_addr_width); + pch->burst_len = fixup_burst_len(slave_config->src_maxburst, + pch->dmac->quirks); + } + + return 0; +} + +static int pl330_config(struct dma_chan *chan, + struct dma_slave_config *slave_config) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + + memcpy(&pch->slave_config, slave_config, sizeof(*slave_config)); + + return 0; +} + +static int pl330_terminate_all(struct dma_chan *chan) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + struct dma_pl330_desc *desc; + unsigned long flags; + struct pl330_dmac *pl330 = pch->dmac; + bool power_down = false; + + pm_runtime_get_sync(pl330->ddma.dev); + spin_lock_irqsave(&pch->lock, flags); + + spin_lock(&pl330->lock); + _stop(pch->thread); + pch->thread->req[0].desc = NULL; + pch->thread->req[1].desc = NULL; + pch->thread->req_running = -1; + spin_unlock(&pl330->lock); + + power_down = pch->active; + pch->active = false; + + /* Mark all desc done */ + list_for_each_entry(desc, &pch->submitted_list, node) { + desc->status = FREE; + dma_cookie_complete(&desc->txd); + } + + list_for_each_entry(desc, &pch->work_list , node) { + desc->status = FREE; + dma_cookie_complete(&desc->txd); + } + + list_splice_tail_init(&pch->submitted_list, &pl330->desc_pool); + list_splice_tail_init(&pch->work_list, &pl330->desc_pool); + list_splice_tail_init(&pch->completed_list, &pl330->desc_pool); + spin_unlock_irqrestore(&pch->lock, flags); + pm_runtime_mark_last_busy(pl330->ddma.dev); + if (power_down) + pm_runtime_put_autosuspend(pl330->ddma.dev); + pm_runtime_put_autosuspend(pl330->ddma.dev); + + return 0; +} + +/* + * We don't support DMA_RESUME command because of hardware + * limitations, so after pausing the channel we cannot restore + * it to active state. We have to terminate channel and setup + * DMA transfer again. This pause feature was implemented to + * allow safely read residue before channel termination. + */ +static int pl330_pause(struct dma_chan *chan) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_dmac *pl330 = pch->dmac; + struct dma_pl330_desc *desc; + unsigned long flags; + + pm_runtime_get_sync(pl330->ddma.dev); + spin_lock_irqsave(&pch->lock, flags); + + spin_lock(&pl330->lock); + _stop(pch->thread); + spin_unlock(&pl330->lock); + + list_for_each_entry(desc, &pch->work_list, node) { + if (desc->status == BUSY) + desc->status = PAUSED; + } + spin_unlock_irqrestore(&pch->lock, flags); + pm_runtime_mark_last_busy(pl330->ddma.dev); + pm_runtime_put_autosuspend(pl330->ddma.dev); + + return 0; +} + +static void pl330_free_chan_resources(struct dma_chan *chan) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_dmac *pl330 = pch->dmac; + unsigned long flags; + + tasklet_kill(&pch->task); + + pm_runtime_get_sync(pch->dmac->ddma.dev); + spin_lock_irqsave(&pl330->lock, flags); + + pl330_release_channel(pch->thread); + pch->thread = NULL; + + if (pch->cyclic) + list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool); + + spin_unlock_irqrestore(&pl330->lock, flags); + pm_runtime_mark_last_busy(pch->dmac->ddma.dev); + pm_runtime_put_autosuspend(pch->dmac->ddma.dev); + pl330_unprep_slave_fifo(pch); +} + +static int pl330_get_current_xferred_count(struct dma_pl330_chan *pch, + struct dma_pl330_desc *desc) +{ + struct pl330_thread *thrd = pch->thread; + struct pl330_dmac *pl330 = pch->dmac; + void __iomem *regs = thrd->dmac->base; + u32 val, addr; + + pm_runtime_get_sync(pl330->ddma.dev); + val = addr = 0; + if (desc->rqcfg.src_inc) { + val = readl(regs + SA(thrd->id)); + addr = desc->px.src_addr; + } else { + val = readl(regs + DA(thrd->id)); + addr = desc->px.dst_addr; + } + pm_runtime_mark_last_busy(pch->dmac->ddma.dev); + pm_runtime_put_autosuspend(pl330->ddma.dev); + + /* If DMAMOV hasn't finished yet, SAR/DAR can be zero */ + if (!val) + return 0; + + return val - addr; +} + +static enum dma_status +pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + enum dma_status ret; + unsigned long flags; + struct dma_pl330_desc *desc, *running = NULL, *last_enq = NULL; + struct dma_pl330_chan *pch = to_pchan(chan); + unsigned int transferred, residual = 0; + + ret = dma_cookie_status(chan, cookie, txstate); + + if (!txstate) + return ret; + + if (ret == DMA_COMPLETE) + goto out; + + spin_lock_irqsave(&pch->lock, flags); + spin_lock(&pch->thread->dmac->lock); + + if (pch->thread->req_running != -1) + running = pch->thread->req[pch->thread->req_running].desc; + + last_enq = pch->thread->req[pch->thread->lstenq].desc; + + /* Check in pending list */ + list_for_each_entry(desc, &pch->work_list, node) { + if (desc->status == DONE) + transferred = desc->bytes_requested; + else if (running && desc == running) + transferred = + pl330_get_current_xferred_count(pch, desc); + else if (desc->status == BUSY || desc->status == PAUSED) + /* + * Busy but not running means either just enqueued, + * or finished and not yet marked done + */ + if (desc == last_enq) + transferred = 0; + else + transferred = desc->bytes_requested; + else + transferred = 0; + residual += desc->bytes_requested - transferred; + if (desc->txd.cookie == cookie) { + switch (desc->status) { + case DONE: + ret = DMA_COMPLETE; + break; + case PAUSED: + ret = DMA_PAUSED; + break; + case PREP: + case BUSY: + ret = DMA_IN_PROGRESS; + break; + default: + WARN_ON(1); + } + break; + } + if (desc->last) + residual = 0; + } + spin_unlock(&pch->thread->dmac->lock); + spin_unlock_irqrestore(&pch->lock, flags); + +out: + dma_set_residue(txstate, residual); + + return ret; +} + +static void pl330_issue_pending(struct dma_chan *chan) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + unsigned long flags; + + spin_lock_irqsave(&pch->lock, flags); + if (list_empty(&pch->work_list)) { + /* + * Warn on nothing pending. Empty submitted_list may + * break our pm_runtime usage counter as it is + * updated on work_list emptiness status. + */ + WARN_ON(list_empty(&pch->submitted_list)); + pch->active = true; + pm_runtime_get_sync(pch->dmac->ddma.dev); + } + list_splice_tail_init(&pch->submitted_list, &pch->work_list); + spin_unlock_irqrestore(&pch->lock, flags); + + pl330_tasklet(&pch->task); +} + +/* + * We returned the last one of the circular list of descriptor(s) + * from prep_xxx, so the argument to submit corresponds to the last + * descriptor of the list. + */ +static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dma_pl330_desc *desc, *last = to_desc(tx); + struct dma_pl330_chan *pch = to_pchan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&pch->lock, flags); + + /* Assign cookies to all nodes */ + while (!list_empty(&last->node)) { + desc = list_entry(last->node.next, struct dma_pl330_desc, node); + if (pch->cyclic) { + desc->txd.callback = last->txd.callback; + desc->txd.callback_param = last->txd.callback_param; + } + desc->last = false; + + dma_cookie_assign(&desc->txd); + + list_move_tail(&desc->node, &pch->submitted_list); + } + + last->last = true; + cookie = dma_cookie_assign(&last->txd); + list_add_tail(&last->node, &pch->submitted_list); + spin_unlock_irqrestore(&pch->lock, flags); + + return cookie; +} + +static inline void _init_desc(struct dma_pl330_desc *desc) +{ + desc->rqcfg.swap = SWAP_NO; + desc->rqcfg.scctl = CCTRL0; + desc->rqcfg.dcctl = CCTRL0; + desc->txd.tx_submit = pl330_tx_submit; + + INIT_LIST_HEAD(&desc->node); +} + +/* Returns the number of descriptors added to the DMAC pool */ +static int add_desc(struct list_head *pool, spinlock_t *lock, + gfp_t flg, int count) +{ + struct dma_pl330_desc *desc; + unsigned long flags; + int i; + + desc = kcalloc(count, sizeof(*desc), flg); + if (!desc) + return 0; + + spin_lock_irqsave(lock, flags); + + for (i = 0; i < count; i++) { + _init_desc(&desc[i]); + list_add_tail(&desc[i].node, pool); + } + + spin_unlock_irqrestore(lock, flags); + + return count; +} + +static struct dma_pl330_desc *pluck_desc(struct list_head *pool, + spinlock_t *lock) +{ + struct dma_pl330_desc *desc = NULL; + unsigned long flags; + + spin_lock_irqsave(lock, flags); + + if (!list_empty(pool)) { + desc = list_entry(pool->next, + struct dma_pl330_desc, node); + + list_del_init(&desc->node); + + desc->status = PREP; + desc->txd.callback = NULL; + } + + spin_unlock_irqrestore(lock, flags); + + return desc; +} + +static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) +{ + struct pl330_dmac *pl330 = pch->dmac; + u8 *peri_id = pch->chan.private; + struct dma_pl330_desc *desc; + + /* Pluck one desc from the pool of DMAC */ + desc = pluck_desc(&pl330->desc_pool, &pl330->pool_lock); + + /* If the DMAC pool is empty, alloc new */ + if (!desc) { + static DEFINE_SPINLOCK(lock); + LIST_HEAD(pool); + + if (!add_desc(&pool, &lock, GFP_ATOMIC, 1)) + return NULL; + + desc = pluck_desc(&pool, &lock); + WARN_ON(!desc || !list_empty(&pool)); + } + + /* Initialize the descriptor */ + desc->pchan = pch; + desc->txd.cookie = 0; + async_tx_ack(&desc->txd); + + desc->peri = peri_id ? pch->chan.chan_id : 0; + desc->rqcfg.pcfg = &pch->dmac->pcfg; + + dma_async_tx_descriptor_init(&desc->txd, &pch->chan); + + return desc; +} + +static inline void fill_px(struct pl330_xfer *px, + dma_addr_t dst, dma_addr_t src, size_t len) +{ + px->bytes = len; + px->dst_addr = dst; + px->src_addr = src; +} + +static struct dma_pl330_desc * +__pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst, + dma_addr_t src, size_t len) +{ + struct dma_pl330_desc *desc = pl330_get_desc(pch); + + if (!desc) { + dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", + __func__, __LINE__); + return NULL; + } + + /* + * Ideally we should lookout for reqs bigger than + * those that can be programmed with 256 bytes of + * MC buffer, but considering a req size is seldom + * going to be word-unaligned and more than 200MB, + * we take it easy. + * Also, should the limit is reached we'd rather + * have the platform increase MC buffer size than + * complicating this API driver. + */ + fill_px(&desc->px, dst, src, len); + + return desc; +} + +/* Call after fixing burst size */ +static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len) +{ + struct dma_pl330_chan *pch = desc->pchan; + struct pl330_dmac *pl330 = pch->dmac; + int burst_len; + + burst_len = pl330->pcfg.data_bus_width / 8; + burst_len *= pl330->pcfg.data_buf_dep / pl330->pcfg.num_chan; + burst_len >>= desc->rqcfg.brst_size; + + /* src/dst_burst_len can't be more than 16 */ + if (burst_len > PL330_MAX_BURST) + burst_len = PL330_MAX_BURST; + + return burst_len; +} + +static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct dma_pl330_desc *desc = NULL, *first = NULL; + struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_dmac *pl330 = pch->dmac; + unsigned int i; + dma_addr_t dst; + dma_addr_t src; + + if (len % period_len != 0) + return NULL; + + if (!is_slave_direction(direction)) { + dev_err(pch->dmac->ddma.dev, "%s:%d Invalid dma direction\n", + __func__, __LINE__); + return NULL; + } + + pl330_config_write(chan, &pch->slave_config, direction); + + if (!pl330_prep_slave_fifo(pch, direction)) + return NULL; + + for (i = 0; i < len / period_len; i++) { + desc = pl330_get_desc(pch); + if (!desc) { + unsigned long iflags; + + dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", + __func__, __LINE__); + + if (!first) + return NULL; + + spin_lock_irqsave(&pl330->pool_lock, iflags); + + while (!list_empty(&first->node)) { + desc = list_entry(first->node.next, + struct dma_pl330_desc, node); + list_move_tail(&desc->node, &pl330->desc_pool); + } + + list_move_tail(&first->node, &pl330->desc_pool); + + spin_unlock_irqrestore(&pl330->pool_lock, iflags); + + return NULL; + } + + switch (direction) { + case DMA_MEM_TO_DEV: + desc->rqcfg.src_inc = 1; + desc->rqcfg.dst_inc = 0; + src = dma_addr; + dst = pch->fifo_dma; + break; + case DMA_DEV_TO_MEM: + desc->rqcfg.src_inc = 0; + desc->rqcfg.dst_inc = 1; + src = pch->fifo_dma; + dst = dma_addr; + break; + default: + break; + } + + desc->rqtype = direction; + desc->rqcfg.brst_size = pch->burst_sz; + desc->rqcfg.brst_len = pch->burst_len; + desc->bytes_requested = period_len; + fill_px(&desc->px, dst, src, period_len); + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->node); + + dma_addr += period_len; + } + + if (!desc) + return NULL; + + pch->cyclic = true; + + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct dma_pl330_desc *desc; + struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_dmac *pl330; + int burst; + + if (unlikely(!pch || !len)) + return NULL; + + pl330 = pch->dmac; + + desc = __pl330_prep_dma_memcpy(pch, dst, src, len); + if (!desc) + return NULL; + + desc->rqcfg.src_inc = 1; + desc->rqcfg.dst_inc = 1; + desc->rqtype = DMA_MEM_TO_MEM; + + /* Select max possible burst size */ + burst = pl330->pcfg.data_bus_width / 8; + + /* + * Make sure we use a burst size that aligns with all the memcpy + * parameters because our DMA programming algorithm doesn't cope with + * transfers which straddle an entry in the DMA device's MFIFO. + */ + while ((src | dst | len) & (burst - 1)) + burst /= 2; + + desc->rqcfg.brst_size = 0; + while (burst != (1 << desc->rqcfg.brst_size)) + desc->rqcfg.brst_size++; + + desc->rqcfg.brst_len = get_burst_len(desc, len); + /* + * If burst size is smaller than bus width then make sure we only + * transfer one at a time to avoid a burst stradling an MFIFO entry. + */ + if (burst * 8 < pl330->pcfg.data_bus_width) + desc->rqcfg.brst_len = 1; + + desc->bytes_requested = len; + + return &desc->txd; +} + +static void __pl330_giveback_desc(struct pl330_dmac *pl330, + struct dma_pl330_desc *first) +{ + unsigned long flags; + struct dma_pl330_desc *desc; + + if (!first) + return; + + spin_lock_irqsave(&pl330->pool_lock, flags); + + while (!list_empty(&first->node)) { + desc = list_entry(first->node.next, + struct dma_pl330_desc, node); + list_move_tail(&desc->node, &pl330->desc_pool); + } + + list_move_tail(&first->node, &pl330->desc_pool); + + spin_unlock_irqrestore(&pl330->pool_lock, flags); +} + +static struct dma_async_tx_descriptor * +pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flg, void *context) +{ + struct dma_pl330_desc *first, *desc = NULL; + struct dma_pl330_chan *pch = to_pchan(chan); + struct scatterlist *sg; + int i; + + if (unlikely(!pch || !sgl || !sg_len)) + return NULL; + + pl330_config_write(chan, &pch->slave_config, direction); + + if (!pl330_prep_slave_fifo(pch, direction)) + return NULL; + + first = NULL; + + for_each_sg(sgl, sg, sg_len, i) { + + desc = pl330_get_desc(pch); + if (!desc) { + struct pl330_dmac *pl330 = pch->dmac; + + dev_err(pch->dmac->ddma.dev, + "%s:%d Unable to fetch desc\n", + __func__, __LINE__); + __pl330_giveback_desc(pl330, first); + + return NULL; + } + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->node); + + if (direction == DMA_MEM_TO_DEV) { + desc->rqcfg.src_inc = 1; + desc->rqcfg.dst_inc = 0; + fill_px(&desc->px, pch->fifo_dma, sg_dma_address(sg), + sg_dma_len(sg)); + } else { + desc->rqcfg.src_inc = 0; + desc->rqcfg.dst_inc = 1; + fill_px(&desc->px, sg_dma_address(sg), pch->fifo_dma, + sg_dma_len(sg)); + } + + desc->rqcfg.brst_size = pch->burst_sz; + desc->rqcfg.brst_len = pch->burst_len; + desc->rqtype = direction; + desc->bytes_requested = sg_dma_len(sg); + } + + /* Return the last desc in the chain */ + return &desc->txd; +} + +static irqreturn_t pl330_irq_handler(int irq, void *data) +{ + if (pl330_update(data)) + return IRQ_HANDLED; + else + return IRQ_NONE; +} + +#define PL330_DMA_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES) + +#ifdef CONFIG_DEBUG_FS +static int pl330_debugfs_show(struct seq_file *s, void *data) +{ + struct pl330_dmac *pl330 = s->private; + int chans, pchs, ch, pr; + + chans = pl330->pcfg.num_chan; + pchs = pl330->num_peripherals; + + seq_puts(s, "PL330 physical channels:\n"); + seq_puts(s, "THREAD:\t\tCHANNEL:\n"); + seq_puts(s, "--------\t-----\n"); + for (ch = 0; ch < chans; ch++) { + struct pl330_thread *thrd = &pl330->channels[ch]; + int found = -1; + + for (pr = 0; pr < pchs; pr++) { + struct dma_pl330_chan *pch = &pl330->peripherals[pr]; + + if (!pch->thread || thrd->id != pch->thread->id) + continue; + + found = pr; + } + + seq_printf(s, "%d\t\t", thrd->id); + if (found == -1) + seq_puts(s, "--\n"); + else + seq_printf(s, "%d\n", found); + } + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(pl330_debugfs); + +static inline void init_pl330_debugfs(struct pl330_dmac *pl330) +{ + debugfs_create_file(dev_name(pl330->ddma.dev), + S_IFREG | 0444, NULL, pl330, + &pl330_debugfs_fops); +} +#else +static inline void init_pl330_debugfs(struct pl330_dmac *pl330) +{ +} +#endif + +/* + * Runtime PM callbacks are provided by amba/bus.c driver. + * + * It is assumed here that IRQ safe runtime PM is chosen in probe and amba + * bus driver will only disable/enable the clock in runtime PM callbacks. + */ +static int __maybe_unused pl330_suspend(struct device *dev) +{ + struct amba_device *pcdev = to_amba_device(dev); + + pm_runtime_force_suspend(dev); + clk_unprepare(pcdev->pclk); + + return 0; +} + +static int __maybe_unused pl330_resume(struct device *dev) +{ + struct amba_device *pcdev = to_amba_device(dev); + int ret; + + ret = clk_prepare(pcdev->pclk); + if (ret) + return ret; + + pm_runtime_force_resume(dev); + + return ret; +} + +static const struct dev_pm_ops pl330_pm = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(pl330_suspend, pl330_resume) +}; + +static int +pl330_probe(struct amba_device *adev, const struct amba_id *id) +{ + struct pl330_config *pcfg; + struct pl330_dmac *pl330; + struct dma_pl330_chan *pch, *_p; + struct dma_device *pd; + struct resource *res; + int i, ret, irq; + int num_chan; + struct device_node *np = adev->dev.of_node; + + ret = dma_set_mask_and_coherent(&adev->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + /* Allocate a new DMAC and its Channels */ + pl330 = devm_kzalloc(&adev->dev, sizeof(*pl330), GFP_KERNEL); + if (!pl330) + return -ENOMEM; + + pd = &pl330->ddma; + pd->dev = &adev->dev; + + pl330->mcbufsz = 0; + + /* get quirk */ + for (i = 0; i < ARRAY_SIZE(of_quirks); i++) + if (of_property_read_bool(np, of_quirks[i].quirk)) + pl330->quirks |= of_quirks[i].id; + + res = &adev->res; + pl330->base = devm_ioremap_resource(&adev->dev, res); + if (IS_ERR(pl330->base)) + return PTR_ERR(pl330->base); + + amba_set_drvdata(adev, pl330); + + pl330->rstc = devm_reset_control_get_optional(&adev->dev, "dma"); + if (IS_ERR(pl330->rstc)) { + return dev_err_probe(&adev->dev, PTR_ERR(pl330->rstc), "Failed to get reset!\n"); + } else { + ret = reset_control_deassert(pl330->rstc); + if (ret) { + dev_err(&adev->dev, "Couldn't deassert the device from reset!\n"); + return ret; + } + } + + pl330->rstc_ocp = devm_reset_control_get_optional(&adev->dev, "dma-ocp"); + if (IS_ERR(pl330->rstc_ocp)) { + return dev_err_probe(&adev->dev, PTR_ERR(pl330->rstc_ocp), + "Failed to get OCP reset!\n"); + } else { + ret = reset_control_deassert(pl330->rstc_ocp); + if (ret) { + dev_err(&adev->dev, "Couldn't deassert the device from OCP reset!\n"); + return ret; + } + } + + for (i = 0; i < AMBA_NR_IRQS; i++) { + irq = adev->irq[i]; + if (irq) { + ret = devm_request_irq(&adev->dev, irq, + pl330_irq_handler, 0, + dev_name(&adev->dev), pl330); + if (ret) + return ret; + } else { + break; + } + } + + pcfg = &pl330->pcfg; + + pcfg->periph_id = adev->periphid; + ret = pl330_add(pl330); + if (ret) + return ret; + + INIT_LIST_HEAD(&pl330->desc_pool); + spin_lock_init(&pl330->pool_lock); + + /* Create a descriptor pool of default size */ + if (!add_desc(&pl330->desc_pool, &pl330->pool_lock, + GFP_KERNEL, NR_DEFAULT_DESC)) + dev_warn(&adev->dev, "unable to allocate desc\n"); + + INIT_LIST_HEAD(&pd->channels); + + /* Initialize channel parameters */ + num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan); + + pl330->num_peripherals = num_chan; + + pl330->peripherals = kcalloc(num_chan, sizeof(*pch), GFP_KERNEL); + if (!pl330->peripherals) { + ret = -ENOMEM; + goto probe_err2; + } + + for (i = 0; i < num_chan; i++) { + pch = &pl330->peripherals[i]; + + pch->chan.private = adev->dev.of_node; + INIT_LIST_HEAD(&pch->submitted_list); + INIT_LIST_HEAD(&pch->work_list); + INIT_LIST_HEAD(&pch->completed_list); + spin_lock_init(&pch->lock); + pch->thread = NULL; + pch->chan.device = pd; + pch->dmac = pl330; + pch->dir = DMA_NONE; + + /* Add the channel to the DMAC list */ + list_add_tail(&pch->chan.device_node, &pd->channels); + } + + dma_cap_set(DMA_MEMCPY, pd->cap_mask); + if (pcfg->num_peri) { + dma_cap_set(DMA_SLAVE, pd->cap_mask); + dma_cap_set(DMA_CYCLIC, pd->cap_mask); + dma_cap_set(DMA_PRIVATE, pd->cap_mask); + } + + pd->device_alloc_chan_resources = pl330_alloc_chan_resources; + pd->device_free_chan_resources = pl330_free_chan_resources; + pd->device_prep_dma_memcpy = pl330_prep_dma_memcpy; + pd->device_prep_dma_cyclic = pl330_prep_dma_cyclic; + pd->device_tx_status = pl330_tx_status; + pd->device_prep_slave_sg = pl330_prep_slave_sg; + pd->device_config = pl330_config; + pd->device_pause = pl330_pause; + pd->device_terminate_all = pl330_terminate_all; + pd->device_issue_pending = pl330_issue_pending; + pd->src_addr_widths = PL330_DMA_BUSWIDTHS; + pd->dst_addr_widths = PL330_DMA_BUSWIDTHS; + pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + pd->max_burst = PL330_MAX_BURST; + + ret = dma_async_device_register(pd); + if (ret) { + dev_err(&adev->dev, "unable to register DMAC\n"); + goto probe_err3; + } + + if (adev->dev.of_node) { + ret = of_dma_controller_register(adev->dev.of_node, + of_dma_pl330_xlate, pl330); + if (ret) { + dev_err(&adev->dev, + "unable to register DMA to the generic DT DMA helpers\n"); + } + } + + /* + * This is the limit for transfers with a buswidth of 1, larger + * buswidths will have larger limits. + */ + ret = dma_set_max_seg_size(&adev->dev, 1900800); + if (ret) + dev_err(&adev->dev, "unable to set the seg size\n"); + + + init_pl330_debugfs(pl330); + dev_info(&adev->dev, + "Loaded driver for PL330 DMAC-%x\n", adev->periphid); + dev_info(&adev->dev, + "\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n", + pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan, + pcfg->num_peri, pcfg->num_events); + + pm_runtime_irq_safe(&adev->dev); + pm_runtime_use_autosuspend(&adev->dev); + pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY); + pm_runtime_mark_last_busy(&adev->dev); + pm_runtime_put_autosuspend(&adev->dev); + + return 0; +probe_err3: + /* Idle the DMAC */ + list_for_each_entry_safe(pch, _p, &pl330->ddma.channels, + chan.device_node) { + + /* Remove the channel */ + list_del(&pch->chan.device_node); + + /* Flush the channel */ + if (pch->thread) { + pl330_terminate_all(&pch->chan); + pl330_free_chan_resources(&pch->chan); + } + } +probe_err2: + pl330_del(pl330); + + if (pl330->rstc_ocp) + reset_control_assert(pl330->rstc_ocp); + + if (pl330->rstc) + reset_control_assert(pl330->rstc); + return ret; +} + +static void pl330_remove(struct amba_device *adev) +{ + struct pl330_dmac *pl330 = amba_get_drvdata(adev); + struct dma_pl330_chan *pch, *_p; + int i, irq; + + pm_runtime_get_noresume(pl330->ddma.dev); + + if (adev->dev.of_node) + of_dma_controller_free(adev->dev.of_node); + + for (i = 0; i < AMBA_NR_IRQS; i++) { + irq = adev->irq[i]; + if (irq) + devm_free_irq(&adev->dev, irq, pl330); + } + + dma_async_device_unregister(&pl330->ddma); + + /* Idle the DMAC */ + list_for_each_entry_safe(pch, _p, &pl330->ddma.channels, + chan.device_node) { + + /* Remove the channel */ + list_del(&pch->chan.device_node); + + /* Flush the channel */ + if (pch->thread) { + pl330_terminate_all(&pch->chan); + pl330_free_chan_resources(&pch->chan); + } + } + + pl330_del(pl330); + + if (pl330->rstc_ocp) + reset_control_assert(pl330->rstc_ocp); + + if (pl330->rstc) + reset_control_assert(pl330->rstc); +} + +static const struct amba_id pl330_ids[] = { + { + .id = 0x00041330, + .mask = 0x000fffff, + }, + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl330_ids); + +static struct amba_driver pl330_driver = { + .drv = { + .owner = THIS_MODULE, + .name = "dma-pl330", + .pm = &pl330_pm, + }, + .id_table = pl330_ids, + .probe = pl330_probe, + .remove = pl330_remove, +}; + +module_amba_driver(pl330_driver); + +MODULE_AUTHOR("Jaswinder Singh "); +MODULE_DESCRIPTION("API Driver for PL330 DMAC"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/plx_dma.c b/drivers/dma/plx_dma.c new file mode 100644 index 0000000000..34b6416c32 --- /dev/null +++ b/drivers/dma/plx_dma.c @@ -0,0 +1,635 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Microsemi Switchtec(tm) PCIe Management Driver + * Copyright (c) 2019, Logan Gunthorpe + * Copyright (c) 2019, GigaIO Networks, Inc + */ + +#include "dmaengine.h" + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("PLX ExpressLane PEX PCI Switch DMA Engine"); +MODULE_VERSION("0.1"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Logan Gunthorpe"); + +#define PLX_REG_DESC_RING_ADDR 0x214 +#define PLX_REG_DESC_RING_ADDR_HI 0x218 +#define PLX_REG_DESC_RING_NEXT_ADDR 0x21C +#define PLX_REG_DESC_RING_COUNT 0x220 +#define PLX_REG_DESC_RING_LAST_ADDR 0x224 +#define PLX_REG_DESC_RING_LAST_SIZE 0x228 +#define PLX_REG_PREF_LIMIT 0x234 +#define PLX_REG_CTRL 0x238 +#define PLX_REG_CTRL2 0x23A +#define PLX_REG_INTR_CTRL 0x23C +#define PLX_REG_INTR_STATUS 0x23E + +#define PLX_REG_PREF_LIMIT_PREF_FOUR 8 + +#define PLX_REG_CTRL_GRACEFUL_PAUSE BIT(0) +#define PLX_REG_CTRL_ABORT BIT(1) +#define PLX_REG_CTRL_WRITE_BACK_EN BIT(2) +#define PLX_REG_CTRL_START BIT(3) +#define PLX_REG_CTRL_RING_STOP_MODE BIT(4) +#define PLX_REG_CTRL_DESC_MODE_BLOCK (0 << 5) +#define PLX_REG_CTRL_DESC_MODE_ON_CHIP (1 << 5) +#define PLX_REG_CTRL_DESC_MODE_OFF_CHIP (2 << 5) +#define PLX_REG_CTRL_DESC_INVALID BIT(8) +#define PLX_REG_CTRL_GRACEFUL_PAUSE_DONE BIT(9) +#define PLX_REG_CTRL_ABORT_DONE BIT(10) +#define PLX_REG_CTRL_IMM_PAUSE_DONE BIT(12) +#define PLX_REG_CTRL_IN_PROGRESS BIT(30) + +#define PLX_REG_CTRL_RESET_VAL (PLX_REG_CTRL_DESC_INVALID | \ + PLX_REG_CTRL_GRACEFUL_PAUSE_DONE | \ + PLX_REG_CTRL_ABORT_DONE | \ + PLX_REG_CTRL_IMM_PAUSE_DONE) + +#define PLX_REG_CTRL_START_VAL (PLX_REG_CTRL_WRITE_BACK_EN | \ + PLX_REG_CTRL_DESC_MODE_OFF_CHIP | \ + PLX_REG_CTRL_START | \ + PLX_REG_CTRL_RESET_VAL) + +#define PLX_REG_CTRL2_MAX_TXFR_SIZE_64B 0 +#define PLX_REG_CTRL2_MAX_TXFR_SIZE_128B 1 +#define PLX_REG_CTRL2_MAX_TXFR_SIZE_256B 2 +#define PLX_REG_CTRL2_MAX_TXFR_SIZE_512B 3 +#define PLX_REG_CTRL2_MAX_TXFR_SIZE_1KB 4 +#define PLX_REG_CTRL2_MAX_TXFR_SIZE_2KB 5 +#define PLX_REG_CTRL2_MAX_TXFR_SIZE_4B 7 + +#define PLX_REG_INTR_CRTL_ERROR_EN BIT(0) +#define PLX_REG_INTR_CRTL_INV_DESC_EN BIT(1) +#define PLX_REG_INTR_CRTL_ABORT_DONE_EN BIT(3) +#define PLX_REG_INTR_CRTL_PAUSE_DONE_EN BIT(4) +#define PLX_REG_INTR_CRTL_IMM_PAUSE_DONE_EN BIT(5) + +#define PLX_REG_INTR_STATUS_ERROR BIT(0) +#define PLX_REG_INTR_STATUS_INV_DESC BIT(1) +#define PLX_REG_INTR_STATUS_DESC_DONE BIT(2) +#define PLX_REG_INTR_CRTL_ABORT_DONE BIT(3) + +struct plx_dma_hw_std_desc { + __le32 flags_and_size; + __le16 dst_addr_hi; + __le16 src_addr_hi; + __le32 dst_addr_lo; + __le32 src_addr_lo; +}; + +#define PLX_DESC_SIZE_MASK 0x7ffffff +#define PLX_DESC_FLAG_VALID BIT(31) +#define PLX_DESC_FLAG_INT_WHEN_DONE BIT(30) + +#define PLX_DESC_WB_SUCCESS BIT(30) +#define PLX_DESC_WB_RD_FAIL BIT(29) +#define PLX_DESC_WB_WR_FAIL BIT(28) + +#define PLX_DMA_RING_COUNT 2048 + +struct plx_dma_desc { + struct dma_async_tx_descriptor txd; + struct plx_dma_hw_std_desc *hw; + u32 orig_size; +}; + +struct plx_dma_dev { + struct dma_device dma_dev; + struct dma_chan dma_chan; + struct pci_dev __rcu *pdev; + void __iomem *bar; + struct tasklet_struct desc_task; + + spinlock_t ring_lock; + bool ring_active; + int head; + int tail; + struct plx_dma_hw_std_desc *hw_ring; + dma_addr_t hw_ring_dma; + struct plx_dma_desc **desc_ring; +}; + +static struct plx_dma_dev *chan_to_plx_dma_dev(struct dma_chan *c) +{ + return container_of(c, struct plx_dma_dev, dma_chan); +} + +static struct plx_dma_desc *to_plx_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct plx_dma_desc, txd); +} + +static struct plx_dma_desc *plx_dma_get_desc(struct plx_dma_dev *plxdev, int i) +{ + return plxdev->desc_ring[i & (PLX_DMA_RING_COUNT - 1)]; +} + +static void plx_dma_process_desc(struct plx_dma_dev *plxdev) +{ + struct dmaengine_result res; + struct plx_dma_desc *desc; + u32 flags; + + spin_lock(&plxdev->ring_lock); + + while (plxdev->tail != plxdev->head) { + desc = plx_dma_get_desc(plxdev, plxdev->tail); + + flags = le32_to_cpu(READ_ONCE(desc->hw->flags_and_size)); + + if (flags & PLX_DESC_FLAG_VALID) + break; + + res.residue = desc->orig_size - (flags & PLX_DESC_SIZE_MASK); + + if (flags & PLX_DESC_WB_SUCCESS) + res.result = DMA_TRANS_NOERROR; + else if (flags & PLX_DESC_WB_WR_FAIL) + res.result = DMA_TRANS_WRITE_FAILED; + else + res.result = DMA_TRANS_READ_FAILED; + + dma_cookie_complete(&desc->txd); + dma_descriptor_unmap(&desc->txd); + dmaengine_desc_get_callback_invoke(&desc->txd, &res); + desc->txd.callback = NULL; + desc->txd.callback_result = NULL; + + plxdev->tail++; + } + + spin_unlock(&plxdev->ring_lock); +} + +static void plx_dma_abort_desc(struct plx_dma_dev *plxdev) +{ + struct dmaengine_result res; + struct plx_dma_desc *desc; + + plx_dma_process_desc(plxdev); + + spin_lock_bh(&plxdev->ring_lock); + + while (plxdev->tail != plxdev->head) { + desc = plx_dma_get_desc(plxdev, plxdev->tail); + + res.residue = desc->orig_size; + res.result = DMA_TRANS_ABORTED; + + dma_cookie_complete(&desc->txd); + dma_descriptor_unmap(&desc->txd); + dmaengine_desc_get_callback_invoke(&desc->txd, &res); + desc->txd.callback = NULL; + desc->txd.callback_result = NULL; + + plxdev->tail++; + } + + spin_unlock_bh(&plxdev->ring_lock); +} + +static void __plx_dma_stop(struct plx_dma_dev *plxdev) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(1000); + u32 val; + + val = readl(plxdev->bar + PLX_REG_CTRL); + if (!(val & ~PLX_REG_CTRL_GRACEFUL_PAUSE)) + return; + + writel(PLX_REG_CTRL_RESET_VAL | PLX_REG_CTRL_GRACEFUL_PAUSE, + plxdev->bar + PLX_REG_CTRL); + + while (!time_after(jiffies, timeout)) { + val = readl(plxdev->bar + PLX_REG_CTRL); + if (val & PLX_REG_CTRL_GRACEFUL_PAUSE_DONE) + break; + + cpu_relax(); + } + + if (!(val & PLX_REG_CTRL_GRACEFUL_PAUSE_DONE)) + dev_err(plxdev->dma_dev.dev, + "Timeout waiting for graceful pause!\n"); + + writel(PLX_REG_CTRL_RESET_VAL | PLX_REG_CTRL_GRACEFUL_PAUSE, + plxdev->bar + PLX_REG_CTRL); + + writel(0, plxdev->bar + PLX_REG_DESC_RING_COUNT); + writel(0, plxdev->bar + PLX_REG_DESC_RING_ADDR); + writel(0, plxdev->bar + PLX_REG_DESC_RING_ADDR_HI); + writel(0, plxdev->bar + PLX_REG_DESC_RING_NEXT_ADDR); +} + +static void plx_dma_stop(struct plx_dma_dev *plxdev) +{ + rcu_read_lock(); + if (!rcu_dereference(plxdev->pdev)) { + rcu_read_unlock(); + return; + } + + __plx_dma_stop(plxdev); + + rcu_read_unlock(); +} + +static void plx_dma_desc_task(struct tasklet_struct *t) +{ + struct plx_dma_dev *plxdev = from_tasklet(plxdev, t, desc_task); + + plx_dma_process_desc(plxdev); +} + +static struct dma_async_tx_descriptor *plx_dma_prep_memcpy(struct dma_chan *c, + dma_addr_t dma_dst, dma_addr_t dma_src, size_t len, + unsigned long flags) + __acquires(plxdev->ring_lock) +{ + struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(c); + struct plx_dma_desc *plxdesc; + + spin_lock_bh(&plxdev->ring_lock); + if (!plxdev->ring_active) + goto err_unlock; + + if (!CIRC_SPACE(plxdev->head, plxdev->tail, PLX_DMA_RING_COUNT)) + goto err_unlock; + + if (len > PLX_DESC_SIZE_MASK) + goto err_unlock; + + plxdesc = plx_dma_get_desc(plxdev, plxdev->head); + plxdev->head++; + + plxdesc->hw->dst_addr_lo = cpu_to_le32(lower_32_bits(dma_dst)); + plxdesc->hw->dst_addr_hi = cpu_to_le16(upper_32_bits(dma_dst)); + plxdesc->hw->src_addr_lo = cpu_to_le32(lower_32_bits(dma_src)); + plxdesc->hw->src_addr_hi = cpu_to_le16(upper_32_bits(dma_src)); + + plxdesc->orig_size = len; + + if (flags & DMA_PREP_INTERRUPT) + len |= PLX_DESC_FLAG_INT_WHEN_DONE; + + plxdesc->hw->flags_and_size = cpu_to_le32(len); + plxdesc->txd.flags = flags; + + /* return with the lock held, it will be released in tx_submit */ + + return &plxdesc->txd; + +err_unlock: + /* + * Keep sparse happy by restoring an even lock count on + * this lock. + */ + __acquire(plxdev->ring_lock); + + spin_unlock_bh(&plxdev->ring_lock); + return NULL; +} + +static dma_cookie_t plx_dma_tx_submit(struct dma_async_tx_descriptor *desc) + __releases(plxdev->ring_lock) +{ + struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(desc->chan); + struct plx_dma_desc *plxdesc = to_plx_desc(desc); + dma_cookie_t cookie; + + cookie = dma_cookie_assign(desc); + + /* + * Ensure the descriptor updates are visible to the dma device + * before setting the valid bit. + */ + wmb(); + + plxdesc->hw->flags_and_size |= cpu_to_le32(PLX_DESC_FLAG_VALID); + + spin_unlock_bh(&plxdev->ring_lock); + + return cookie; +} + +static enum dma_status plx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + plx_dma_process_desc(plxdev); + + return dma_cookie_status(chan, cookie, txstate); +} + +static void plx_dma_issue_pending(struct dma_chan *chan) +{ + struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan); + + rcu_read_lock(); + if (!rcu_dereference(plxdev->pdev)) { + rcu_read_unlock(); + return; + } + + /* + * Ensure the valid bits are visible before starting the + * DMA engine. + */ + wmb(); + + writew(PLX_REG_CTRL_START_VAL, plxdev->bar + PLX_REG_CTRL); + + rcu_read_unlock(); +} + +static irqreturn_t plx_dma_isr(int irq, void *devid) +{ + struct plx_dma_dev *plxdev = devid; + u32 status; + + status = readw(plxdev->bar + PLX_REG_INTR_STATUS); + + if (!status) + return IRQ_NONE; + + if (status & PLX_REG_INTR_STATUS_DESC_DONE && plxdev->ring_active) + tasklet_schedule(&plxdev->desc_task); + + writew(status, plxdev->bar + PLX_REG_INTR_STATUS); + + return IRQ_HANDLED; +} + +static int plx_dma_alloc_desc(struct plx_dma_dev *plxdev) +{ + struct plx_dma_desc *desc; + int i; + + plxdev->desc_ring = kcalloc(PLX_DMA_RING_COUNT, + sizeof(*plxdev->desc_ring), GFP_KERNEL); + if (!plxdev->desc_ring) + return -ENOMEM; + + for (i = 0; i < PLX_DMA_RING_COUNT; i++) { + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + goto free_and_exit; + + dma_async_tx_descriptor_init(&desc->txd, &plxdev->dma_chan); + desc->txd.tx_submit = plx_dma_tx_submit; + desc->hw = &plxdev->hw_ring[i]; + + plxdev->desc_ring[i] = desc; + } + + return 0; + +free_and_exit: + for (i = 0; i < PLX_DMA_RING_COUNT; i++) + kfree(plxdev->desc_ring[i]); + kfree(plxdev->desc_ring); + return -ENOMEM; +} + +static int plx_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan); + size_t ring_sz = PLX_DMA_RING_COUNT * sizeof(*plxdev->hw_ring); + int rc; + + plxdev->head = plxdev->tail = 0; + plxdev->hw_ring = dma_alloc_coherent(plxdev->dma_dev.dev, ring_sz, + &plxdev->hw_ring_dma, GFP_KERNEL); + if (!plxdev->hw_ring) + return -ENOMEM; + + rc = plx_dma_alloc_desc(plxdev); + if (rc) + goto out_free_hw_ring; + + rcu_read_lock(); + if (!rcu_dereference(plxdev->pdev)) { + rcu_read_unlock(); + rc = -ENODEV; + goto out_free_hw_ring; + } + + writel(PLX_REG_CTRL_RESET_VAL, plxdev->bar + PLX_REG_CTRL); + writel(lower_32_bits(plxdev->hw_ring_dma), + plxdev->bar + PLX_REG_DESC_RING_ADDR); + writel(upper_32_bits(plxdev->hw_ring_dma), + plxdev->bar + PLX_REG_DESC_RING_ADDR_HI); + writel(lower_32_bits(plxdev->hw_ring_dma), + plxdev->bar + PLX_REG_DESC_RING_NEXT_ADDR); + writel(PLX_DMA_RING_COUNT, plxdev->bar + PLX_REG_DESC_RING_COUNT); + writel(PLX_REG_PREF_LIMIT_PREF_FOUR, plxdev->bar + PLX_REG_PREF_LIMIT); + + plxdev->ring_active = true; + + rcu_read_unlock(); + + return PLX_DMA_RING_COUNT; + +out_free_hw_ring: + dma_free_coherent(plxdev->dma_dev.dev, ring_sz, plxdev->hw_ring, + plxdev->hw_ring_dma); + return rc; +} + +static void plx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan); + size_t ring_sz = PLX_DMA_RING_COUNT * sizeof(*plxdev->hw_ring); + struct pci_dev *pdev; + int irq = -1; + int i; + + spin_lock_bh(&plxdev->ring_lock); + plxdev->ring_active = false; + spin_unlock_bh(&plxdev->ring_lock); + + plx_dma_stop(plxdev); + + rcu_read_lock(); + pdev = rcu_dereference(plxdev->pdev); + if (pdev) + irq = pci_irq_vector(pdev, 0); + rcu_read_unlock(); + + if (irq > 0) + synchronize_irq(irq); + + tasklet_kill(&plxdev->desc_task); + + plx_dma_abort_desc(plxdev); + + for (i = 0; i < PLX_DMA_RING_COUNT; i++) + kfree(plxdev->desc_ring[i]); + + kfree(plxdev->desc_ring); + dma_free_coherent(plxdev->dma_dev.dev, ring_sz, plxdev->hw_ring, + plxdev->hw_ring_dma); + +} + +static void plx_dma_release(struct dma_device *dma_dev) +{ + struct plx_dma_dev *plxdev = + container_of(dma_dev, struct plx_dma_dev, dma_dev); + + put_device(dma_dev->dev); + kfree(plxdev); +} + +static int plx_dma_create(struct pci_dev *pdev) +{ + struct plx_dma_dev *plxdev; + struct dma_device *dma; + struct dma_chan *chan; + int rc; + + plxdev = kzalloc(sizeof(*plxdev), GFP_KERNEL); + if (!plxdev) + return -ENOMEM; + + rc = request_irq(pci_irq_vector(pdev, 0), plx_dma_isr, 0, + KBUILD_MODNAME, plxdev); + if (rc) + goto free_plx; + + spin_lock_init(&plxdev->ring_lock); + tasklet_setup(&plxdev->desc_task, plx_dma_desc_task); + + RCU_INIT_POINTER(plxdev->pdev, pdev); + plxdev->bar = pcim_iomap_table(pdev)[0]; + + dma = &plxdev->dma_dev; + INIT_LIST_HEAD(&dma->channels); + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->copy_align = DMAENGINE_ALIGN_1_BYTE; + dma->dev = get_device(&pdev->dev); + + dma->device_alloc_chan_resources = plx_dma_alloc_chan_resources; + dma->device_free_chan_resources = plx_dma_free_chan_resources; + dma->device_prep_dma_memcpy = plx_dma_prep_memcpy; + dma->device_issue_pending = plx_dma_issue_pending; + dma->device_tx_status = plx_dma_tx_status; + dma->device_release = plx_dma_release; + + chan = &plxdev->dma_chan; + chan->device = dma; + dma_cookie_init(chan); + list_add_tail(&chan->device_node, &dma->channels); + + rc = dma_async_device_register(dma); + if (rc) { + pci_err(pdev, "Failed to register dma device: %d\n", rc); + goto put_device; + } + + pci_set_drvdata(pdev, plxdev); + + return 0; + +put_device: + put_device(&pdev->dev); + free_irq(pci_irq_vector(pdev, 0), plxdev); +free_plx: + kfree(plxdev); + + return rc; +} + +static int plx_dma_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + + rc = pcim_enable_device(pdev); + if (rc) + return rc; + + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + if (rc) + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) + return rc; + + rc = pcim_iomap_regions(pdev, 1, KBUILD_MODNAME); + if (rc) + return rc; + + rc = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (rc <= 0) + return rc; + + pci_set_master(pdev); + + rc = plx_dma_create(pdev); + if (rc) + goto err_free_irq_vectors; + + pci_info(pdev, "PLX DMA Channel Registered\n"); + + return 0; + +err_free_irq_vectors: + pci_free_irq_vectors(pdev); + return rc; +} + +static void plx_dma_remove(struct pci_dev *pdev) +{ + struct plx_dma_dev *plxdev = pci_get_drvdata(pdev); + + free_irq(pci_irq_vector(pdev, 0), plxdev); + + rcu_assign_pointer(plxdev->pdev, NULL); + synchronize_rcu(); + + spin_lock_bh(&plxdev->ring_lock); + plxdev->ring_active = false; + spin_unlock_bh(&plxdev->ring_lock); + + __plx_dma_stop(plxdev); + plx_dma_abort_desc(plxdev); + + plxdev->bar = NULL; + dma_async_device_unregister(&plxdev->dma_dev); + + pci_free_irq_vectors(pdev); +} + +static const struct pci_device_id plx_dma_pci_tbl[] = { + { + .vendor = PCI_VENDOR_ID_PLX, + .device = 0x87D0, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = PCI_CLASS_SYSTEM_OTHER << 8, + .class_mask = 0xFFFFFFFF, + }, + {0} +}; +MODULE_DEVICE_TABLE(pci, plx_dma_pci_tbl); + +static struct pci_driver plx_dma_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = plx_dma_pci_tbl, + .probe = plx_dma_probe, + .remove = plx_dma_remove, +}; +module_pci_driver(plx_dma_pci_driver); diff --git a/drivers/dma/ppc4xx/Makefile b/drivers/dma/ppc4xx/Makefile new file mode 100644 index 0000000000..69c2cfac96 --- /dev/null +++ b/drivers/dma/ppc4xx/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += adma.o diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c new file mode 100644 index 0000000000..f9b82dff33 --- /dev/null +++ b/drivers/dma/ppc4xx/adma.c @@ -0,0 +1,4627 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2006-2009 DENX Software Engineering. + * + * Author: Yuri Tikhonov + * + * Further porting to arch/powerpc by + * Anatolij Gustschin + */ + +/* + * This driver supports the asynchrounous DMA copy and RAID engines available + * on the AMCC PPC440SPe Processors. + * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) + * ADMA driver written by D.Williams. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "adma.h" +#include "../dmaengine.h" + +enum ppc_adma_init_code { + PPC_ADMA_INIT_OK = 0, + PPC_ADMA_INIT_MEMRES, + PPC_ADMA_INIT_MEMREG, + PPC_ADMA_INIT_ALLOC, + PPC_ADMA_INIT_COHERENT, + PPC_ADMA_INIT_CHANNEL, + PPC_ADMA_INIT_IRQ1, + PPC_ADMA_INIT_IRQ2, + PPC_ADMA_INIT_REGISTER +}; + +static char *ppc_adma_errors[] = { + [PPC_ADMA_INIT_OK] = "ok", + [PPC_ADMA_INIT_MEMRES] = "failed to get memory resource", + [PPC_ADMA_INIT_MEMREG] = "failed to request memory region", + [PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev " + "structure", + [PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for " + "hardware descriptors", + [PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel", + [PPC_ADMA_INIT_IRQ1] = "failed to request first irq", + [PPC_ADMA_INIT_IRQ2] = "failed to request second irq", + [PPC_ADMA_INIT_REGISTER] = "failed to register dma async device", +}; + +static enum ppc_adma_init_code +ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM]; + +struct ppc_dma_chan_ref { + struct dma_chan *chan; + struct list_head node; +}; + +/* The list of channels exported by ppc440spe ADMA */ +static struct list_head +ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list); + +/* This flag is set when want to refetch the xor chain in the interrupt + * handler + */ +static u32 do_xor_refetch; + +/* Pointer to DMA0, DMA1 CP/CS FIFO */ +static void *ppc440spe_dma_fifo_buf; + +/* Pointers to last submitted to DMA0, DMA1 CDBs */ +static struct ppc440spe_adma_desc_slot *chan_last_sub[3]; +static struct ppc440spe_adma_desc_slot *chan_first_cdb[3]; + +/* Pointer to last linked and submitted xor CB */ +static struct ppc440spe_adma_desc_slot *xor_last_linked; +static struct ppc440spe_adma_desc_slot *xor_last_submit; + +/* This array is used in data-check operations for storing a pattern */ +static char ppc440spe_qword[16]; + +static atomic_t ppc440spe_adma_err_irq_ref; +static dcr_host_t ppc440spe_mq_dcr_host; +static unsigned int ppc440spe_mq_dcr_len; + +/* Since RXOR operations use the common register (MQ0_CF2H) for setting-up + * the block size in transactions, then we do not allow to activate more than + * only one RXOR transactions simultaneously. So use this var to store + * the information about is RXOR currently active (PPC440SPE_RXOR_RUN bit is + * set) or not (PPC440SPE_RXOR_RUN is clear). + */ +static unsigned long ppc440spe_rxor_state; + +/* These are used in enable & check routines + */ +static u32 ppc440spe_r6_enabled; +static struct ppc440spe_adma_chan *ppc440spe_r6_tchan; +static struct completion ppc440spe_r6_test_comp; + +static int ppc440spe_adma_dma2rxor_prep_src( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_rxor *cursor, int index, + int src_cnt, u32 addr); +static void ppc440spe_adma_dma2rxor_set_src( + struct ppc440spe_adma_desc_slot *desc, + int index, dma_addr_t addr); +static void ppc440spe_adma_dma2rxor_set_mult( + struct ppc440spe_adma_desc_slot *desc, + int index, u8 mult); + +#ifdef ADMA_LL_DEBUG +#define ADMA_LL_DBG(x) ({ if (1) x; 0; }) +#else +#define ADMA_LL_DBG(x) ({ if (0) x; 0; }) +#endif + +static void print_cb(struct ppc440spe_adma_chan *chan, void *block) +{ + struct dma_cdb *cdb; + struct xor_cb *cb; + int i; + + switch (chan->device->id) { + case 0: + case 1: + cdb = block; + + pr_debug("CDB at %p [%d]:\n" + "\t attr 0x%02x opc 0x%02x cnt 0x%08x\n" + "\t sg1u 0x%08x sg1l 0x%08x\n" + "\t sg2u 0x%08x sg2l 0x%08x\n" + "\t sg3u 0x%08x sg3l 0x%08x\n", + cdb, chan->device->id, + cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt), + le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l), + le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l), + le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l) + ); + break; + case 2: + cb = block; + + pr_debug("CB at %p [%d]:\n" + "\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n" + "\t cbtah 0x%08x cbtal 0x%08x\n" + "\t cblah 0x%08x cblal 0x%08x\n", + cb, chan->device->id, + cb->cbc, cb->cbbc, cb->cbs, + cb->cbtah, cb->cbtal, + cb->cblah, cb->cblal); + for (i = 0; i < 16; i++) { + if (i && !cb->ops[i].h && !cb->ops[i].l) + continue; + pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n", + i, cb->ops[i].h, cb->ops[i].l); + } + break; + } +} + +static void print_cb_list(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *iter) +{ + for (; iter; iter = iter->hw_next) + print_cb(chan, iter->hw_desc); +} + +static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt) +{ + int i; + + pr_debug("\n%s(%d):\nsrc: ", __func__, id); + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx ", src[i]); + pr_debug("dst:\n\t0x%016llx\n", dst); +} + +static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt) +{ + int i; + + pr_debug("\n%s(%d):\nsrc: ", __func__, id); + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx ", src[i]); + pr_debug("dst: "); + for (i = 0; i < 2; i++) + pr_debug("\t0x%016llx ", dst[i]); +} + +static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src, + unsigned int src_cnt, + const unsigned char *scf) +{ + int i; + + pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id); + if (scf) { + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]); + } else { + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx(no) ", src[i]); + } + + pr_debug("dst: "); + for (i = 0; i < 2; i++) + pr_debug("\t0x%016llx ", src[src_cnt + i]); +} + +/****************************************************************************** + * Command (Descriptor) Blocks low-level routines + ******************************************************************************/ +/** + * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT + * pseudo operation + */ +static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + struct xor_cb *p; + + switch (chan->device->id) { + case PPC440SPE_XOR_ID: + p = desc->hw_desc; + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + /* NOP with Command Block Complete Enable */ + p->cbc = XOR_CBCR_CBCE_BIT; + break; + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); + /* NOP with interrupt */ + set_bit(PPC440SPE_DESC_INT, &desc->flags); + break; + default: + printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id, + __func__); + break; + } +} + +/** + * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR + * pseudo operation + */ +static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc) +{ + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = 0; + desc->dst_cnt = 1; +} + +/** + * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation + */ +static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc, + int src_cnt, unsigned long flags) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = src_cnt; + desc->dst_cnt = 1; + + hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt; + if (flags & DMA_PREP_INTERRUPT) + /* Enable interrupt on completion */ + hw_desc->cbc |= XOR_CBCR_CBCE_BIT; +} + +/** + * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ + * operation in DMA2 controller + */ +static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt, unsigned long flags) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; + memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags)); + desc->descs_per_op = 0; + + hw_desc->cbc = XOR_CBCR_TGT_BIT; + if (flags & DMA_PREP_INTERRUPT) + /* Enable interrupt on completion */ + hw_desc->cbc |= XOR_CBCR_CBCE_BIT; +} + +#define DMA_CTRL_FLAGS_LAST DMA_PREP_FENCE +#define DMA_PREP_ZERO_P (DMA_CTRL_FLAGS_LAST << 1) +#define DMA_PREP_ZERO_Q (DMA_PREP_ZERO_P << 1) + +/** + * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation + * with DMA0/1 + */ +static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt, unsigned long flags, + unsigned long op) +{ + struct dma_cdb *hw_desc; + struct ppc440spe_adma_desc_slot *iter; + u8 dopc; + + /* Common initialization of a PQ descriptors chain */ + set_bits(op, &desc->flags); + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; + + /* WXOR MULTICAST if both P and Q are being computed + * MV_SG1_SG2 if Q only + */ + dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ? + DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2; + + list_for_each_entry(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + + if (likely(!list_is_last(&iter->chain_node, + &desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + } else { + /* this is the last descriptor. + * this slot will be pasted from ADMA level + * each time it wants to configure parameters + * of the transaction (src, dst, ...) + */ + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + } + } + + /* Set OPS depending on WXOR/RXOR type of operation */ + if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) { + /* This is a WXOR only chain: + * - first descriptors are for zeroing destinations + * if PPC440SPE_ZERO_P/Q set; + * - descriptors remained are for GF-XOR operations. + */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + + if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) { + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) { + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + list_for_each_entry_from(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + hw_desc->opc = dopc; + } + } else { + /* This is either RXOR-only or mixed RXOR/WXOR */ + + /* The first 1 or 2 slots in chain are always RXOR, + * if need to calculate P & Q, then there are two + * RXOR slots; if only P or only Q, then there is one + */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + + if (desc->dst_cnt == DMA_DEST_MAX_NUM) { + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + } + + /* The remaining descs (if any) are WXORs */ + if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) { + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + list_for_each_entry_from(iter, &desc->group_list, + chain_node) { + hw_desc = iter->hw_desc; + hw_desc->opc = dopc; + } + } + } +} + +/** + * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor + * for PQ_ZERO_SUM operation + */ +static void ppc440spe_desc_init_dma01pqzero_sum( + struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt) +{ + struct dma_cdb *hw_desc; + struct ppc440spe_adma_desc_slot *iter; + int i = 0; + u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST : + DMA_CDB_OPC_MV_SG1_SG2; + /* + * Initialize starting from 2nd or 3rd descriptor dependent + * on dst_cnt. First one or two slots are for cloning P + * and/or Q to chan->pdest and/or chan->qdest as we have + * to preserve original P/Q. + */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, chain_node); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + + if (dst_cnt > 1) { + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + } + /* initialize each source descriptor in chain */ + list_for_each_entry_from(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->src_cnt = 0; + iter->dst_cnt = 0; + + /* This is a ZERO_SUM operation: + * - descriptors starting from 2nd or 3rd + * descriptor are for GF-XOR operations; + * - remaining descriptors are for checking the result + */ + if (i++ < src_cnt) + /* MV_SG1_SG2 if only Q is being verified + * MULTICAST if both P and Q are being verified + */ + hw_desc->opc = dopc; + else + /* DMA_CDB_OPC_DCHECK128 operation */ + hw_desc->opc = DMA_CDB_OPC_DCHECK128; + + if (likely(!list_is_last(&iter->chain_node, + &desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } else { + /* this is the last descriptor. + * this slot will be pasted from ADMA level + * each time it wants to configure parameters + * of the transaction (src, dst, ...) + */ + iter->hw_next = NULL; + /* always enable interrupt generation since we get + * the status of pqzero from the handler + */ + set_bit(PPC440SPE_DESC_INT, &iter->flags); + } + } + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; +} + +/** + * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation + */ +static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc, + unsigned long flags) +{ + struct dma_cdb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); + desc->hw_next = NULL; + desc->src_cnt = 1; + desc->dst_cnt = 1; + + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &desc->flags); + else + clear_bit(PPC440SPE_DESC_INT, &desc->flags); + + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; +} + +/** + * ppc440spe_desc_set_src_addr - set source address into the descriptor + */ +static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + int src_idx, dma_addr_t addrh, + dma_addr_t addrl) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + phys_addr_t addr64, tmplow, tmphi; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (!addrh) { + addr64 = addrl; + tmphi = (addr64 >> 32); + tmplow = (addr64 & 0xFFFFFFFF); + } else { + tmphi = addrh; + tmplow = addrl; + } + dma_hw_desc = desc->hw_desc; + dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow); + dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + xor_hw_desc->ops[src_idx].l = addrl; + xor_hw_desc->ops[src_idx].h |= addrh; + break; + } +} + +/** + * ppc440spe_desc_set_src_mult - set source address mult into the descriptor + */ +static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, u32 mult_index, + int sg_index, unsigned char mult_value) +{ + struct dma_cdb *dma_hw_desc; + u32 *psgu; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + + switch (sg_index) { + /* for RXOR operations set multiplier + * into source cued address + */ + case DMA_CDB_SG_SRC: + psgu = &dma_hw_desc->sg1u; + break; + /* for WXOR operations set multiplier + * into destination cued address(es) + */ + case DMA_CDB_SG_DST1: + psgu = &dma_hw_desc->sg2u; + break; + case DMA_CDB_SG_DST2: + psgu = &dma_hw_desc->sg3u; + break; + default: + BUG(); + } + + *psgu |= cpu_to_le32(mult_value << mult_index); + break; + case PPC440SPE_XOR_ID: + break; + default: + BUG(); + } +} + +/** + * ppc440spe_desc_set_dest_addr - set destination address into the descriptor + */ +static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + dma_addr_t addrh, dma_addr_t addrl, + u32 dst_idx) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + phys_addr_t addr64, tmphi, tmplow; + u32 *psgu, *psgl; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (!addrh) { + addr64 = addrl; + tmphi = (addr64 >> 32); + tmplow = (addr64 & 0xFFFFFFFF); + } else { + tmphi = addrh; + tmplow = addrl; + } + dma_hw_desc = desc->hw_desc; + + psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u; + psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l; + + *psgl = cpu_to_le32((u32)tmplow); + *psgu |= cpu_to_le32((u32)tmphi); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + xor_hw_desc->cbtal = addrl; + xor_hw_desc->cbtah |= addrh; + break; + } +} + +/** + * ppc440spe_desc_set_byte_count - set number of data bytes involved + * into the operation + */ +static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + u32 byte_count) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + dma_hw_desc->cnt = cpu_to_le32(byte_count); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + xor_hw_desc->cbbc = byte_count; + break; + } +} + +/** + * ppc440spe_desc_set_rxor_block_size - set RXOR block size + */ +static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count) +{ + /* assume that byte_count is aligned on the 512-boundary; + * thus write it directly to the register (bits 23:31 are + * reserved there). + */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count); +} + +/** + * ppc440spe_desc_set_dcheck - set CHECK pattern + */ +static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, u8 *qword) +{ + struct dma_cdb *dma_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + iowrite32(qword[0], &dma_hw_desc->sg3l); + iowrite32(qword[4], &dma_hw_desc->sg3u); + iowrite32(qword[8], &dma_hw_desc->sg2l); + iowrite32(qword[12], &dma_hw_desc->sg2u); + break; + default: + BUG(); + } +} + +/** + * ppc440spe_xor_set_link - set link address in xor CB + */ +static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc, + struct ppc440spe_adma_desc_slot *next_desc) +{ + struct xor_cb *xor_hw_desc = prev_desc->hw_desc; + + if (unlikely(!next_desc || !(next_desc->phys))) { + printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n", + __func__, next_desc, + next_desc ? next_desc->phys : 0); + BUG(); + } + + xor_hw_desc->cbs = 0; + xor_hw_desc->cblal = next_desc->phys; + xor_hw_desc->cblah = 0; + xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT; +} + +/** + * ppc440spe_desc_set_link - set the address of descriptor following this + * descriptor in chain + */ +static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *prev_desc, + struct ppc440spe_adma_desc_slot *next_desc) +{ + unsigned long flags; + struct ppc440spe_adma_desc_slot *tail = next_desc; + + if (unlikely(!prev_desc || !next_desc || + (prev_desc->hw_next && prev_desc->hw_next != next_desc))) { + /* If previous next is overwritten something is wrong. + * though we may refetch from append to initiate list + * processing; in this case - it's ok. + */ + printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; " + "prev->hw_next=0x%p\n", __func__, prev_desc, + next_desc, prev_desc ? prev_desc->hw_next : 0); + BUG(); + } + + local_irq_save(flags); + + /* do s/w chaining both for DMA and XOR descriptors */ + prev_desc->hw_next = next_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + break; + case PPC440SPE_XOR_ID: + /* bind descriptor to the chain */ + while (tail->hw_next) + tail = tail->hw_next; + xor_last_linked = tail; + + if (prev_desc == xor_last_submit) + /* do not link to the last submitted CB */ + break; + ppc440spe_xor_set_link(prev_desc, next_desc); + break; + } + + local_irq_restore(flags); +} + +/** + * ppc440spe_desc_get_link - get the address of the descriptor that + * follows this one + */ +static inline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + if (!desc->hw_next) + return 0; + + return desc->hw_next->phys; +} + +/** + * ppc440spe_desc_is_aligned - check alignment + */ +static inline int ppc440spe_desc_is_aligned( + struct ppc440spe_adma_desc_slot *desc, int num_slots) +{ + return (desc->idx & (num_slots - 1)) ? 0 : 1; +} + +/** + * ppc440spe_chan_xor_slot_count - get the number of slots necessary for + * XOR operation + */ +static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt, + int *slots_per_op) +{ + int slot_cnt; + + /* each XOR descriptor provides up to 16 source operands */ + slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS; + + if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)) + return slot_cnt; + + printk(KERN_ERR "%s: len %d > max %d !!\n", + __func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); + BUG(); + return slot_cnt; +} + +/** + * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for + * DMA2 PQ operation + */ +static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs, + int src_cnt, size_t len) +{ + signed long long order = 0; + int state = 0; + int addr_count = 0; + int i; + for (i = 1; i < src_cnt; i++) { + dma_addr_t cur_addr = srcs[i]; + dma_addr_t old_addr = srcs[i-1]; + switch (state) { + case 0: + if (cur_addr == old_addr + len) { + /* direct RXOR */ + order = 1; + state = 1; + if (i == src_cnt-1) + addr_count++; + } else if (old_addr == cur_addr + len) { + /* reverse RXOR */ + order = -1; + state = 1; + if (i == src_cnt-1) + addr_count++; + } else { + state = 3; + } + break; + case 1: + if (i == src_cnt-2 || (order == -1 + && cur_addr != old_addr - len)) { + order = 0; + state = 0; + addr_count++; + } else if (cur_addr == old_addr + len*order) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else if (cur_addr == old_addr + 2*len) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else if (cur_addr == old_addr + 3*len) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else { + order = 0; + state = 0; + addr_count++; + } + break; + case 2: + order = 0; + state = 0; + addr_count++; + break; + } + if (state == 3) + break; + } + if (src_cnt <= 1 || (state != 1 && state != 2)) { + pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n", + __func__, src_cnt, state, addr_count, order); + for (i = 0; i < src_cnt; i++) + pr_err("\t[%d] 0x%llx \n", i, srcs[i]); + BUG(); + } + + return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS; +} + + +/****************************************************************************** + * ADMA channel low-level routines + ******************************************************************************/ + +static u32 +ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan); +static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan); + +/** + * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine + */ +static void ppc440spe_adma_device_clear_eot_status( + struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + u8 *p = chan->device->dma_desc_pool_virt; + struct dma_cdb *cdb; + u32 rv, i; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* read FIFO to ack */ + dma_reg = chan->device->dma_reg; + while ((rv = ioread32(&dma_reg->csfpl))) { + i = rv & DMA_CDB_ADDR_MSK; + cdb = (struct dma_cdb *)&p[i - + (u32)chan->device->dma_desc_pool]; + + /* Clear opcode to ack. This is necessary for + * ZeroSum operations only + */ + cdb->opc = 0; + + if (test_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state)) { + /* probably this is a completed RXOR op, + * get pointer to CDB using the fact that + * physical and virtual addresses of CDB + * in pools have the same offsets + */ + if (le32_to_cpu(cdb->sg1u) & + DMA_CUED_XOR_BASE) { + /* this is a RXOR */ + clear_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state); + } + } + + if (rv & DMA_CDB_STATUS_MSK) { + /* ZeroSum check failed + */ + struct ppc440spe_adma_desc_slot *iter; + dma_addr_t phys = rv & ~DMA_CDB_MSK; + + /* + * Update the status of corresponding + * descriptor. + */ + list_for_each_entry(iter, &chan->chain, + chain_node) { + if (iter->phys == phys) + break; + } + /* + * if cannot find the corresponding + * slot it's a bug + */ + BUG_ON(&iter->chain_node == &chan->chain); + + if (iter->xor_check_result) { + if (test_bit(PPC440SPE_DESC_PCHECK, + &iter->flags)) { + *iter->xor_check_result |= + SUM_CHECK_P_RESULT; + } else + if (test_bit(PPC440SPE_DESC_QCHECK, + &iter->flags)) { + *iter->xor_check_result |= + SUM_CHECK_Q_RESULT; + } else + BUG(); + } + } + } + + rv = ioread32(&dma_reg->dsts); + if (rv) { + pr_err("DMA%d err status: 0x%x\n", + chan->device->id, rv); + /* write back to clear */ + iowrite32(rv, &dma_reg->dsts); + } + break; + case PPC440SPE_XOR_ID: + /* reset status bits to ack */ + xor_reg = chan->device->xor_reg; + rv = ioread32be(&xor_reg->sr); + iowrite32be(rv, &xor_reg->sr); + + if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) { + if (rv & XOR_IE_RPTIE_BIT) { + /* Read PLB Timeout Error. + * Try to resubmit the CB + */ + u32 val = ioread32be(&xor_reg->ccbalr); + + iowrite32be(val, &xor_reg->cblalr); + + val = ioread32be(&xor_reg->crsr); + iowrite32be(val | XOR_CRSR_XAE_BIT, + &xor_reg->crsr); + } else + pr_err("XOR ERR 0x%x status\n", rv); + break; + } + + /* if the XORcore is idle, but there are unprocessed CBs + * then refetch the s/w chain here + */ + if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) && + do_xor_refetch) + ppc440spe_chan_append(chan); + break; + } +} + +/** + * ppc440spe_chan_is_busy - get the channel status + */ +static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + int busy = 0; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_reg = chan->device->dma_reg; + /* if command FIFO's head and tail pointers are equal and + * status tail is the same as command, then channel is free + */ + if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) || + ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp)) + busy = 1; + break; + case PPC440SPE_XOR_ID: + /* use the special status bit for the XORcore + */ + xor_reg = chan->device->xor_reg; + busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0; + break; + } + + return busy; +} + +/** + * ppc440spe_chan_set_first_xor_descriptor - init XORcore chain + */ +static void ppc440spe_chan_set_first_xor_descriptor( + struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *next_desc) +{ + struct xor_regs *xor_reg = chan->device->xor_reg; + + if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) + printk(KERN_INFO "%s: Warn: XORcore is running " + "when try to set the first CDB!\n", + __func__); + + xor_last_submit = xor_last_linked = next_desc; + + iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr); + + iowrite32be(next_desc->phys, &xor_reg->cblalr); + iowrite32be(0, &xor_reg->cblahr); + iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT, + &xor_reg->cbcr); + + chan->hw_chain_inited = 1; +} + +/** + * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO. + * called with irqs disabled + */ +static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *desc) +{ + u32 pcdb; + struct dma_regs *dma_reg = chan->device->dma_reg; + + pcdb = desc->phys; + if (!test_bit(PPC440SPE_DESC_INT, &desc->flags)) + pcdb |= DMA_CDB_NO_INT; + + chan_last_sub[chan->device->id] = desc; + + ADMA_LL_DBG(print_cb(chan, desc->hw_desc)); + + iowrite32(pcdb, &dma_reg->cpfpl); +} + +/** + * ppc440spe_chan_append - update the h/w chain in the channel + */ +static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan) +{ + struct xor_regs *xor_reg; + struct ppc440spe_adma_desc_slot *iter; + struct xor_cb *xcb; + u32 cur_desc; + unsigned long flags; + + local_irq_save(flags); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + cur_desc = ppc440spe_chan_get_current_descriptor(chan); + + if (likely(cur_desc)) { + iter = chan_last_sub[chan->device->id]; + BUG_ON(!iter); + } else { + /* first peer */ + iter = chan_first_cdb[chan->device->id]; + BUG_ON(!iter); + ppc440spe_dma_put_desc(chan, iter); + chan->hw_chain_inited = 1; + } + + /* is there something new to append */ + if (!iter->hw_next) + break; + + /* flush descriptors from the s/w queue to fifo */ + list_for_each_entry_continue(iter, &chan->chain, chain_node) { + ppc440spe_dma_put_desc(chan, iter); + if (!iter->hw_next) + break; + } + break; + case PPC440SPE_XOR_ID: + /* update h/w links and refetch */ + if (!xor_last_submit->hw_next) + break; + + xor_reg = chan->device->xor_reg; + /* the last linked CDB has to generate an interrupt + * that we'd be able to append the next lists to h/w + * regardless of the XOR engine state at the moment of + * appending of these next lists + */ + xcb = xor_last_linked->hw_desc; + xcb->cbc |= XOR_CBCR_CBCE_BIT; + + if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) { + /* XORcore is idle. Refetch now */ + do_xor_refetch = 0; + ppc440spe_xor_set_link(xor_last_submit, + xor_last_submit->hw_next); + + ADMA_LL_DBG(print_cb_list(chan, + xor_last_submit->hw_next)); + + xor_last_submit = xor_last_linked; + iowrite32be(ioread32be(&xor_reg->crsr) | + XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT, + &xor_reg->crsr); + } else { + /* XORcore is running. Refetch later in the handler */ + do_xor_refetch = 1; + } + + break; + } + + local_irq_restore(flags); +} + +/** + * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor + */ +static u32 +ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + + if (unlikely(!chan->hw_chain_inited)) + /* h/w descriptor chain is not initialized yet */ + return 0; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_reg = chan->device->dma_reg; + return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK); + case PPC440SPE_XOR_ID: + xor_reg = chan->device->xor_reg; + return ioread32be(&xor_reg->ccbalr); + } + return 0; +} + +/** + * ppc440spe_chan_run - enable the channel + */ +static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan) +{ + struct xor_regs *xor_reg; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* DMAs are always enabled, do nothing */ + break; + case PPC440SPE_XOR_ID: + /* drain write buffer */ + xor_reg = chan->device->xor_reg; + + /* fetch descriptor pointed to in */ + iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT, + &xor_reg->crsr); + break; + } +} + +/****************************************************************************** + * ADMA device level + ******************************************************************************/ + +static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan); +static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan); + +static dma_cookie_t +ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx); + +static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); +static void +ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); + +static void +ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t *paddr, unsigned long flags); +static void +ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); +static void +ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx, + unsigned char mult, int index, int dst_pos); +static void +ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t paddr, dma_addr_t qaddr); + +static struct page *ppc440spe_rxor_srcs[32]; + +/** + * ppc440spe_can_rxor - check if the operands may be processed with RXOR + */ +static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len) +{ + int i, order = 0, state = 0; + int idx = 0; + + if (unlikely(!(src_cnt > 1))) + return 0; + + BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs)); + + /* Skip holes in the source list before checking */ + for (i = 0; i < src_cnt; i++) { + if (!srcs[i]) + continue; + ppc440spe_rxor_srcs[idx++] = srcs[i]; + } + src_cnt = idx; + + for (i = 1; i < src_cnt; i++) { + char *cur_addr = page_address(ppc440spe_rxor_srcs[i]); + char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]); + + switch (state) { + case 0: + if (cur_addr == old_addr + len) { + /* direct RXOR */ + order = 1; + state = 1; + } else if (old_addr == cur_addr + len) { + /* reverse RXOR */ + order = -1; + state = 1; + } else + goto out; + break; + case 1: + if ((i == src_cnt - 2) || + (order == -1 && cur_addr != old_addr - len)) { + order = 0; + state = 0; + } else if ((cur_addr == old_addr + len * order) || + (cur_addr == old_addr + 2 * len) || + (cur_addr == old_addr + 3 * len)) { + state = 2; + } else { + order = 0; + state = 0; + } + break; + case 2: + order = 0; + state = 0; + break; + } + } + +out: + if (state == 1 || state == 2) + return 1; + + return 0; +} + +/** + * ppc440spe_adma_device_estimate - estimate the efficiency of processing + * the operation given on this channel. It's assumed that 'chan' is + * capable to process 'cap' type of operation. + * @chan: channel to use + * @cap: type of transaction + * @dst_lst: array of destination pointers + * @dst_cnt: number of destination operands + * @src_lst: array of source pointers + * @src_cnt: number of source operands + * @src_sz: size of each source operand + */ +static int ppc440spe_adma_estimate(struct dma_chan *chan, + enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt, + struct page **src_lst, int src_cnt, size_t src_sz) +{ + int ef = 1; + + if (cap == DMA_PQ || cap == DMA_PQ_VAL) { + /* If RAID-6 capabilities were not activated don't try + * to use them + */ + if (unlikely(!ppc440spe_r6_enabled)) + return -1; + } + /* In the current implementation of ppc440spe ADMA driver it + * makes sense to pick out only pq case, because it may be + * processed: + * (1) either using Biskup method on DMA2; + * (2) or on DMA0/1. + * Thus we give a favour to (1) if the sources are suitable; + * else let it be processed on one of the DMA0/1 engines. + * In the sum_product case where destination is also the + * source process it on DMA0/1 only. + */ + if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) { + + if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1]) + ef = 0; /* sum_product case, process on DMA0/1 */ + else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz)) + ef = 3; /* override (DMA0/1 + idle) */ + else + ef = 0; /* can't process on DMA2 if !rxor */ + } + + /* channel idleness increases the priority */ + if (likely(ef) && + !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan))) + ef++; + + return ef; +} + +struct dma_chan * +ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap, + struct page **dst_lst, int dst_cnt, struct page **src_lst, + int src_cnt, size_t src_sz) +{ + struct dma_chan *best_chan = NULL; + struct ppc_dma_chan_ref *ref; + int best_rank = -1; + + if (unlikely(!src_sz)) + return NULL; + if (src_sz > PAGE_SIZE) { + /* + * should a user of the api ever pass > PAGE_SIZE requests + * we sort out cases where temporary page-sized buffers + * are used. + */ + switch (cap) { + case DMA_PQ: + if (src_cnt == 1 && dst_lst[1] == src_lst[0]) + return NULL; + if (src_cnt == 2 && dst_lst[1] == src_lst[1]) + return NULL; + break; + case DMA_PQ_VAL: + case DMA_XOR_VAL: + return NULL; + default: + break; + } + } + + list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) { + if (dma_has_cap(cap, ref->chan->device->cap_mask)) { + int rank; + + rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst, + dst_cnt, src_lst, src_cnt, src_sz); + if (rank > best_rank) { + best_rank = rank; + best_chan = ref->chan; + } + } + } + + return best_chan; +} +EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel); + +/** + * ppc440spe_get_group_entry - get group entry with index idx + * @tdesc: is the last allocated slot in the group. + */ +static struct ppc440spe_adma_desc_slot * +ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx) +{ + struct ppc440spe_adma_desc_slot *iter = tdesc->group_head; + int i = 0; + + if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) { + printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n", + __func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt); + BUG(); + } + + list_for_each_entry(iter, &tdesc->group_list, chain_node) { + if (i++ == entry_idx) + break; + } + return iter; +} + +/** + * ppc440spe_adma_free_slots - flags descriptor slots for reuse + * @slot: Slot to free + * Caller must hold &ppc440spe_chan->lock while calling this function + */ +static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot, + struct ppc440spe_adma_chan *chan) +{ + int stride = slot->slots_per_op; + + while (stride--) { + slot->slots_per_op = 0; + slot = list_entry(slot->slot_node.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } +} + +/** + * ppc440spe_adma_run_tx_complete_actions - call functions to be called + * upon completion + */ +static dma_cookie_t ppc440spe_adma_run_tx_complete_actions( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + dma_cookie_t cookie) +{ + BUG_ON(desc->async_tx.cookie < 0); + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + + dma_descriptor_unmap(&desc->async_tx); + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL); + } + + /* run dependent operations */ + dma_run_dependencies(&desc->async_tx); + + return cookie; +} + +/** + * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set) + */ +static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) + return 0; + + /* leave the last descriptor in the chain + * so we can append to it + */ + if (list_is_last(&desc->chain_node, &chan->chain) || + desc->phys == ppc440spe_chan_get_current_descriptor(chan)) + return 1; + + if (chan->device->id != PPC440SPE_XOR_ID) { + /* our DMA interrupt handler clears opc field of + * each processed descriptor. For all types of + * operations except for ZeroSum we do not actually + * need ack from the interrupt handler. ZeroSum is a + * special case since the result of this operation + * is available from the handler only, so if we see + * such type of descriptor (which is unprocessed yet) + * then leave it in chain. + */ + struct dma_cdb *cdb = desc->hw_desc; + if (cdb->opc == DMA_CDB_OPC_DCHECK128) + return 1; + } + + dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n", + desc->phys, desc->idx, desc->slots_per_op); + + list_del(&desc->chain_node); + ppc440spe_adma_free_slots(desc, chan); + return 0; +} + +/** + * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine + * which runs through the channel CDBs list until reach the descriptor + * currently processed. When routine determines that all CDBs of group + * are completed then corresponding callbacks (if any) are called and slots + * are freed. + */ +static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL; + dma_cookie_t cookie = 0; + u32 current_desc = ppc440spe_chan_get_current_descriptor(chan); + int busy = ppc440spe_chan_is_busy(chan); + int seen_current = 0, slot_cnt = 0, slots_per_op = 0; + + dev_dbg(chan->device->common.dev, "ppc440spe adma%d: %s\n", + chan->device->id, __func__); + + if (!current_desc) { + /* There were no transactions yet, so + * nothing to clean + */ + return; + } + + /* free completed slots from the chain starting with + * the oldest descriptor + */ + list_for_each_entry_safe(iter, _iter, &chan->chain, + chain_node) { + dev_dbg(chan->device->common.dev, "\tcookie: %d slot: %d " + "busy: %d this_desc: %#llx next_desc: %#x " + "cur: %#x ack: %d\n", + iter->async_tx.cookie, iter->idx, busy, iter->phys, + ppc440spe_desc_get_link(iter, chan), current_desc, + async_tx_test_ack(&iter->async_tx)); + prefetch(_iter); + prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel,subsequent descriptors are either in process + * or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy, or if it appears that the current descriptor + * needs to be re-read (i.e. has been appended to) + */ + if (iter->phys == current_desc) { + BUG_ON(seen_current++); + if (busy || ppc440spe_desc_get_link(iter, chan)) { + /* not all descriptors of the group have + * been completed; exit. + */ + break; + } + } + + /* detect the start of a group transaction */ + if (!slot_cnt && !slots_per_op) { + slot_cnt = iter->slot_cnt; + slots_per_op = iter->slots_per_op; + if (slot_cnt <= slots_per_op) { + slot_cnt = 0; + slots_per_op = 0; + } + } + + if (slot_cnt) { + if (!group_start) + group_start = iter; + slot_cnt -= slots_per_op; + } + + /* all the members of a group are complete */ + if (slots_per_op != 0 && slot_cnt == 0) { + struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter; + int end_of_chain = 0; + + /* clean up the group */ + slot_cnt = group_start->slot_cnt; + grp_iter = group_start; + list_for_each_entry_safe_from(grp_iter, _grp_iter, + &chan->chain, chain_node) { + + cookie = ppc440spe_adma_run_tx_complete_actions( + grp_iter, chan, cookie); + + slot_cnt -= slots_per_op; + end_of_chain = ppc440spe_adma_clean_slot( + grp_iter, chan); + if (end_of_chain && slot_cnt) { + /* Should wait for ZeroSum completion */ + if (cookie > 0) + chan->common.completed_cookie = cookie; + return; + } + + if (slot_cnt == 0 || end_of_chain) + break; + } + + /* the group should be complete at this point */ + BUG_ON(slot_cnt); + + slots_per_op = 0; + group_start = NULL; + if (end_of_chain) + break; + else + continue; + } else if (slots_per_op) /* wait for group completion */ + continue; + + cookie = ppc440spe_adma_run_tx_complete_actions(iter, chan, + cookie); + + if (ppc440spe_adma_clean_slot(iter, chan)) + break; + } + + BUG_ON(!seen_current); + + if (cookie > 0) { + chan->common.completed_cookie = cookie; + pr_debug("\tcompleted cookie %d\n", cookie); + } + +} + +/** + * ppc440spe_adma_tasklet - clean up watch-dog initiator + */ +static void ppc440spe_adma_tasklet(struct tasklet_struct *t) +{ + struct ppc440spe_adma_chan *chan = from_tasklet(chan, t, irq_tasklet); + + spin_lock_nested(&chan->lock, SINGLE_DEPTH_NESTING); + __ppc440spe_adma_slot_cleanup(chan); + spin_unlock(&chan->lock); +} + +/** + * ppc440spe_adma_slot_cleanup - clean up scheduled initiator + */ +static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan) +{ + spin_lock_bh(&chan->lock); + __ppc440spe_adma_slot_cleanup(chan); + spin_unlock_bh(&chan->lock); +} + +/** + * ppc440spe_adma_alloc_slots - allocate free slots (if any) + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots( + struct ppc440spe_adma_chan *chan, int num_slots, + int slots_per_op) +{ + struct ppc440spe_adma_desc_slot *iter = NULL, *_iter; + struct ppc440spe_adma_desc_slot *alloc_start = NULL; + int slots_found, retry = 0; + LIST_HEAD(chain); + + + BUG_ON(!num_slots || !slots_per_op); + /* start search from the last allocated descrtiptor + * if a contiguous allocation can not be found start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = chan->last_used; + else + iter = list_entry(&chan->all_slots, + struct ppc440spe_adma_desc_slot, + slot_node); + list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots, + slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + slots_found = 0; + continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) + alloc_start = iter; + + if (slots_found == num_slots) { + struct ppc440spe_adma_desc_slot *alloc_tail = NULL; + struct ppc440spe_adma_desc_slot *last_used = NULL; + + iter = alloc_start; + while (num_slots) { + int i; + /* pre-ack all but the last descriptor */ + if (num_slots != slots_per_op) + async_tx_ack(&iter->async_tx); + + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->hw_next = NULL; + iter->flags = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->group_list); + chan->last_used = last_used; + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* try to free some slots if the allocation fails */ + tasklet_schedule(&chan->irq_tasklet); + return NULL; +} + +/** + * ppc440spe_adma_alloc_chan_resources - allocate pools for CDB slots + */ +static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *slot = NULL; + char *hw_desc; + int i, db_sz; + int init; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + init = ppc440spe_chan->slots_allocated ? 0 : 1; + chan->chan_id = ppc440spe_chan->device->id; + + /* Allocate descriptor slots */ + i = ppc440spe_chan->slots_allocated; + if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID) + db_sz = sizeof(struct dma_cdb); + else + db_sz = sizeof(struct xor_cb); + + for (; i < (ppc440spe_chan->device->pool_size / db_sz); i++) { + slot = kzalloc(sizeof(struct ppc440spe_adma_desc_slot), + GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "SPE ADMA Channel only initialized" + " %d descriptor slots", i--); + break; + } + + hw_desc = (char *) ppc440spe_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[i * db_sz]; + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = ppc440spe_adma_tx_submit; + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->group_list); + slot->phys = ppc440spe_chan->device->dma_desc_pool + i * db_sz; + slot->idx = i; + + spin_lock_bh(&ppc440spe_chan->lock); + ppc440spe_chan->slots_allocated++; + list_add_tail(&slot->slot_node, &ppc440spe_chan->all_slots); + spin_unlock_bh(&ppc440spe_chan->lock); + } + + if (i && !ppc440spe_chan->last_used) { + ppc440spe_chan->last_used = + list_entry(ppc440spe_chan->all_slots.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: allocated %d descriptor slots\n", + ppc440spe_chan->device->id, i); + + /* initialize the channel and the chain with a null operation */ + if (init) { + switch (ppc440spe_chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + ppc440spe_chan->hw_chain_inited = 0; + /* Use WXOR for self-testing */ + if (!ppc440spe_r6_tchan) + ppc440spe_r6_tchan = ppc440spe_chan; + break; + case PPC440SPE_XOR_ID: + ppc440spe_chan_start_null_xor(ppc440spe_chan); + break; + default: + BUG(); + } + ppc440spe_chan->needs_unmap = 1; + } + + return (i > 0) ? i : -ENOMEM; +} + +/** + * ppc440spe_rxor_set_region_data - + */ +static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, u32 mask) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= mask; +} + +/** + * ppc440spe_rxor_set_src - + */ +static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, dma_addr_t addr) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE; + xcb->ops[xor_arg_no].l = addr; +} + +/** + * ppc440spe_rxor_set_mult - + */ +static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, u8 idx, u8 mult) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8); +} + +/** + * ppc440spe_adma_check_threshold - append CDBs to h/w chain if threshold + * has been achieved + */ +static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan) +{ + dev_dbg(chan->device->common.dev, "ppc440spe adma%d: pending: %d\n", + chan->device->id, chan->pending); + + if (chan->pending >= PPC440SPE_ADMA_THRESHOLD) { + chan->pending = 0; + ppc440spe_chan_append(chan); + } +} + +/** + * ppc440spe_adma_tx_submit - submit new descriptor group to the channel + * (it's not necessary that descriptors will be submitted to the h/w + * chains too right now) + */ +static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ppc440spe_adma_desc_slot *sw_desc; + struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan); + struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail; + int slot_cnt; + int slots_per_op; + dma_cookie_t cookie; + + sw_desc = tx_to_ppc440spe_adma_slot(tx); + + group_start = sw_desc->group_head; + slot_cnt = group_start->slot_cnt; + slots_per_op = group_start->slots_per_op; + + spin_lock_bh(&chan->lock); + cookie = dma_cookie_assign(tx); + + if (unlikely(list_empty(&chan->chain))) { + /* first peer */ + list_splice_init(&sw_desc->group_list, &chan->chain); + chan_first_cdb[chan->device->id] = group_start; + } else { + /* isn't first peer, bind CDBs to chain */ + old_chain_tail = list_entry(chan->chain.prev, + struct ppc440spe_adma_desc_slot, + chain_node); + list_splice_init(&sw_desc->group_list, + &old_chain_tail->chain_node); + /* fix up the hardware chain */ + ppc440spe_desc_set_link(chan, old_chain_tail, group_start); + } + + /* increment the pending count by the number of operations */ + chan->pending += slot_cnt / slots_per_op; + ppc440spe_adma_check_threshold(chan); + spin_unlock_bh(&chan->lock); + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s cookie: %d slot: %d tx %p\n", + chan->device->id, __func__, + sw_desc->async_tx.cookie, sw_desc->idx, sw_desc); + + return cookie; +} + +/** + * ppc440spe_adma_prep_dma_interrupt - prepare CDB for a pseudo DMA operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_interrupt( + struct dma_chan *chan, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s\n", ppc440spe_chan->device->id, + __func__); + + spin_lock_bh(&ppc440spe_chan->lock); + slot_cnt = slots_per_op = 1; + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_interrupt(group_start, ppc440spe_chan); + group_start->unmap_len = 0; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_memcpy - prepare CDB for a MEMCPY operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + if (unlikely(!len)) + return NULL; + + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); + + spin_lock_bh(&ppc440spe_chan->lock); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s len: %u int_en %d\n", + ppc440spe_chan->device->id, __func__, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + slot_cnt = slots_per_op = 1; + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_memcpy(group_start, flags); + ppc440spe_adma_set_dest(group_start, dma_dest, 0); + ppc440spe_adma_memcpy_xor_set_src(group_start, dma_src, 0); + ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor( + struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t *dma_src, u32 src_cnt, size_t len, + unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + ADMA_LL_DBG(prep_dma_xor_dbg(ppc440spe_chan->device->id, + dma_dest, dma_src, src_cnt)); + if (unlikely(!len)) + return NULL; + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", + ppc440spe_chan->device->id, __func__, src_cnt, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + spin_lock_bh(&ppc440spe_chan->lock); + slot_cnt = ppc440spe_chan_xor_slot_count(len, src_cnt, &slots_per_op); + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_xor(group_start, src_cnt, flags); + ppc440spe_adma_set_dest(group_start, dma_dest, 0); + while (src_cnt--) + ppc440spe_adma_memcpy_xor_set_src(group_start, + dma_src[src_cnt], src_cnt); + ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static inline void +ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc, + int src_cnt); +static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor); + +/** + * ppc440spe_adma_init_dma2rxor_slot - + */ +static void ppc440spe_adma_init_dma2rxor_slot( + struct ppc440spe_adma_desc_slot *desc, + dma_addr_t *src, int src_cnt) +{ + int i; + + /* initialize CDB */ + for (i = 0; i < src_cnt; i++) { + ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i, + desc->src_cnt, (u32)src[i]); + } +} + +/** + * ppc440spe_dma01_prep_mult - + * for Q operation where destination is also the source + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + unsigned long op = 0; + int slot_cnt; + + set_bit(PPC440SPE_DESC_WXOR, &op); + slot_cnt = 2; + + spin_lock_bh(&ppc440spe_chan->lock); + + /* use WXOR, each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + struct ppc440spe_adma_chan *chan; + struct ppc440spe_adma_desc_slot *iter; + struct dma_cdb *hw_desc; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + set_bits(op, &sw_desc->flags); + sw_desc->src_cnt = src_cnt; + sw_desc->dst_cnt = dst_cnt; + /* First descriptor, zero data in the destination and copy it + * to q page using MULTICAST transfer. + */ + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MULTICAST; + + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, dst[0], 0); + ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* + * Second descriptor, multiply data from the q page + * and store the result in real destination. + */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, + DMA_CUED_XOR_HB, dst[1]); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, dst[0], 0); + + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +/** + * ppc440spe_dma01_prep_sum_product - + * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also + * the source. + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + unsigned long op = 0; + int slot_cnt; + + set_bit(PPC440SPE_DESC_WXOR, &op); + slot_cnt = 3; + + spin_lock_bh(&ppc440spe_chan->lock); + + /* WXOR, each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + struct ppc440spe_adma_chan *chan; + struct ppc440spe_adma_desc_slot *iter; + struct dma_cdb *hw_desc; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + set_bits(op, &sw_desc->flags); + sw_desc->src_cnt = src_cnt; + sw_desc->dst_cnt = 1; + /* 1st descriptor, src[1] data to q page and zero destination */ + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MULTICAST; + + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->qdest, 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[1]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* 2nd descriptor, multiply src[1] data and store the + * result in destination */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + ppc440spe_chan->qdest); + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[1]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* + * 3rd descriptor, multiply src[0] data and xor it + * with destination + */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[0]); + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + int slot_cnt; + struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter; + unsigned long op = 0; + unsigned char mult = 1; + + pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n", + __func__, dst_cnt, src_cnt, len); + /* select operations WXOR/RXOR depending on the + * source addresses of operators and the number + * of destinations (RXOR support only Q-parity calculations) + */ + set_bit(PPC440SPE_DESC_WXOR, &op); + if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) { + /* no active RXOR; + * do RXOR if: + * - there are more than 1 source, + * - len is aligned on 512-byte boundary, + * - source addresses fit to one of 4 possible regions. + */ + if (src_cnt > 1 && + !(len & MQ0_CF2H_RXOR_BS_MASK) && + (src[0] + len) == src[1]) { + /* may do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR, &op); + if (src_cnt != 2) { + /* may try to enhance region of RXOR */ + if ((src[1] + len) == src[2]) { + /* do RXOR R1 R2 R3 */ + set_bit(PPC440SPE_DESC_RXOR123, + &op); + } else if ((src[1] + len * 2) == src[2]) { + /* do RXOR R1 R2 R4 */ + set_bit(PPC440SPE_DESC_RXOR124, &op); + } else if ((src[1] + len * 3) == src[2]) { + /* do RXOR R1 R2 R5 */ + set_bit(PPC440SPE_DESC_RXOR125, + &op); + } else { + /* do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR12, + &op); + } + } else { + /* do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR12, &op); + } + } + + if (!test_bit(PPC440SPE_DESC_RXOR, &op)) { + /* can not do this operation with RXOR */ + clear_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state); + } else { + /* can do; set block size right now */ + ppc440spe_desc_set_rxor_block_size(len); + } + } + + /* Number of necessary slots depends on operation type selected */ + if (!test_bit(PPC440SPE_DESC_RXOR, &op)) { + /* This is a WXOR only chain. Need descriptors for each + * source to GF-XOR them with WXOR, and need descriptors + * for each destination to zero them with WXOR + */ + slot_cnt = src_cnt; + + if (flags & DMA_PREP_ZERO_P) { + slot_cnt++; + set_bit(PPC440SPE_ZERO_P, &op); + } + if (flags & DMA_PREP_ZERO_Q) { + slot_cnt++; + set_bit(PPC440SPE_ZERO_Q, &op); + } + } else { + /* Need 1/2 descriptor for RXOR operation, and + * need (src_cnt - (2 or 3)) for WXOR of sources + * remained (if any) + */ + slot_cnt = dst_cnt; + + if (flags & DMA_PREP_ZERO_P) + set_bit(PPC440SPE_ZERO_P, &op); + if (flags & DMA_PREP_ZERO_Q) + set_bit(PPC440SPE_ZERO_Q, &op); + + if (test_bit(PPC440SPE_DESC_RXOR12, &op)) + slot_cnt += src_cnt - 2; + else + slot_cnt += src_cnt - 3; + + /* Thus we have either RXOR only chain or + * mixed RXOR/WXOR + */ + if (slot_cnt == dst_cnt) + /* RXOR only chain */ + clear_bit(PPC440SPE_DESC_WXOR, &op); + } + + spin_lock_bh(&ppc440spe_chan->lock); + /* for both RXOR/WXOR each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt, + flags, op); + + /* setup dst/src/mult */ + pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n", + __func__, dst[0], dst[1]); + ppc440spe_adma_pq_set_dest(sw_desc, dst, flags); + while (src_cnt--) { + ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt], + src_cnt); + + /* NOTE: "Multi = 0 is equivalent to = 1" as it + * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf + * doesn't work for RXOR with DMA0/1! Instead, multi=0 + * leads to zeroing source data after RXOR. + * So, for P case set-up mult=1 explicitly. + */ + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + mult = scf[src_cnt]; + ppc440spe_adma_pq_set_src_mult(sw_desc, + mult, src_cnt, dst_cnt - 1); + } + + /* Setup byte count foreach slot just allocated */ + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, + chain_node) { + ppc440spe_desc_set_byte_count(iter, + ppc440spe_chan, len); + iter->unmap_len = len; + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + int slot_cnt, descs_per_op; + struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter; + unsigned long op = 0; + unsigned char mult = 1; + + BUG_ON(!dst_cnt); + /*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n", + __func__, dst_cnt, src_cnt, len);*/ + + spin_lock_bh(&ppc440spe_chan->lock); + descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len); + if (descs_per_op < 0) { + spin_unlock_bh(&ppc440spe_chan->lock); + return NULL; + } + + /* depending on number of sources we have 1 or 2 RXOR chains */ + slot_cnt = descs_per_op * dst_cnt; + + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + op = slot_cnt; + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt, + --op ? 0 : flags); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = len; + + ppc440spe_init_rxor_cursor(&(iter->rxor_cursor)); + iter->rxor_cursor.len = len; + iter->descs_per_op = descs_per_op; + } + op = 0; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + op++; + if (op % descs_per_op == 0) + ppc440spe_adma_init_dma2rxor_slot(iter, src, + src_cnt); + if (likely(!list_is_last(&iter->chain_node, + &sw_desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = + list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + ppc440spe_xor_set_link(iter, iter->hw_next); + } else { + /* this is the last descriptor. */ + iter->hw_next = NULL; + } + } + + /* fixup head descriptor */ + sw_desc->dst_cnt = dst_cnt; + if (flags & DMA_PREP_ZERO_P) + set_bit(PPC440SPE_ZERO_P, &sw_desc->flags); + if (flags & DMA_PREP_ZERO_Q) + set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags); + + /* setup dst/src/mult */ + ppc440spe_adma_pq_set_dest(sw_desc, dst, flags); + + while (src_cnt--) { + /* handle descriptors (if dst_cnt == 2) inside + * the ppc440spe_adma_pq_set_srcxxx() functions + */ + ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt], + src_cnt); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + mult = scf[src_cnt]; + ppc440spe_adma_pq_set_src_mult(sw_desc, + mult, src_cnt, dst_cnt - 1); + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + ppc440spe_desc_set_rxor_block_size(len); + return sw_desc; +} + +/** + * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + int dst_cnt = 0; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id, + dst, src, src_cnt)); + BUG_ON(!len); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); + BUG_ON(!src_cnt); + + if (src_cnt == 1 && dst[1] == src[0]) { + dma_addr_t dest[2]; + + /* dst[1] is real destination (Q) */ + dest[0] = dst[1]; + /* this is the page to multicast source data to */ + dest[1] = ppc440spe_chan->qdest; + sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan, + dest, 2, src, src_cnt, scf, len, flags); + return sw_desc ? &sw_desc->async_tx : NULL; + } + + if (src_cnt == 2 && dst[1] == src[1]) { + sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan, + &dst[1], src, 2, scf, len, flags); + return sw_desc ? &sw_desc->async_tx : NULL; + } + + if (!(flags & DMA_PREP_PQ_DISABLE_P)) { + BUG_ON(!dst[0]); + dst_cnt++; + flags |= DMA_PREP_ZERO_P; + } + + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) { + BUG_ON(!dst[1]); + dst_cnt++; + flags |= DMA_PREP_ZERO_Q; + } + + BUG_ON(!dst_cnt); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", + ppc440spe_chan->device->id, __func__, src_cnt, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + switch (ppc440spe_chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan, + dst, dst_cnt, src, src_cnt, scf, + len, flags); + break; + + case PPC440SPE_XOR_ID: + sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan, + dst, dst_cnt, src, src_cnt, scf, + len, flags); + break; + } + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for + * a PQ_ZERO_SUM operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *iter; + dma_addr_t pdest, qdest; + int slot_cnt, slots_per_op, idst, dst_cnt; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + if (flags & DMA_PREP_PQ_DISABLE_P) + pdest = 0; + else + pdest = pq[0]; + + if (flags & DMA_PREP_PQ_DISABLE_Q) + qdest = 0; + else + qdest = pq[1]; + + ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id, + src, src_cnt, scf)); + + /* Always use WXOR for P/Q calculations (two destinations). + * Need 1 or 2 extra slots to verify results are zero. + */ + idst = dst_cnt = (pdest && qdest) ? 2 : 1; + + /* One additional slot per destination to clone P/Q + * before calculation (we have to preserve destinations). + */ + slot_cnt = src_cnt + dst_cnt * 2; + slots_per_op = 1; + + spin_lock_bh(&ppc440spe_chan->lock); + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt); + + /* Setup byte count for each slot just allocated */ + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = len; + } + + if (pdest) { + struct dma_cdb *hw_desc; + struct ppc440spe_adma_chan *chan; + + iter = sw_desc->group_head; + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter->src_cnt = 0; + iter->dst_cnt = 0; + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->pdest, 0); + ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = 0; + /* override pdest to preserve original P */ + pdest = ppc440spe_chan->pdest; + } + if (qdest) { + struct dma_cdb *hw_desc; + struct ppc440spe_adma_chan *chan; + + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + + if (pdest) { + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter->src_cnt = 0; + iter->dst_cnt = 0; + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->qdest, 0); + ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = 0; + /* override qdest to preserve original Q */ + qdest = ppc440spe_chan->qdest; + } + + /* Setup destinations for P/Q ops */ + ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest); + + /* Setup zero QWORDs into DCHECK CDBs */ + idst = dst_cnt; + list_for_each_entry_reverse(iter, &sw_desc->group_list, + chain_node) { + /* + * The last CDB corresponds to Q-parity check, + * the one before last CDB corresponds + * P-parity check + */ + if (idst == DMA_DEST_MAX_NUM) { + if (idst == dst_cnt) { + set_bit(PPC440SPE_DESC_QCHECK, + &iter->flags); + } else { + set_bit(PPC440SPE_DESC_PCHECK, + &iter->flags); + } + } else { + if (qdest) { + set_bit(PPC440SPE_DESC_QCHECK, + &iter->flags); + } else { + set_bit(PPC440SPE_DESC_PCHECK, + &iter->flags); + } + } + iter->xor_check_result = pqres; + + /* + * set it to zero, if check fail then result will + * be updated + */ + *iter->xor_check_result = 0; + ppc440spe_desc_set_dcheck(iter, ppc440spe_chan, + ppc440spe_qword); + + if (!(--dst_cnt)) + break; + } + + /* Setup sources and mults for P/Q ops */ + list_for_each_entry_continue_reverse(iter, &sw_desc->group_list, + chain_node) { + struct ppc440spe_adma_chan *chan; + u32 mult_dst; + + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + ppc440spe_desc_set_src_addr(iter, chan, 0, + DMA_CUED_XOR_HB, + src[src_cnt - 1]); + if (qdest) { + mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 : + DMA_CDB_SG_DST1; + ppc440spe_desc_set_src_mult(iter, chan, + DMA_CUED_MULT1_OFF, + mult_dst, + scf[src_cnt - 1]); + } + if (!(--src_cnt)) + break; + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for + * XOR ZERO_SUM operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum( + struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, + size_t len, enum sum_check_flags *result, unsigned long flags) +{ + struct dma_async_tx_descriptor *tx; + dma_addr_t pq[2]; + + /* validate P, disable Q */ + pq[0] = src[0]; + pq[1] = 0; + flags |= DMA_PREP_PQ_DISABLE_Q; + + tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1], + src_cnt - 1, 0, len, + result, flags); + return tx; +} + +/** + * ppc440spe_adma_set_dest - set destination address into descriptor + */ +static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; + + BUG_ON(index >= sw_desc->dst_cnt); + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* to do: support transfers lengths > + * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT + */ + ppc440spe_desc_set_dest_addr(sw_desc->group_head, + chan, 0, addr, index); + break; + case PPC440SPE_XOR_ID: + sw_desc = ppc440spe_get_group_entry(sw_desc, index); + ppc440spe_desc_set_dest_addr(sw_desc, + chan, 0, addr, index); + break; + } +} + +static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter, + struct ppc440spe_adma_chan *chan, dma_addr_t addr) +{ + /* To clear destinations update the descriptor + * (P or Q depending on index) as follows: + * addr is destination (0 corresponds to SG2): + */ + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0); + + /* ... and the addr is source: */ + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr); + + /* addr is always SG2 then the mult is always DST1 */ + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, 1); +} + +/** + * ppc440spe_adma_pq_set_dest - set destination address into descriptor + * for the PQXOR operation + */ +static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t *addrs, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *iter; + struct ppc440spe_adma_chan *chan; + dma_addr_t paddr, qaddr; + dma_addr_t addr = 0, ppath, qpath; + int index = 0, i; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + if (flags & DMA_PREP_PQ_DISABLE_P) + paddr = 0; + else + paddr = addrs[0]; + + if (flags & DMA_PREP_PQ_DISABLE_Q) + qaddr = 0; + else + qaddr = addrs[1]; + + if (!paddr || !qaddr) + addr = paddr ? paddr : qaddr; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* walk through the WXOR source list and set P/Q-destinations + * for each slot: + */ + if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + /* This is WXOR-only chain; may have 1/2 zero descs */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + index++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + index++; + + iter = ppc440spe_get_group_entry(sw_desc, index); + if (addr) { + /* one destination */ + list_for_each_entry_from(iter, + &sw_desc->group_list, chain_node) + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, addr, 0); + } else { + /* two destinations */ + list_for_each_entry_from(iter, + &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, paddr, 0); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, qaddr, 1); + } + } + + if (index) { + /* To clear destinations update the descriptor + * (1st,2nd, or both depending on flags) + */ + index = 0; + if (test_bit(PPC440SPE_ZERO_P, + &sw_desc->flags)) { + iter = ppc440spe_get_group_entry( + sw_desc, index++); + ppc440spe_adma_pq_zero_op(iter, chan, + paddr); + } + + if (test_bit(PPC440SPE_ZERO_Q, + &sw_desc->flags)) { + iter = ppc440spe_get_group_entry( + sw_desc, index++); + ppc440spe_adma_pq_zero_op(iter, chan, + qaddr); + } + + return; + } + } else { + /* This is RXOR-only or RXOR/WXOR mixed chain */ + + /* If we want to include destination into calculations, + * then make dest addresses cued with mult=1 (XOR). + */ + ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + /* Setup destination(s) in RXOR slot(s) */ + iter = ppc440spe_get_group_entry(sw_desc, index++); + ppc440spe_desc_set_dest_addr(iter, chan, + paddr ? ppath : qpath, + paddr ? paddr : qaddr, 0); + if (!addr) { + /* two destinations */ + iter = ppc440spe_get_group_entry(sw_desc, + index++); + ppc440spe_desc_set_dest_addr(iter, chan, + qpath, qaddr, 0); + } + + if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) { + /* Setup destination(s) in remaining WXOR + * slots + */ + iter = ppc440spe_get_group_entry(sw_desc, + index); + if (addr) { + /* one destination */ + list_for_each_entry_from(iter, + &sw_desc->group_list, + chain_node) + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + addr, 0); + + } else { + /* two destinations */ + list_for_each_entry_from(iter, + &sw_desc->group_list, + chain_node) { + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + paddr, 0); + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + qaddr, 1); + } + } + } + + } + break; + + case PPC440SPE_XOR_ID: + /* DMA2 descriptors have only 1 destination, so there are + * two chains - one for each dest. + * If we want to include destination into calculations, + * then make dest addresses cued with mult=1 (XOR). + */ + ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + iter = ppc440spe_get_group_entry(sw_desc, 0); + for (i = 0; i < sw_desc->descs_per_op; i++) { + ppc440spe_desc_set_dest_addr(iter, chan, + paddr ? ppath : qpath, + paddr ? paddr : qaddr, 0); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + if (!addr) { + /* Two destinations; setup Q here */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + for (i = 0; i < sw_desc->descs_per_op; i++) { + ppc440spe_desc_set_dest_addr(iter, + chan, qpath, qaddr, 0); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + } + + break; + } +} + +/** + * ppc440spe_adma_pq_zero_sum_set_dest - set destination address into descriptor + * for the PQ_ZERO_SUM operation + */ +static void ppc440spe_adma_pqzero_sum_set_dest( + struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t paddr, dma_addr_t qaddr) +{ + struct ppc440spe_adma_desc_slot *iter, *end; + struct ppc440spe_adma_chan *chan; + dma_addr_t addr = 0; + int idx; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + /* walk through the WXOR source list and set P/Q-destinations + * for each slot + */ + idx = (paddr && qaddr) ? 2 : 1; + /* set end */ + list_for_each_entry_reverse(end, &sw_desc->group_list, + chain_node) { + if (!(--idx)) + break; + } + /* set start */ + idx = (paddr && qaddr) ? 2 : 1; + iter = ppc440spe_get_group_entry(sw_desc, idx); + + if (paddr && qaddr) { + /* two destinations */ + list_for_each_entry_from(iter, &sw_desc->group_list, + chain_node) { + if (unlikely(iter == end)) + break; + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, paddr, 0); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, qaddr, 1); + } + } else { + /* one destination */ + addr = paddr ? paddr : qaddr; + list_for_each_entry_from(iter, &sw_desc->group_list, + chain_node) { + if (unlikely(iter == end)) + break; + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, addr, 0); + } + } + + /* The remaining descriptors are DATACHECK. These have no need in + * destination. Actually, these destinations are used there + * as sources for check operation. So, set addr as source. + */ + ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr); + + if (!addr) { + end = list_entry(end->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr); + } +} + +/** + * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor + */ +static inline void ppc440spe_desc_set_xor_src_cnt( + struct ppc440spe_adma_desc_slot *desc, + int src_cnt) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + hw_desc->cbc &= ~XOR_CDCR_OAC_MSK; + hw_desc->cbc |= src_cnt; +} + +/** + * ppc440spe_adma_pq_set_src - set source address into descriptor + */ +static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; + dma_addr_t haddr = 0; + struct ppc440spe_adma_desc_slot *iter = NULL; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain + */ + if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + /* RXOR-only or RXOR/WXOR operation */ + int iskip = test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags) ? 2 : 3; + + if (index == 0) { + /* 1st slot (RXOR) */ + /* setup sources region (R1-2-3, R1-2-4, + * or R1-2-5) + */ + if (test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags)) + haddr = DMA_RXOR12 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR123, + &sw_desc->flags)) + haddr = DMA_RXOR123 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR124, + &sw_desc->flags)) + haddr = DMA_RXOR124 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR125, + &sw_desc->flags)) + haddr = DMA_RXOR125 << + DMA_CUED_REGION_OFF; + else + BUG(); + haddr |= DMA_CUED_XOR_BASE; + iter = ppc440spe_get_group_entry(sw_desc, 0); + } else if (index < iskip) { + /* 1st slot (RXOR) + * shall actually set source address only once + * instead of first + */ + iter = NULL; + } else { + /* 2nd/3d and next slots (WXOR); + * skip first slot with RXOR + */ + haddr = DMA_CUED_XOR_HB; + iter = ppc440spe_get_group_entry(sw_desc, + index - iskip + sw_desc->dst_cnt); + } + } else { + int znum = 0; + + /* WXOR-only operation; skip first slots with + * zeroing destinations + */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + znum++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + znum++; + + haddr = DMA_CUED_XOR_HB; + iter = ppc440spe_get_group_entry(sw_desc, + index + znum); + } + + if (likely(iter)) { + ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr); + + if (!index && + test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) && + sw_desc->dst_cnt == 2) { + /* if we have two destinations for RXOR, then + * setup source in the second descr too + */ + iter = ppc440spe_get_group_entry(sw_desc, 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, + haddr, addr); + } + } + break; + + case PPC440SPE_XOR_ID: + /* DMA2 may do Biskup */ + iter = sw_desc->group_head; + if (iter->dst_cnt == 2) { + /* both P & Q calculations required; set P src here */ + ppc440spe_adma_dma2rxor_set_src(iter, index, addr); + + /* this is for Q */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + } + ppc440spe_adma_dma2rxor_set_src(iter, index, addr); + break; + } +} + +/** + * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor + */ +static void ppc440spe_adma_memcpy_xor_set_src( + struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + sw_desc = sw_desc->group_head; + + if (likely(sw_desc)) + ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr); +} + +/** + * ppc440spe_adma_dma2rxor_inc_addr - + */ +static void ppc440spe_adma_dma2rxor_inc_addr( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_rxor *cursor, int index, int src_cnt) +{ + cursor->addr_count++; + if (index == src_cnt - 1) { + ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count); + } else if (cursor->addr_count == XOR_MAX_OPS) { + ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count); + cursor->addr_count = 0; + cursor->desc_count++; + } +} + +/** + * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB + */ +static int ppc440spe_adma_dma2rxor_prep_src( + struct ppc440spe_adma_desc_slot *hdesc, + struct ppc440spe_rxor *cursor, int index, + int src_cnt, u32 addr) +{ + u32 sign; + struct ppc440spe_adma_desc_slot *desc = hdesc; + int i; + + for (i = 0; i < cursor->desc_count; i++) { + desc = list_entry(hdesc->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + switch (cursor->state) { + case 0: + if (addr == cursor->addrl + cursor->len) { + /* direct RXOR */ + cursor->state = 1; + cursor->xor_count++; + if (index == src_cnt-1) { + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (cursor->addrl == addr + cursor->len) { + /* reverse RXOR */ + cursor->state = 1; + cursor->xor_count++; + set_bit(cursor->addr_count, &desc->reverse_flags[0]); + if (index == src_cnt-1) { + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else { + printk(KERN_ERR "Cannot build " + "DMA2 RXOR command block.\n"); + BUG(); + } + break; + case 1: + sign = test_bit(cursor->addr_count, + desc->reverse_flags) + ? -1 : 1; + if (index == src_cnt-2 || (sign == -1 + && addr != cursor->addrl - 2*cursor->len)) { + cursor->state = 0; + cursor->xor_count = 1; + cursor->addrl = addr; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } else if (addr == cursor->addrl + 2*sign*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR123 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (addr == cursor->addrl + 3*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR124 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (addr == cursor->addrl + 4*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR125 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else { + cursor->state = 0; + cursor->xor_count = 1; + cursor->addrl = addr; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + break; + case 2: + cursor->state = 0; + cursor->addrl = addr; + cursor->xor_count++; + if (index) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + break; + } + + return 0; +} + +/** + * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that + * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call + */ +static void ppc440spe_adma_dma2rxor_set_src( + struct ppc440spe_adma_desc_slot *desc, + int index, dma_addr_t addr) +{ + struct xor_cb *xcb = desc->hw_desc; + int k = 0, op = 0, lop = 0; + + /* get the RXOR operand which corresponds to index addr */ + while (op <= index) { + lop = op; + if (k == XOR_MAX_OPS) { + k = 0; + desc = list_entry(desc->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + xcb = desc->hw_desc; + + } + if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) == + (DMA_RXOR12 << DMA_CUED_REGION_OFF)) + op += 2; + else + op += 3; + } + + BUG_ON(k < 1); + + if (test_bit(k-1, desc->reverse_flags)) { + /* reverse operand order; put last op in RXOR group */ + if (index == op - 1) + ppc440spe_rxor_set_src(desc, k - 1, addr); + } else { + /* direct operand order; put first op in RXOR group */ + if (index == lop) + ppc440spe_rxor_set_src(desc, k - 1, addr); + } +} + +/** + * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that + * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call + */ +static void ppc440spe_adma_dma2rxor_set_mult( + struct ppc440spe_adma_desc_slot *desc, + int index, u8 mult) +{ + struct xor_cb *xcb = desc->hw_desc; + int k = 0, op = 0, lop = 0; + + /* get the RXOR operand which corresponds to index mult */ + while (op <= index) { + lop = op; + if (k == XOR_MAX_OPS) { + k = 0; + desc = list_entry(desc->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + xcb = desc->hw_desc; + + } + if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) == + (DMA_RXOR12 << DMA_CUED_REGION_OFF)) + op += 2; + else + op += 3; + } + + BUG_ON(k < 1); + if (test_bit(k-1, desc->reverse_flags)) { + /* reverse order */ + ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult); + } else { + /* direct order */ + ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult); + } +} + +/** + * ppc440spe_init_rxor_cursor - + */ +static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor) +{ + memset(cursor, 0, sizeof(struct ppc440spe_rxor)); + cursor->state = 2; +} + +/** + * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into + * descriptor for the PQXOR operation + */ +static void ppc440spe_adma_pq_set_src_mult( + struct ppc440spe_adma_desc_slot *sw_desc, + unsigned char mult, int index, int dst_pos) +{ + struct ppc440spe_adma_chan *chan; + u32 mult_idx, mult_dst; + struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + int region = test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags) ? 2 : 3; + + if (index < region) { + /* RXOR multipliers */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->dst_cnt - 1); + if (sw_desc->dst_cnt == 2) + iter1 = ppc440spe_get_group_entry( + sw_desc, 0); + + mult_idx = DMA_CUED_MULT1_OFF + (index << 3); + mult_dst = DMA_CDB_SG_SRC; + } else { + /* WXOR multiplier */ + iter = ppc440spe_get_group_entry(sw_desc, + index - region + + sw_desc->dst_cnt); + mult_idx = DMA_CUED_MULT1_OFF; + mult_dst = dst_pos ? DMA_CDB_SG_DST2 : + DMA_CDB_SG_DST1; + } + } else { + int znum = 0; + + /* WXOR-only; + * skip first slots with destinations (if ZERO_DST has + * place) + */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + znum++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + znum++; + + iter = ppc440spe_get_group_entry(sw_desc, index + znum); + mult_idx = DMA_CUED_MULT1_OFF; + mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1; + } + + if (likely(iter)) { + ppc440spe_desc_set_src_mult(iter, chan, + mult_idx, mult_dst, mult); + + if (unlikely(iter1)) { + /* if we have two destinations for RXOR, then + * we've just set Q mult. Set-up P now. + */ + ppc440spe_desc_set_src_mult(iter1, chan, + mult_idx, mult_dst, 1); + } + + } + break; + + case PPC440SPE_XOR_ID: + iter = sw_desc->group_head; + if (sw_desc->dst_cnt == 2) { + /* both P & Q calculations required; set P mult here */ + ppc440spe_adma_dma2rxor_set_mult(iter, index, 1); + + /* and then set Q mult */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + } + ppc440spe_adma_dma2rxor_set_mult(iter, index, mult); + break; + } +} + +/** + * ppc440spe_adma_free_chan_resources - free the resources allocated + */ +static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *iter, *_iter; + int in_use_descs = 0; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + ppc440spe_adma_slot_cleanup(ppc440spe_chan); + + spin_lock_bh(&ppc440spe_chan->lock); + list_for_each_entry_safe(iter, _iter, &ppc440spe_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe_reverse(iter, _iter, + &ppc440spe_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + ppc440spe_chan->slots_allocated--; + } + ppc440spe_chan->last_used = NULL; + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d %s slots_allocated %d\n", + ppc440spe_chan->device->id, + __func__, ppc440spe_chan->slots_allocated); + spin_unlock_bh(&ppc440spe_chan->lock); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n", + in_use_descs - 1); +} + +/** + * ppc440spe_adma_tx_status - poll the status of an ADMA transaction + * @chan: ADMA channel handle + * @cookie: ADMA transaction identifier + * @txstate: a holder for the current state of the channel + */ +static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + enum dma_status ret; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + ppc440spe_adma_slot_cleanup(ppc440spe_chan); + + return dma_cookie_status(chan, cookie, txstate); +} + +/** + * ppc440spe_adma_eot_handler - end of transfer interrupt handler + */ +static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data) +{ + struct ppc440spe_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + tasklet_schedule(&chan->irq_tasklet); + ppc440spe_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} + +/** + * ppc440spe_adma_err_handler - DMA error interrupt handler; + * do the same things as a eot handler + */ +static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data) +{ + struct ppc440spe_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + tasklet_schedule(&chan->irq_tasklet); + ppc440spe_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} + +/** + * ppc440spe_test_callback - called when test operation has been done + */ +static void ppc440spe_test_callback(void *unused) +{ + complete(&ppc440spe_r6_test_comp); +} + +/** + * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w + */ +static void ppc440spe_adma_issue_pending(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s %d \n", ppc440spe_chan->device->id, + __func__, ppc440spe_chan->pending); + + if (ppc440spe_chan->pending) { + ppc440spe_chan->pending = 0; + ppc440spe_chan_append(ppc440spe_chan); + } +} + +/** + * ppc440spe_chan_start_null_xor - initiate the first XOR operation (DMA engines + * use FIFOs (as opposite to chains used in XOR) so this is a XOR + * specific operation) + */ +static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + spin_lock_bh(&chan->lock); + slot_cnt = ppc440spe_chan_xor_slot_count(0, 2, &slots_per_op); + sw_desc = ppc440spe_adma_alloc_slots(chan, slot_cnt, slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + list_splice_init(&sw_desc->group_list, &chan->chain); + async_tx_ack(&sw_desc->async_tx); + ppc440spe_desc_init_null_xor(group_start); + + cookie = dma_cookie_assign(&sw_desc->async_tx); + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + chan->common.completed_cookie = cookie - 1; + + /* channel should not be busy */ + BUG_ON(ppc440spe_chan_is_busy(chan)); + + /* set the descriptor address */ + ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc); + + /* run the descriptor */ + ppc440spe_chan_run(chan); + } else + printk(KERN_ERR "ppc440spe adma%d" + " failed to allocate null descriptor\n", + chan->device->id); + spin_unlock_bh(&chan->lock); +} + +/** + * ppc440spe_test_raid6 - test are RAID-6 capabilities enabled successfully. + * For this we just perform one WXOR operation with the same source + * and destination addresses, the GF-multiplier is 1; so if RAID-6 + * capabilities are enabled then we'll get src/dst filled with zero. + */ +static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *sw_desc, *iter; + struct page *pg; + char *a; + dma_addr_t dma_addr, addrs[2]; + unsigned long op = 0; + int rval = 0; + + set_bit(PPC440SPE_DESC_WXOR, &op); + + pg = alloc_page(GFP_KERNEL); + if (!pg) + return -ENOMEM; + + spin_lock_bh(&chan->lock); + sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1); + if (sw_desc) { + /* 1 src, 1 dsr, int_ena, WXOR */ + ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op); + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE); + iter->unmap_len = PAGE_SIZE; + } + } else { + rval = -EFAULT; + spin_unlock_bh(&chan->lock); + goto exit; + } + spin_unlock_bh(&chan->lock); + + /* Fill the test page with ones */ + memset(page_address(pg), 0xFF, PAGE_SIZE); + dma_addr = dma_map_page(chan->device->dev, pg, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Setup addresses */ + ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0); + ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0); + addrs[0] = dma_addr; + addrs[1] = 0; + ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q); + + async_tx_ack(&sw_desc->async_tx); + sw_desc->async_tx.callback = ppc440spe_test_callback; + sw_desc->async_tx.callback_param = NULL; + + init_completion(&ppc440spe_r6_test_comp); + + ppc440spe_adma_tx_submit(&sw_desc->async_tx); + ppc440spe_adma_issue_pending(&chan->common); + + wait_for_completion(&ppc440spe_r6_test_comp); + + /* Now check if the test page is zeroed */ + a = page_address(pg); + if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) { + /* page is zero - RAID-6 enabled */ + rval = 0; + } else { + /* RAID-6 was not enabled */ + rval = -EINVAL; + } +exit: + __free_page(pg); + return rval; +} + +static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev) +{ + switch (adev->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_cap_set(DMA_MEMCPY, adev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); + dma_cap_set(DMA_PQ, adev->common.cap_mask); + dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask); + dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask); + break; + case PPC440SPE_XOR_ID: + dma_cap_set(DMA_XOR, adev->common.cap_mask); + dma_cap_set(DMA_PQ, adev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); + adev->common.cap_mask = adev->common.cap_mask; + break; + } + + /* Set base routines */ + adev->common.device_alloc_chan_resources = + ppc440spe_adma_alloc_chan_resources; + adev->common.device_free_chan_resources = + ppc440spe_adma_free_chan_resources; + adev->common.device_tx_status = ppc440spe_adma_tx_status; + adev->common.device_issue_pending = ppc440spe_adma_issue_pending; + + /* Set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, adev->common.cap_mask)) { + adev->common.device_prep_dma_memcpy = + ppc440spe_adma_prep_dma_memcpy; + } + if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) { + adev->common.max_xor = XOR_MAX_OPS; + adev->common.device_prep_dma_xor = + ppc440spe_adma_prep_dma_xor; + } + if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + dma_set_maxpq(&adev->common, + DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0); + break; + case PPC440SPE_DMA1_ID: + dma_set_maxpq(&adev->common, + DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0); + break; + case PPC440SPE_XOR_ID: + adev->common.max_pq = XOR_MAX_OPS * 3; + break; + } + adev->common.device_prep_dma_pq = + ppc440spe_adma_prep_dma_pq; + } + if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + adev->common.max_pq = DMA0_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + case PPC440SPE_DMA1_ID: + adev->common.max_pq = DMA1_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + } + adev->common.device_prep_dma_pq_val = + ppc440spe_adma_prep_dma_pqzero_sum; + } + if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + adev->common.max_xor = DMA0_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + case PPC440SPE_DMA1_ID: + adev->common.max_xor = DMA1_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + } + adev->common.device_prep_dma_xor_val = + ppc440spe_adma_prep_dma_xor_zero_sum; + } + if (dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask)) { + adev->common.device_prep_dma_interrupt = + ppc440spe_adma_prep_dma_interrupt; + } + pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: " + "( %s%s%s%s%s%s)\n", + dev_name(adev->dev), + dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "", + dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "", + dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "", + dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "", + dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "", + dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : ""); +} + +static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev, + struct ppc440spe_adma_chan *chan, + int *initcode) +{ + struct platform_device *ofdev; + struct device_node *np; + int ret; + + ofdev = container_of(adev->dev, struct platform_device, dev); + np = ofdev->dev.of_node; + if (adev->id != PPC440SPE_XOR_ID) { + adev->err_irq = irq_of_parse_and_map(np, 1); + if (!adev->err_irq) { + dev_warn(adev->dev, "no err irq resource?\n"); + *initcode = PPC_ADMA_INIT_IRQ2; + adev->err_irq = -ENXIO; + } else + atomic_inc(&ppc440spe_adma_err_irq_ref); + } else { + adev->err_irq = -ENXIO; + } + + adev->irq = irq_of_parse_and_map(np, 0); + if (!adev->irq) { + dev_err(adev->dev, "no irq resource\n"); + *initcode = PPC_ADMA_INIT_IRQ1; + ret = -ENXIO; + goto err_irq_map; + } + dev_dbg(adev->dev, "irq %d, err irq %d\n", + adev->irq, adev->err_irq); + + ret = request_irq(adev->irq, ppc440spe_adma_eot_handler, + 0, dev_driver_string(adev->dev), chan); + if (ret) { + dev_err(adev->dev, "can't request irq %d\n", + adev->irq); + *initcode = PPC_ADMA_INIT_IRQ1; + ret = -EIO; + goto err_req1; + } + + /* only DMA engines have a separate error IRQ + * so it's Ok if err_irq < 0 in XOR engine case. + */ + if (adev->err_irq > 0) { + /* both DMA engines share common error IRQ */ + ret = request_irq(adev->err_irq, + ppc440spe_adma_err_handler, + IRQF_SHARED, + dev_driver_string(adev->dev), + chan); + if (ret) { + dev_err(adev->dev, "can't request irq %d\n", + adev->err_irq); + *initcode = PPC_ADMA_INIT_IRQ2; + ret = -EIO; + goto err_req2; + } + } + + if (adev->id == PPC440SPE_XOR_ID) { + /* enable XOR engine interrupts */ + iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT | + XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT, + &adev->xor_reg->ier); + } else { + u32 mask, enable; + + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); + if (!np) { + pr_err("%s: can't find I2O device tree node\n", + __func__); + ret = -ENODEV; + goto err_req2; + } + adev->i2o_reg = of_iomap(np, 0); + if (!adev->i2o_reg) { + pr_err("%s: failed to map I2O registers\n", __func__); + of_node_put(np); + ret = -EINVAL; + goto err_req2; + } + of_node_put(np); + /* Unmask 'CS FIFO Attention' interrupts and + * enable generating interrupts on errors + */ + enable = (adev->id == PPC440SPE_DMA0_ID) ? + ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) : + ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM); + mask = ioread32(&adev->i2o_reg->iopim) & enable; + iowrite32(mask, &adev->i2o_reg->iopim); + } + return 0; + +err_req2: + free_irq(adev->irq, chan); +err_req1: + irq_dispose_mapping(adev->irq); +err_irq_map: + if (adev->err_irq > 0) { + if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) + irq_dispose_mapping(adev->err_irq); + } + return ret; +} + +static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev, + struct ppc440spe_adma_chan *chan) +{ + u32 mask, disable; + + if (adev->id == PPC440SPE_XOR_ID) { + /* disable XOR engine interrupts */ + mask = ioread32be(&adev->xor_reg->ier); + mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT | + XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT); + iowrite32be(mask, &adev->xor_reg->ier); + } else { + /* disable DMAx engine interrupts */ + disable = (adev->id == PPC440SPE_DMA0_ID) ? + (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) : + (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM); + mask = ioread32(&adev->i2o_reg->iopim) | disable; + iowrite32(mask, &adev->i2o_reg->iopim); + } + free_irq(adev->irq, chan); + irq_dispose_mapping(adev->irq); + if (adev->err_irq > 0) { + free_irq(adev->err_irq, chan); + if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) { + irq_dispose_mapping(adev->err_irq); + iounmap(adev->i2o_reg); + } + } +} + +/** + * ppc440spe_adma_probe - probe the asynch device + */ +static int ppc440spe_adma_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct resource res; + struct ppc440spe_adma_device *adev; + struct ppc440spe_adma_chan *chan; + struct ppc_dma_chan_ref *ref, *_ref; + int ret = 0, initcode = PPC_ADMA_INIT_OK; + const u32 *idx; + int len; + void *regs; + u32 id, pool_size; + + if (of_device_is_compatible(np, "amcc,xor-accelerator")) { + id = PPC440SPE_XOR_ID; + /* As far as the XOR engine is concerned, it does not + * use FIFOs but uses linked list. So there is no dependency + * between pool size to allocate and the engine configuration. + */ + pool_size = PAGE_SIZE << 1; + } else { + /* it is DMA0 or DMA1 */ + idx = of_get_property(np, "cell-index", &len); + if (!idx || (len != sizeof(u32))) { + dev_err(&ofdev->dev, "Device node %pOF has missing " + "or invalid cell-index property\n", + np); + return -EINVAL; + } + id = *idx; + /* DMA0,1 engines use FIFO to maintain CDBs, so we + * should allocate the pool accordingly to size of this + * FIFO. Thus, the pool size depends on the FIFO depth: + * how much CDBs pointers the FIFO may contain then so + * much CDBs we should provide in the pool. + * That is + * CDB size = 32B; + * CDBs number = (DMA0_FIFO_SIZE >> 3); + * Pool size = CDBs number * CDB size = + * = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2. + */ + pool_size = (id == PPC440SPE_DMA0_ID) ? + DMA0_FIFO_SIZE : DMA1_FIFO_SIZE; + pool_size <<= 2; + } + + if (of_address_to_resource(np, 0, &res)) { + dev_err(&ofdev->dev, "failed to get memory resource\n"); + initcode = PPC_ADMA_INIT_MEMRES; + ret = -ENODEV; + goto out; + } + + if (!request_mem_region(res.start, resource_size(&res), + dev_driver_string(&ofdev->dev))) { + dev_err(&ofdev->dev, "failed to request memory region %pR\n", + &res); + initcode = PPC_ADMA_INIT_MEMREG; + ret = -EBUSY; + goto out; + } + + /* create a device */ + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) { + initcode = PPC_ADMA_INIT_ALLOC; + ret = -ENOMEM; + goto err_adev_alloc; + } + + adev->id = id; + adev->pool_size = pool_size; + /* allocate coherent memory for hardware descriptors */ + adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev, + adev->pool_size, &adev->dma_desc_pool, + GFP_KERNEL); + if (adev->dma_desc_pool_virt == NULL) { + dev_err(&ofdev->dev, "failed to allocate %d bytes of coherent " + "memory for hardware descriptors\n", + adev->pool_size); + initcode = PPC_ADMA_INIT_COHERENT; + ret = -ENOMEM; + goto err_dma_alloc; + } + dev_dbg(&ofdev->dev, "allocated descriptor pool virt 0x%p phys 0x%llx\n", + adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool); + + regs = ioremap(res.start, resource_size(&res)); + if (!regs) { + dev_err(&ofdev->dev, "failed to ioremap regs!\n"); + ret = -ENOMEM; + goto err_regs_alloc; + } + + if (adev->id == PPC440SPE_XOR_ID) { + adev->xor_reg = regs; + /* Reset XOR */ + iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr); + iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr); + } else { + size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ? + DMA0_FIFO_SIZE : DMA1_FIFO_SIZE; + adev->dma_reg = regs; + /* DMAx_FIFO_SIZE is defined in bytes, + * - is defined in number of CDB pointers (8byte). + * DMA FIFO Length = CSlength + CPlength, where + * CSlength = CPlength = (fsiz + 1) * 8. + */ + iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2), + &adev->dma_reg->fsiz); + /* Configure DMA engine */ + iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN, + &adev->dma_reg->cfg); + /* Clear Status */ + iowrite32(~0, &adev->dma_reg->dsts); + } + + adev->dev = &ofdev->dev; + adev->common.dev = &ofdev->dev; + INIT_LIST_HEAD(&adev->common.channels); + platform_set_drvdata(ofdev, adev); + + /* create a channel */ + chan = kzalloc(sizeof(*chan), GFP_KERNEL); + if (!chan) { + initcode = PPC_ADMA_INIT_CHANNEL; + ret = -ENOMEM; + goto err_chan_alloc; + } + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->chain); + INIT_LIST_HEAD(&chan->all_slots); + chan->device = adev; + chan->common.device = &adev->common; + dma_cookie_init(&chan->common); + list_add_tail(&chan->common.device_node, &adev->common.channels); + tasklet_setup(&chan->irq_tasklet, ppc440spe_adma_tasklet); + + /* allocate and map helper pages for async validation or + * async_mult/async_sum_product operations on DMA0/1. + */ + if (adev->id != PPC440SPE_XOR_ID) { + chan->pdest_page = alloc_page(GFP_KERNEL); + chan->qdest_page = alloc_page(GFP_KERNEL); + if (!chan->pdest_page || + !chan->qdest_page) { + if (chan->pdest_page) + __free_page(chan->pdest_page); + if (chan->qdest_page) + __free_page(chan->qdest_page); + ret = -ENOMEM; + goto err_page_alloc; + } + chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + + ref = kmalloc(sizeof(*ref), GFP_KERNEL); + if (ref) { + ref->chan = &chan->common; + INIT_LIST_HEAD(&ref->node); + list_add_tail(&ref->node, &ppc440spe_adma_chan_list); + } else { + dev_err(&ofdev->dev, "failed to allocate channel reference!\n"); + ret = -ENOMEM; + goto err_ref_alloc; + } + + ret = ppc440spe_adma_setup_irqs(adev, chan, &initcode); + if (ret) + goto err_irq; + + ppc440spe_adma_init_capabilities(adev); + + ret = dma_async_device_register(&adev->common); + if (ret) { + initcode = PPC_ADMA_INIT_REGISTER; + dev_err(&ofdev->dev, "failed to register dma device\n"); + goto err_dev_reg; + } + + goto out; + +err_dev_reg: + ppc440spe_adma_release_irqs(adev, chan); +err_irq: + list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, node) { + if (chan == to_ppc440spe_adma_chan(ref->chan)) { + list_del(&ref->node); + kfree(ref); + } + } +err_ref_alloc: + if (adev->id != PPC440SPE_XOR_ID) { + dma_unmap_page(&ofdev->dev, chan->pdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(&ofdev->dev, chan->qdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(chan->pdest_page); + __free_page(chan->qdest_page); + } +err_page_alloc: + kfree(chan); +err_chan_alloc: + if (adev->id == PPC440SPE_XOR_ID) + iounmap(adev->xor_reg); + else + iounmap(adev->dma_reg); +err_regs_alloc: + dma_free_coherent(adev->dev, adev->pool_size, + adev->dma_desc_pool_virt, + adev->dma_desc_pool); +err_dma_alloc: + kfree(adev); +err_adev_alloc: + release_mem_region(res.start, resource_size(&res)); +out: + if (id < PPC440SPE_ADMA_ENGINES_NUM) + ppc440spe_adma_devices[id] = initcode; + + return ret; +} + +/** + * ppc440spe_adma_remove - remove the asynch device + */ +static int ppc440spe_adma_remove(struct platform_device *ofdev) +{ + struct ppc440spe_adma_device *adev = platform_get_drvdata(ofdev); + struct device_node *np = ofdev->dev.of_node; + struct resource res; + struct dma_chan *chan, *_chan; + struct ppc_dma_chan_ref *ref, *_ref; + struct ppc440spe_adma_chan *ppc440spe_chan; + + if (adev->id < PPC440SPE_ADMA_ENGINES_NUM) + ppc440spe_adma_devices[adev->id] = -1; + + dma_async_device_unregister(&adev->common); + + list_for_each_entry_safe(chan, _chan, &adev->common.channels, + device_node) { + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + ppc440spe_adma_release_irqs(adev, ppc440spe_chan); + tasklet_kill(&ppc440spe_chan->irq_tasklet); + if (adev->id != PPC440SPE_XOR_ID) { + dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(ppc440spe_chan->pdest_page); + __free_page(ppc440spe_chan->qdest_page); + } + list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, + node) { + if (ppc440spe_chan == + to_ppc440spe_adma_chan(ref->chan)) { + list_del(&ref->node); + kfree(ref); + } + } + list_del(&chan->device_node); + kfree(ppc440spe_chan); + } + + dma_free_coherent(adev->dev, adev->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + if (adev->id == PPC440SPE_XOR_ID) + iounmap(adev->xor_reg); + else + iounmap(adev->dma_reg); + of_address_to_resource(np, 0, &res); + release_mem_region(res.start, resource_size(&res)); + kfree(adev); + return 0; +} + +/* + * /sys driver interface to enable h/w RAID-6 capabilities + * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/ + * directory are "devices", "enable" and "poly". + * "devices" shows available engines. + * "enable" is used to enable RAID-6 capabilities or to check + * whether these has been activated. + * "poly" allows setting/checking used polynomial (for PPC440SPe only). + */ + +static ssize_t devices_show(struct device_driver *dev, char *buf) +{ + ssize_t size = 0; + int i; + + for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) { + if (ppc440spe_adma_devices[i] == -1) + continue; + size += sysfs_emit_at(buf, size, "PPC440SP(E)-ADMA.%d: %s\n", + i, ppc_adma_errors[ppc440spe_adma_devices[i]]); + } + return size; +} +static DRIVER_ATTR_RO(devices); + +static ssize_t enable_show(struct device_driver *dev, char *buf) +{ + return sysfs_emit(buf, "PPC440SP(e) RAID-6 capabilities are %sABLED.\n", + ppc440spe_r6_enabled ? "EN" : "DIS"); +} + +static ssize_t enable_store(struct device_driver *dev, const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!count || count > 11) + return -EINVAL; + + if (!ppc440spe_r6_tchan) + return -EFAULT; + + /* Write a key */ + err = kstrtoul(buf, 16, &val); + if (err) + return err; + + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val); + isync(); + + /* Verify whether it really works now */ + if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) { + pr_info("PPC440SP(e) RAID-6 has been activated " + "successfully\n"); + ppc440spe_r6_enabled = 1; + } else { + pr_info("PPC440SP(e) RAID-6 hasn't been activated!" + " Error key ?\n"); + ppc440spe_r6_enabled = 0; + } + return count; +} +static DRIVER_ATTR_RW(enable); + +static ssize_t poly_show(struct device_driver *dev, char *buf) +{ + ssize_t size = 0; + u32 reg; + +#ifdef CONFIG_440SP + /* 440SP has fixed polynomial */ + reg = 0x4d; +#else + reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL); + reg >>= MQ0_CFBHL_POLY; + reg &= 0xFF; +#endif + + size = sysfs_emit(buf, "PPC440SP(e) RAID-6 driver " + "uses 0x1%02x polynomial.\n", reg); + return size; +} + +static ssize_t poly_store(struct device_driver *dev, const char *buf, + size_t count) +{ + unsigned long reg, val; + int err; +#ifdef CONFIG_440SP + /* 440SP uses default 0x14D polynomial only */ + return -EINVAL; +#endif + + if (!count || count > 6) + return -EINVAL; + + /* e.g., 0x14D or 0x11D */ + err = kstrtoul(buf, 16, &val); + if (err) + return err; + + if (val & ~0x1FF) + return -EINVAL; + + val &= 0xFF; + reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL); + reg &= ~(0xFF << MQ0_CFBHL_POLY); + reg |= val << MQ0_CFBHL_POLY; + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg); + + return count; +} +static DRIVER_ATTR_RW(poly); + +/* + * Common initialisation for RAID engines; allocate memory for + * DMAx FIFOs, perform configuration common for all DMA engines. + * Further DMA engine specific configuration is done at probe time. + */ +static int ppc440spe_configure_raid_devices(void) +{ + struct device_node *np; + struct resource i2o_res; + struct i2o_regs __iomem *i2o_reg; + dcr_host_t i2o_dcr_host; + unsigned int dcr_base, dcr_len; + int i, ret; + + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); + if (!np) { + pr_err("%s: can't find I2O device tree node\n", + __func__); + return -ENODEV; + } + + if (of_address_to_resource(np, 0, &i2o_res)) { + of_node_put(np); + return -EINVAL; + } + + i2o_reg = of_iomap(np, 0); + if (!i2o_reg) { + pr_err("%s: failed to map I2O registers\n", __func__); + of_node_put(np); + return -EINVAL; + } + + /* Get I2O DCRs base */ + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + if (!dcr_base && !dcr_len) { + pr_err("%pOF: can't get DCR registers base/len!\n", np); + of_node_put(np); + iounmap(i2o_reg); + return -ENODEV; + } + + i2o_dcr_host = dcr_map(np, dcr_base, dcr_len); + if (!DCR_MAP_OK(i2o_dcr_host)) { + pr_err("%pOF: failed to map DCRs!\n", np); + of_node_put(np); + iounmap(i2o_reg); + return -ENODEV; + } + of_node_put(np); + + /* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share + * the base address of FIFO memory space. + * Actually we need twice more physical memory than programmed in the + * register (because there are two FIFOs for each DMA: CP and CS) + */ + ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1, + GFP_KERNEL); + if (!ppc440spe_dma_fifo_buf) { + pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__); + iounmap(i2o_reg); + dcr_unmap(i2o_dcr_host, dcr_len); + return -ENOMEM; + } + + /* + * Configure h/w + */ + /* Reset I2O/DMA */ + mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA); + mtdcri(SDR0, DCRN_SDR0_SRST, 0); + + /* Setup the base address of mmaped registers */ + dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32)); + dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) | + I2O_REG_ENABLE); + dcr_unmap(i2o_dcr_host, dcr_len); + + /* Setup FIFO memory space base address */ + iowrite32(0, &i2o_reg->ifbah); + iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal); + + /* set zero FIFO size for I2O, so the whole + * ppc440spe_dma_fifo_buf is used by DMAs. + * DMAx_FIFOs will be configured while probe. + */ + iowrite32(0, &i2o_reg->ifsiz); + iounmap(i2o_reg); + + /* To prepare WXOR/RXOR functionality we need access to + * Memory Queue Module DCRs (finally it will be enabled + * via /sys interface of the ppc440spe ADMA driver). + */ + np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe"); + if (!np) { + pr_err("%s: can't find MQ device tree node\n", + __func__); + ret = -ENODEV; + goto out_free; + } + + /* Get MQ DCRs base */ + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + if (!dcr_base && !dcr_len) { + pr_err("%pOF: can't get DCR registers base/len!\n", np); + ret = -ENODEV; + goto out_mq; + } + + ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len); + if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) { + pr_err("%pOF: failed to map DCRs!\n", np); + ret = -ENODEV; + goto out_mq; + } + of_node_put(np); + ppc440spe_mq_dcr_len = dcr_len; + + /* Set HB alias */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB); + + /* Set: + * - LL transaction passing limit to 1; + * - Memory controller cycle limit to 1; + * - Galois Polynomial to 0x14d (default) + */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, + (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) | + (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY)); + + atomic_set(&ppc440spe_adma_err_irq_ref, 0); + for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) + ppc440spe_adma_devices[i] = -1; + + return 0; + +out_mq: + of_node_put(np); +out_free: + kfree(ppc440spe_dma_fifo_buf); + return ret; +} + +static const struct of_device_id ppc440spe_adma_of_match[] = { + { .compatible = "ibm,dma-440spe", }, + { .compatible = "amcc,xor-accelerator", }, + {}, +}; +MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match); + +static struct platform_driver ppc440spe_adma_driver = { + .probe = ppc440spe_adma_probe, + .remove = ppc440spe_adma_remove, + .driver = { + .name = "PPC440SP(E)-ADMA", + .of_match_table = ppc440spe_adma_of_match, + }, +}; + +static __init int ppc440spe_adma_init(void) +{ + int ret; + + ret = ppc440spe_configure_raid_devices(); + if (ret) + return ret; + + ret = platform_driver_register(&ppc440spe_adma_driver); + if (ret) { + pr_err("%s: failed to register platform driver\n", + __func__); + goto out_reg; + } + + /* Initialization status */ + ret = driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); + if (ret) + goto out_dev; + + /* RAID-6 h/w enable entry */ + ret = driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); + if (ret) + goto out_en; + + /* GF polynomial to use */ + ret = driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_poly); + if (!ret) + return ret; + + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); +out_en: + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); +out_dev: + /* User will not be able to enable h/w RAID-6 */ + pr_err("%s: failed to create RAID-6 driver interface\n", + __func__); + platform_driver_unregister(&ppc440spe_adma_driver); +out_reg: + dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len); + kfree(ppc440spe_dma_fifo_buf); + return ret; +} + +static void __exit ppc440spe_adma_exit(void) +{ + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_poly); + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); + platform_driver_unregister(&ppc440spe_adma_driver); + dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len); + kfree(ppc440spe_dma_fifo_buf); +} + +arch_initcall(ppc440spe_adma_init); +module_exit(ppc440spe_adma_exit); + +MODULE_AUTHOR("Yuri Tikhonov "); +MODULE_DESCRIPTION("PPC440SPE ADMA Engine Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h new file mode 100644 index 0000000000..f8a5d7c1fb --- /dev/null +++ b/drivers/dma/ppc4xx/adma.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov + */ + +#ifndef _PPC440SPE_ADMA_H +#define _PPC440SPE_ADMA_H + +#include +#include "dma.h" +#include "xor.h" + +#define to_ppc440spe_adma_chan(chan) \ + container_of(chan, struct ppc440spe_adma_chan, common) +#define to_ppc440spe_adma_device(dev) \ + container_of(dev, struct ppc440spe_adma_device, common) +#define tx_to_ppc440spe_adma_slot(tx) \ + container_of(tx, struct ppc440spe_adma_desc_slot, async_tx) + +/* Default polynomial (for 440SP is only available) */ +#define PPC440SPE_DEFAULT_POLY 0x4d + +#define PPC440SPE_ADMA_ENGINES_NUM (XOR_ENGINES_NUM + DMA_ENGINES_NUM) + +#define PPC440SPE_ADMA_WATCHDOG_MSEC 3 +#define PPC440SPE_ADMA_THRESHOLD 1 + +#define PPC440SPE_DMA0_ID 0 +#define PPC440SPE_DMA1_ID 1 +#define PPC440SPE_XOR_ID 2 + +#define PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT 0xFFFFFFUL +/* this is the XOR_CBBCR width */ +#define PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT (1 << 31) +#define PPC440SPE_ADMA_ZERO_SUM_MAX_BYTE_COUNT PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT + +#define PPC440SPE_RXOR_RUN 0 + +#define MQ0_CF2H_RXOR_BS_MASK 0x1FF + +#undef ADMA_LL_DEBUG + +/** + * struct ppc440spe_adma_device - internal representation of an ADMA device + * @dev: device + * @dma_reg: base for DMAx register access + * @xor_reg: base for XOR register access + * @i2o_reg: base for I2O register access + * @id: HW ADMA Device selector + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @pool_size: size of the pool + * @irq: DMAx or XOR irq number + * @err_irq: DMAx error irq number + * @common: embedded struct dma_device + */ +struct ppc440spe_adma_device { + struct device *dev; + struct dma_regs __iomem *dma_reg; + struct xor_regs __iomem *xor_reg; + struct i2o_regs __iomem *i2o_reg; + int id; + void *dma_desc_pool_virt; + dma_addr_t dma_desc_pool; + size_t pool_size; + int irq; + int err_irq; + struct dma_device common; +}; + +/** + * struct ppc440spe_adma_chan - internal representation of an ADMA channel + * @lock: serializes enqueue/dequeue operations to the slot pool + * @device: parent device + * @chain: device chain view of the descriptors + * @common: common dmaengine channel object members + * @all_slots: complete domain of slots usable by the channel + * @pending: allows batching of hardware operations + * @slots_allocated: records the actual size of the descriptor slot pool + * @hw_chain_inited: h/w descriptor chain initialization flag + * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs + * @needs_unmap: if buffers should not be unmapped upon final processing + * @pdest_page: P destination page for async validate operation + * @qdest_page: Q destination page for async validate operation + * @pdest: P dma addr for async validate operation + * @qdest: Q dma addr for async validate operation + */ +struct ppc440spe_adma_chan { + spinlock_t lock; + struct ppc440spe_adma_device *device; + struct list_head chain; + struct dma_chan common; + struct list_head all_slots; + struct ppc440spe_adma_desc_slot *last_used; + int pending; + int slots_allocated; + int hw_chain_inited; + struct tasklet_struct irq_tasklet; + u8 needs_unmap; + struct page *pdest_page; + struct page *qdest_page; + dma_addr_t pdest; + dma_addr_t qdest; +}; + +struct ppc440spe_rxor { + u32 addrl; + u32 addrh; + int len; + int xor_count; + int addr_count; + int desc_count; + int state; +}; + +/** + * struct ppc440spe_adma_desc_slot - PPC440SPE-ADMA software descriptor + * @phys: hardware address of the hardware descriptor chain + * @group_head: first operation in a transaction + * @hw_next: pointer to the next descriptor in chain + * @async_tx: support for the async_tx api + * @slot_node: node on the iop_adma_chan.all_slots list + * @chain_node: node on the op_adma_chan.chain list + * @group_list: list of slots that make up a multi-descriptor transaction + * for example transfer lengths larger than the supported hw max + * @unmap_len: transaction bytecount + * @hw_desc: virtual address of the hardware descriptor chain + * @stride: currently chained or not + * @idx: pool index + * @slot_cnt: total slots used in an transaction (group of operations) + * @src_cnt: number of sources set in this descriptor + * @dst_cnt: number of destinations set in the descriptor + * @slots_per_op: number of slots per operation + * @descs_per_op: number of slot per P/Q operation see comment + * for ppc440spe_prep_dma_pqxor function + * @flags: desc state/type + * @reverse_flags: 1 if a corresponding rxor address uses reversed address order + * @xor_check_result: result of zero sum + * @crc32_result: result crc calculation + */ +struct ppc440spe_adma_desc_slot { + dma_addr_t phys; + struct ppc440spe_adma_desc_slot *group_head; + struct ppc440spe_adma_desc_slot *hw_next; + struct dma_async_tx_descriptor async_tx; + struct list_head slot_node; + struct list_head chain_node; /* node in channel ops list */ + struct list_head group_list; /* list */ + unsigned int unmap_len; + void *hw_desc; + u16 stride; + u16 idx; + u16 slot_cnt; + u8 src_cnt; + u8 dst_cnt; + u8 slots_per_op; + u8 descs_per_op; + unsigned long flags; + unsigned long reverse_flags[8]; + +#define PPC440SPE_DESC_INT 0 /* generate interrupt on complete */ +#define PPC440SPE_ZERO_P 1 /* clear P destionaion */ +#define PPC440SPE_ZERO_Q 2 /* clear Q destination */ +#define PPC440SPE_COHERENT 3 /* src/dst are coherent */ + +#define PPC440SPE_DESC_WXOR 4 /* WXORs are in chain */ +#define PPC440SPE_DESC_RXOR 5 /* RXOR is in chain */ + +#define PPC440SPE_DESC_RXOR123 8 /* CDB for RXOR123 operation */ +#define PPC440SPE_DESC_RXOR124 9 /* CDB for RXOR124 operation */ +#define PPC440SPE_DESC_RXOR125 10 /* CDB for RXOR125 operation */ +#define PPC440SPE_DESC_RXOR12 11 /* CDB for RXOR12 operation */ +#define PPC440SPE_DESC_RXOR_REV 12 /* CDB has srcs in reversed order */ + +#define PPC440SPE_DESC_PCHECK 13 +#define PPC440SPE_DESC_QCHECK 14 + +#define PPC440SPE_DESC_RXOR_MSK 0x3 + + struct ppc440spe_rxor rxor_cursor; + + union { + u32 *xor_check_result; + u32 *crc32_result; + }; +}; + +#endif /* _PPC440SPE_ADMA_H */ diff --git a/drivers/dma/ppc4xx/dma.h b/drivers/dma/ppc4xx/dma.h new file mode 100644 index 0000000000..1ff4be23db --- /dev/null +++ b/drivers/dma/ppc4xx/dma.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 440SPe's DMA engines support header file + * + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov + */ + +#ifndef _PPC440SPE_DMA_H +#define _PPC440SPE_DMA_H + +#include + +/* Number of elements in the array with statical CDBs */ +#define MAX_STAT_DMA_CDBS 16 +/* Number of DMA engines available on the contoller */ +#define DMA_ENGINES_NUM 2 + +/* Maximum h/w supported number of destinations */ +#define DMA_DEST_MAX_NUM 2 + +/* FIFO's params */ +#define DMA0_FIFO_SIZE 0x1000 +#define DMA1_FIFO_SIZE 0x1000 +#define DMA_FIFO_ENABLE (1<<12) + +/* DMA Configuration Register. Data Transfer Engine PLB Priority: */ +#define DMA_CFG_DXEPR_LP (0<<26) +#define DMA_CFG_DXEPR_HP (3<<26) +#define DMA_CFG_DXEPR_HHP (2<<26) +#define DMA_CFG_DXEPR_HHHP (1<<26) + +/* DMA Configuration Register. DMA FIFO Manager PLB Priority: */ +#define DMA_CFG_DFMPP_LP (0<<23) +#define DMA_CFG_DFMPP_HP (3<<23) +#define DMA_CFG_DFMPP_HHP (2<<23) +#define DMA_CFG_DFMPP_HHHP (1<<23) + +/* DMA Configuration Register. Force 64-byte Alignment */ +#define DMA_CFG_FALGN (1 << 19) + +/*UIC0:*/ +#define D0CPF_INT (1<<12) +#define D0CSF_INT (1<<11) +#define D1CPF_INT (1<<10) +#define D1CSF_INT (1<<9) +/*UIC1:*/ +#define DMAE_INT (1<<9) + +/* I2O IOP Interrupt Mask Register */ +#define I2O_IOPIM_P0SNE (1<<3) +#define I2O_IOPIM_P0EM (1<<5) +#define I2O_IOPIM_P1SNE (1<<6) +#define I2O_IOPIM_P1EM (1<<8) + +/* DMA CDB fields */ +#define DMA_CDB_MSK (0xF) +#define DMA_CDB_64B_ADDR (1<<2) +#define DMA_CDB_NO_INT (1<<3) +#define DMA_CDB_STATUS_MSK (0x3) +#define DMA_CDB_ADDR_MSK (0xFFFFFFF0) + +/* DMA CDB OpCodes */ +#define DMA_CDB_OPC_NO_OP (0x00) +#define DMA_CDB_OPC_MV_SG1_SG2 (0x01) +#define DMA_CDB_OPC_MULTICAST (0x05) +#define DMA_CDB_OPC_DFILL128 (0x24) +#define DMA_CDB_OPC_DCHECK128 (0x23) + +#define DMA_CUED_XOR_BASE (0x10000000) +#define DMA_CUED_XOR_HB (0x00000008) + +#ifdef CONFIG_440SP +#define DMA_CUED_MULT1_OFF 0 +#define DMA_CUED_MULT2_OFF 8 +#define DMA_CUED_MULT3_OFF 16 +#define DMA_CUED_REGION_OFF 24 +#define DMA_CUED_XOR_WIN_MSK (0xFC000000) +#else +#define DMA_CUED_MULT1_OFF 2 +#define DMA_CUED_MULT2_OFF 10 +#define DMA_CUED_MULT3_OFF 18 +#define DMA_CUED_REGION_OFF 26 +#define DMA_CUED_XOR_WIN_MSK (0xF0000000) +#endif + +#define DMA_CUED_REGION_MSK 0x3 +#define DMA_RXOR123 0x0 +#define DMA_RXOR124 0x1 +#define DMA_RXOR125 0x2 +#define DMA_RXOR12 0x3 + +/* S/G addresses */ +#define DMA_CDB_SG_SRC 1 +#define DMA_CDB_SG_DST1 2 +#define DMA_CDB_SG_DST2 3 + +/* + * DMAx engines Command Descriptor Block Type + */ +struct dma_cdb { + /* + * Basic CDB structure (Table 20-17, p.499, 440spe_um_1_22.pdf) + */ + u8 pad0[2]; /* reserved */ + u8 attr; /* attributes */ + u8 opc; /* opcode */ + u32 sg1u; /* upper SG1 address */ + u32 sg1l; /* lower SG1 address */ + u32 cnt; /* SG count, 3B used */ + u32 sg2u; /* upper SG2 address */ + u32 sg2l; /* lower SG2 address */ + u32 sg3u; /* upper SG3 address */ + u32 sg3l; /* lower SG3 address */ +}; + +/* + * DMAx hardware registers (p.515 in 440SPe UM 1.22) + */ +struct dma_regs { + u32 cpfpl; + u32 cpfph; + u32 csfpl; + u32 csfph; + u32 dsts; + u32 cfg; + u8 pad0[0x8]; + u16 cpfhp; + u16 cpftp; + u16 csfhp; + u16 csftp; + u8 pad1[0x8]; + u32 acpl; + u32 acph; + u32 s1bpl; + u32 s1bph; + u32 s2bpl; + u32 s2bph; + u32 s3bpl; + u32 s3bph; + u8 pad2[0x10]; + u32 earl; + u32 earh; + u8 pad3[0x8]; + u32 seat; + u32 sead; + u32 op; + u32 fsiz; +}; + +/* + * I2O hardware registers (p.528 in 440SPe UM 1.22) + */ +struct i2o_regs { + u32 ists; + u32 iseat; + u32 isead; + u8 pad0[0x14]; + u32 idbel; + u8 pad1[0xc]; + u32 ihis; + u32 ihim; + u8 pad2[0x8]; + u32 ihiq; + u32 ihoq; + u8 pad3[0x8]; + u32 iopis; + u32 iopim; + u32 iopiq; + u8 iopoq; + u8 pad4[3]; + u16 iiflh; + u16 iiflt; + u16 iiplh; + u16 iiplt; + u16 ioflh; + u16 ioflt; + u16 ioplh; + u16 ioplt; + u32 iidc; + u32 ictl; + u32 ifcpp; + u8 pad5[0x4]; + u16 mfac0; + u16 mfac1; + u16 mfac2; + u16 mfac3; + u16 mfac4; + u16 mfac5; + u16 mfac6; + u16 mfac7; + u16 ifcfh; + u16 ifcht; + u8 pad6[0x4]; + u32 iifmc; + u32 iodb; + u32 iodbc; + u32 ifbal; + u32 ifbah; + u32 ifsiz; + u32 ispd0; + u32 ispd1; + u32 ispd2; + u32 ispd3; + u32 ihipl; + u32 ihiph; + u32 ihopl; + u32 ihoph; + u32 iiipl; + u32 iiiph; + u32 iiopl; + u32 iioph; + u32 ifcpl; + u32 ifcph; + u8 pad7[0x8]; + u32 iopt; +}; + +#endif /* _PPC440SPE_DMA_H */ diff --git a/drivers/dma/ppc4xx/xor.h b/drivers/dma/ppc4xx/xor.h new file mode 100644 index 0000000000..da1230df28 --- /dev/null +++ b/drivers/dma/ppc4xx/xor.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 440SPe's XOR engines support header file + * + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov + */ + +#ifndef _PPC440SPE_XOR_H +#define _PPC440SPE_XOR_H + +#include + +/* Number of XOR engines available on the contoller */ +#define XOR_ENGINES_NUM 1 + +/* Number of operands supported in the h/w */ +#define XOR_MAX_OPS 16 + +/* + * XOR Command Block Control Register bits + */ +#define XOR_CBCR_LNK_BIT (1<<31) /* link present */ +#define XOR_CBCR_TGT_BIT (1<<30) /* target present */ +#define XOR_CBCR_CBCE_BIT (1<<29) /* command block compete enable */ +#define XOR_CBCR_RNZE_BIT (1<<28) /* result not zero enable */ +#define XOR_CBCR_XNOR_BIT (1<<15) /* XOR/XNOR */ +#define XOR_CDCR_OAC_MSK (0x7F) /* operand address count */ + +/* + * XORCore Status Register bits + */ +#define XOR_SR_XCP_BIT (1<<31) /* core processing */ +#define XOR_SR_ICB_BIT (1<<17) /* invalid CB */ +#define XOR_SR_IC_BIT (1<<16) /* invalid command */ +#define XOR_SR_IPE_BIT (1<<15) /* internal parity error */ +#define XOR_SR_RNZ_BIT (1<<2) /* result not Zero */ +#define XOR_SR_CBC_BIT (1<<1) /* CB complete */ +#define XOR_SR_CBLC_BIT (1<<0) /* CB list complete */ + +/* + * XORCore Control Set and Reset Register bits + */ +#define XOR_CRSR_XASR_BIT (1<<31) /* soft reset */ +#define XOR_CRSR_XAE_BIT (1<<30) /* enable */ +#define XOR_CRSR_RCBE_BIT (1<<29) /* refetch CB enable */ +#define XOR_CRSR_PAUS_BIT (1<<28) /* pause */ +#define XOR_CRSR_64BA_BIT (1<<27) /* 64/32 CB format */ +#define XOR_CRSR_CLP_BIT (1<<25) /* continue list processing */ + +/* + * XORCore Interrupt Enable Register + */ +#define XOR_IE_ICBIE_BIT (1<<17) /* Invalid Command Block IRQ Enable */ +#define XOR_IE_ICIE_BIT (1<<16) /* Invalid Command IRQ Enable */ +#define XOR_IE_RPTIE_BIT (1<<14) /* Read PLB Timeout Error IRQ Enable */ +#define XOR_IE_CBCIE_BIT (1<<1) /* CB complete interrupt enable */ +#define XOR_IE_CBLCI_BIT (1<<0) /* CB list complete interrupt enable */ + +/* + * XOR Accelerator engine Command Block Type + */ +struct xor_cb { + /* + * Basic 64-bit format XOR CB (Table 19-1, p.463, 440spe_um_1_22.pdf) + */ + u32 cbc; /* control */ + u32 cbbc; /* byte count */ + u32 cbs; /* status */ + u8 pad0[4]; /* reserved */ + u32 cbtah; /* target address high */ + u32 cbtal; /* target address low */ + u32 cblah; /* link address high */ + u32 cblal; /* link address low */ + struct { + u32 h; + u32 l; + } __attribute__ ((packed)) ops[16]; +} __attribute__ ((packed)); + +/* + * XOR hardware registers Table 19-3, UM 1.22 + */ +struct xor_regs { + u32 op_ar[16][2]; /* operand address[0]-high,[1]-low registers */ + u8 pad0[352]; /* reserved */ + u32 cbcr; /* CB control register */ + u32 cbbcr; /* CB byte count register */ + u32 cbsr; /* CB status register */ + u8 pad1[4]; /* reserved */ + u32 cbtahr; /* operand target address high register */ + u32 cbtalr; /* operand target address low register */ + u32 cblahr; /* CB link address high register */ + u32 cblalr; /* CB link address low register */ + u32 crsr; /* control set register */ + u32 crrr; /* control reset register */ + u32 ccbahr; /* current CB address high register */ + u32 ccbalr; /* current CB address low register */ + u32 plbr; /* PLB configuration register */ + u32 ier; /* interrupt enable register */ + u32 pecr; /* parity error count register */ + u32 sr; /* status register */ + u32 revidr; /* revision ID register */ +}; + +#endif /* _PPC440SPE_XOR_H */ diff --git a/drivers/dma/ptdma/Kconfig b/drivers/dma/ptdma/Kconfig new file mode 100644 index 0000000000..b430edd709 --- /dev/null +++ b/drivers/dma/ptdma/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only +config AMD_PTDMA + tristate "AMD PassThru DMA Engine" + depends on X86_64 && PCI + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the AMD PTDMA controller. This controller + provides DMA capabilities to perform high bandwidth memory to + memory and IO copy operations. It performs DMA transfer through + queue-based descriptor management. This DMA controller is intended + to be used with AMD Non-Transparent Bridge devices and not for + general purpose peripheral DMA. diff --git a/drivers/dma/ptdma/Makefile b/drivers/dma/ptdma/Makefile new file mode 100644 index 0000000000..ce5410268a --- /dev/null +++ b/drivers/dma/ptdma/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# AMD Passthru DMA driver +# + +obj-$(CONFIG_AMD_PTDMA) += ptdma.o + +ptdma-objs := ptdma-dev.o ptdma-dmaengine.o ptdma-debugfs.o + +ptdma-$(CONFIG_PCI) += ptdma-pci.o diff --git a/drivers/dma/ptdma/ptdma-debugfs.c b/drivers/dma/ptdma/ptdma-debugfs.c new file mode 100644 index 0000000000..c8307d3044 --- /dev/null +++ b/drivers/dma/ptdma/ptdma-debugfs.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Passthrough DMA device driver + * -- Based on the CCP driver + * + * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. + * + * Author: Sanjay R Mehta + * Author: Gary R Hook + */ + +#include +#include + +#include "ptdma.h" + +/* DebugFS helpers */ +#define RI_VERSION_NUM 0x0000003F + +#define RI_NUM_VQM 0x00078000 +#define RI_NVQM_SHIFT 15 + +static int pt_debugfs_info_show(struct seq_file *s, void *p) +{ + struct pt_device *pt = s->private; + unsigned int regval; + + seq_printf(s, "Device name: %s\n", dev_name(pt->dev)); + seq_printf(s, " # Queues: %d\n", 1); + seq_printf(s, " # Cmds: %d\n", pt->cmd_count); + + regval = ioread32(pt->io_regs + CMD_PT_VERSION); + + seq_printf(s, " Version: %d\n", regval & RI_VERSION_NUM); + seq_puts(s, " Engines:"); + seq_puts(s, "\n"); + seq_printf(s, " Queues: %d\n", (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT); + + return 0; +} + +/* + * Return a formatted buffer containing the current + * statistics of queue for PTDMA + */ +static int pt_debugfs_stats_show(struct seq_file *s, void *p) +{ + struct pt_device *pt = s->private; + + seq_printf(s, "Total Interrupts Handled: %ld\n", pt->total_interrupts); + + return 0; +} + +static int pt_debugfs_queue_show(struct seq_file *s, void *p) +{ + struct pt_cmd_queue *cmd_q = s->private; + unsigned int regval; + + if (!cmd_q) + return 0; + + seq_printf(s, " Pass-Thru: %ld\n", cmd_q->total_pt_ops); + + regval = ioread32(cmd_q->reg_control + 0x000C); + + seq_puts(s, " Enabled Interrupts:"); + if (regval & INT_EMPTY_QUEUE) + seq_puts(s, " EMPTY"); + if (regval & INT_QUEUE_STOPPED) + seq_puts(s, " STOPPED"); + if (regval & INT_ERROR) + seq_puts(s, " ERROR"); + if (regval & INT_COMPLETION) + seq_puts(s, " COMPLETION"); + seq_puts(s, "\n"); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(pt_debugfs_info); +DEFINE_SHOW_ATTRIBUTE(pt_debugfs_queue); +DEFINE_SHOW_ATTRIBUTE(pt_debugfs_stats); + +void ptdma_debugfs_setup(struct pt_device *pt) +{ + struct pt_cmd_queue *cmd_q; + struct dentry *debugfs_q_instance; + + if (!debugfs_initialized()) + return; + + debugfs_create_file("info", 0400, pt->dma_dev.dbg_dev_root, pt, + &pt_debugfs_info_fops); + + debugfs_create_file("stats", 0400, pt->dma_dev.dbg_dev_root, pt, + &pt_debugfs_stats_fops); + + cmd_q = &pt->cmd_q; + + debugfs_q_instance = + debugfs_create_dir("q", pt->dma_dev.dbg_dev_root); + + debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q, + &pt_debugfs_queue_fops); +} diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c new file mode 100644 index 0000000000..a2bf13ff18 --- /dev/null +++ b/drivers/dma/ptdma/ptdma-dev.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Passthru DMA device driver + * -- Based on the CCP driver + * + * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. + * + * Author: Sanjay R Mehta + * Author: Gary R Hook + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ptdma.h" + +/* Human-readable error strings */ +static char *pt_error_codes[] = { + "", + "ERR 01: ILLEGAL_ENGINE", + "ERR 03: ILLEGAL_FUNCTION_TYPE", + "ERR 04: ILLEGAL_FUNCTION_MODE", + "ERR 06: ILLEGAL_FUNCTION_SIZE", + "ERR 08: ILLEGAL_FUNCTION_RSVD", + "ERR 09: ILLEGAL_BUFFER_LENGTH", + "ERR 10: VLSB_FAULT", + "ERR 11: ILLEGAL_MEM_ADDR", + "ERR 12: ILLEGAL_MEM_SEL", + "ERR 13: ILLEGAL_CONTEXT_ID", + "ERR 15: 0xF Reserved", + "ERR 18: CMD_TIMEOUT", + "ERR 19: IDMA0_AXI_SLVERR", + "ERR 20: IDMA0_AXI_DECERR", + "ERR 21: 0x15 Reserved", + "ERR 22: IDMA1_AXI_SLAVE_FAULT", + "ERR 23: IDMA1_AIXI_DECERR", + "ERR 24: 0x18 Reserved", + "ERR 27: 0x1B Reserved", + "ERR 38: ODMA0_AXI_SLVERR", + "ERR 39: ODMA0_AXI_DECERR", + "ERR 40: 0x28 Reserved", + "ERR 41: ODMA1_AXI_SLVERR", + "ERR 42: ODMA1_AXI_DECERR", + "ERR 43: LSB_PARITY_ERR", +}; + +static void pt_log_error(struct pt_device *d, int e) +{ + dev_err(d->dev, "PTDMA error: %s (0x%x)\n", pt_error_codes[e], e); +} + +void pt_start_queue(struct pt_cmd_queue *cmd_q) +{ + /* Turn on the run bit */ + iowrite32(cmd_q->qcontrol | CMD_Q_RUN, cmd_q->reg_control); +} + +void pt_stop_queue(struct pt_cmd_queue *cmd_q) +{ + /* Turn off the run bit */ + iowrite32(cmd_q->qcontrol & ~CMD_Q_RUN, cmd_q->reg_control); +} + +static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd_q) +{ + bool soc = FIELD_GET(DWORD0_SOC, desc->dw0); + u8 *q_desc = (u8 *)&cmd_q->qbase[cmd_q->qidx]; + u32 tail; + unsigned long flags; + + if (soc) { + desc->dw0 |= FIELD_PREP(DWORD0_IOC, desc->dw0); + desc->dw0 &= ~DWORD0_SOC; + } + spin_lock_irqsave(&cmd_q->q_lock, flags); + + /* Copy 32-byte command descriptor to hw queue. */ + memcpy(q_desc, desc, 32); + cmd_q->qidx = (cmd_q->qidx + 1) % CMD_Q_LEN; + + /* The data used by this command must be flushed to memory */ + wmb(); + + /* Write the new tail address back to the queue register */ + tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE); + iowrite32(tail, cmd_q->reg_control + 0x0004); + + /* Turn the queue back on using our cached control register */ + pt_start_queue(cmd_q); + spin_unlock_irqrestore(&cmd_q->q_lock, flags); + + return 0; +} + +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, + struct pt_passthru_engine *pt_engine) +{ + struct ptdma_desc desc; + struct pt_device *pt = container_of(cmd_q, struct pt_device, cmd_q); + + cmd_q->cmd_error = 0; + cmd_q->total_pt_ops++; + memset(&desc, 0, sizeof(desc)); + desc.dw0 = CMD_DESC_DW0_VAL; + desc.length = pt_engine->src_len; + desc.src_lo = lower_32_bits(pt_engine->src_dma); + desc.dw3.src_hi = upper_32_bits(pt_engine->src_dma); + desc.dst_lo = lower_32_bits(pt_engine->dst_dma); + desc.dw5.dst_hi = upper_32_bits(pt_engine->dst_dma); + + if (cmd_q->int_en) + pt_core_enable_queue_interrupts(pt); + else + pt_core_disable_queue_interrupts(pt); + + return pt_core_execute_cmd(&desc, cmd_q); +} + +static void pt_do_cmd_complete(unsigned long data) +{ + struct pt_tasklet_data *tdata = (struct pt_tasklet_data *)data; + struct pt_cmd *cmd = tdata->cmd; + struct pt_cmd_queue *cmd_q = &cmd->pt->cmd_q; + u32 tail; + + if (cmd_q->cmd_error) { + /* + * Log the error and flush the queue by + * moving the head pointer + */ + tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE); + pt_log_error(cmd_q->pt, cmd_q->cmd_error); + iowrite32(tail, cmd_q->reg_control + 0x0008); + } + + cmd->pt_cmd_callback(cmd->data, cmd->ret); +} + +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q) +{ + u32 status; + + status = ioread32(cmd_q->reg_control + 0x0010); + if (status) { + cmd_q->int_status = status; + cmd_q->q_status = ioread32(cmd_q->reg_control + 0x0100); + cmd_q->q_int_status = ioread32(cmd_q->reg_control + 0x0104); + + /* On error, only save the first error value */ + if ((status & INT_ERROR) && !cmd_q->cmd_error) + cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); + + /* Acknowledge the completion */ + iowrite32(status, cmd_q->reg_control + 0x0010); + pt_do_cmd_complete((ulong)&pt->tdata); + } +} + +static irqreturn_t pt_core_irq_handler(int irq, void *data) +{ + struct pt_device *pt = data; + struct pt_cmd_queue *cmd_q = &pt->cmd_q; + + pt_core_disable_queue_interrupts(pt); + pt->total_interrupts++; + pt_check_status_trans(pt, cmd_q); + pt_core_enable_queue_interrupts(pt); + return IRQ_HANDLED; +} + +int pt_core_init(struct pt_device *pt) +{ + char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; + struct pt_cmd_queue *cmd_q = &pt->cmd_q; + u32 dma_addr_lo, dma_addr_hi; + struct device *dev = pt->dev; + struct dma_pool *dma_pool; + int ret; + + /* Allocate a dma pool for the queue */ + snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q", dev_name(pt->dev)); + + dma_pool = dma_pool_create(dma_pool_name, dev, + PT_DMAPOOL_MAX_SIZE, + PT_DMAPOOL_ALIGN, 0); + if (!dma_pool) + return -ENOMEM; + + /* ptdma core initialisation */ + iowrite32(CMD_CONFIG_VHB_EN, pt->io_regs + CMD_CONFIG_OFFSET); + iowrite32(CMD_QUEUE_PRIO, pt->io_regs + CMD_QUEUE_PRIO_OFFSET); + iowrite32(CMD_TIMEOUT_DISABLE, pt->io_regs + CMD_TIMEOUT_OFFSET); + iowrite32(CMD_CLK_GATE_CONFIG, pt->io_regs + CMD_CLK_GATE_CTL_OFFSET); + iowrite32(CMD_CONFIG_REQID, pt->io_regs + CMD_REQID_CONFIG_OFFSET); + + cmd_q->pt = pt; + cmd_q->dma_pool = dma_pool; + spin_lock_init(&cmd_q->q_lock); + + /* Page alignment satisfies our needs for N <= 128 */ + cmd_q->qsize = Q_SIZE(Q_DESC_SIZE); + cmd_q->qbase = dma_alloc_coherent(dev, cmd_q->qsize, + &cmd_q->qbase_dma, + GFP_KERNEL); + if (!cmd_q->qbase) { + dev_err(dev, "unable to allocate command queue\n"); + ret = -ENOMEM; + goto e_destroy_pool; + } + + cmd_q->qidx = 0; + + /* Preset some register values */ + cmd_q->reg_control = pt->io_regs + CMD_Q_STATUS_INCR; + + /* Turn off the queues and disable interrupts until ready */ + pt_core_disable_queue_interrupts(pt); + + cmd_q->qcontrol = 0; /* Start with nothing */ + iowrite32(cmd_q->qcontrol, cmd_q->reg_control); + + ioread32(cmd_q->reg_control + 0x0104); + ioread32(cmd_q->reg_control + 0x0100); + + /* Clear the interrupt status */ + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010); + + /* Request an irq */ + ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt); + if (ret) { + dev_err(dev, "unable to allocate an IRQ\n"); + goto e_free_dma; + } + + /* Update the device registers with queue information. */ + cmd_q->qcontrol &= ~CMD_Q_SIZE; + cmd_q->qcontrol |= FIELD_PREP(CMD_Q_SIZE, QUEUE_SIZE_VAL); + + cmd_q->qdma_tail = cmd_q->qbase_dma; + dma_addr_lo = lower_32_bits(cmd_q->qdma_tail); + iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0004); + iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0008); + + dma_addr_hi = upper_32_bits(cmd_q->qdma_tail); + cmd_q->qcontrol |= (dma_addr_hi << 16); + iowrite32(cmd_q->qcontrol, cmd_q->reg_control); + + pt_core_enable_queue_interrupts(pt); + + /* Register the DMA engine support */ + ret = pt_dmaengine_register(pt); + if (ret) + goto e_free_irq; + + /* Set up debugfs entries */ + ptdma_debugfs_setup(pt); + + return 0; + +e_free_irq: + free_irq(pt->pt_irq, pt); + +e_free_dma: + dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma); + +e_destroy_pool: + dma_pool_destroy(pt->cmd_q.dma_pool); + + return ret; +} + +void pt_core_destroy(struct pt_device *pt) +{ + struct device *dev = pt->dev; + struct pt_cmd_queue *cmd_q = &pt->cmd_q; + struct pt_cmd *cmd; + + /* Unregister the DMA engine */ + pt_dmaengine_unregister(pt); + + /* Disable and clear interrupts */ + pt_core_disable_queue_interrupts(pt); + + /* Turn off the run bit */ + pt_stop_queue(cmd_q); + + /* Clear the interrupt status */ + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010); + ioread32(cmd_q->reg_control + 0x0104); + ioread32(cmd_q->reg_control + 0x0100); + + free_irq(pt->pt_irq, pt); + + dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, + cmd_q->qbase_dma); + + /* Flush the cmd queue */ + while (!list_empty(&pt->cmd)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&pt->cmd, struct pt_cmd, entry); + list_del(&cmd->entry); + cmd->pt_cmd_callback(cmd->data, -ENODEV); + } +} diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c new file mode 100644 index 0000000000..1aa65e5de0 --- /dev/null +++ b/drivers/dma/ptdma/ptdma-dmaengine.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Passthrough DMA device driver + * -- Based on the CCP driver + * + * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. + * + * Author: Sanjay R Mehta + * Author: Gary R Hook + */ + +#include "ptdma.h" +#include "../dmaengine.h" +#include "../virt-dma.h" + +static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan) +{ + return container_of(dma_chan, struct pt_dma_chan, vc.chan); +} + +static inline struct pt_dma_desc *to_pt_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct pt_dma_desc, vd); +} + +static void pt_free_chan_resources(struct dma_chan *dma_chan) +{ + struct pt_dma_chan *chan = to_pt_chan(dma_chan); + + vchan_free_chan_resources(&chan->vc); +} + +static void pt_synchronize(struct dma_chan *dma_chan) +{ + struct pt_dma_chan *chan = to_pt_chan(dma_chan); + + vchan_synchronize(&chan->vc); +} + +static void pt_do_cleanup(struct virt_dma_desc *vd) +{ + struct pt_dma_desc *desc = to_pt_desc(vd); + struct pt_device *pt = desc->pt; + + kmem_cache_free(pt->dma_desc_cache, desc); +} + +static int pt_dma_start_desc(struct pt_dma_desc *desc) +{ + struct pt_passthru_engine *pt_engine; + struct pt_device *pt; + struct pt_cmd *pt_cmd; + struct pt_cmd_queue *cmd_q; + + desc->issued_to_hw = 1; + + pt_cmd = &desc->pt_cmd; + pt = pt_cmd->pt; + cmd_q = &pt->cmd_q; + pt_engine = &pt_cmd->passthru; + + pt->tdata.cmd = pt_cmd; + + /* Execute the command */ + pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine); + + return 0; +} + +static struct pt_dma_desc *pt_next_dma_desc(struct pt_dma_chan *chan) +{ + /* Get the next DMA descriptor on the active list */ + struct virt_dma_desc *vd = vchan_next_desc(&chan->vc); + + return vd ? to_pt_desc(vd) : NULL; +} + +static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan, + struct pt_dma_desc *desc) +{ + struct dma_async_tx_descriptor *tx_desc; + struct virt_dma_desc *vd; + unsigned long flags; + + /* Loop over descriptors until one is found with commands */ + do { + if (desc) { + if (!desc->issued_to_hw) { + /* No errors, keep going */ + if (desc->status != DMA_ERROR) + return desc; + } + + tx_desc = &desc->vd.tx; + vd = &desc->vd; + } else { + tx_desc = NULL; + } + + spin_lock_irqsave(&chan->vc.lock, flags); + + if (desc) { + if (desc->status != DMA_COMPLETE) { + if (desc->status != DMA_ERROR) + desc->status = DMA_COMPLETE; + + dma_cookie_complete(tx_desc); + dma_descriptor_unmap(tx_desc); + list_del(&desc->vd.node); + } else { + /* Don't handle it twice */ + tx_desc = NULL; + } + } + + desc = pt_next_dma_desc(chan); + + spin_unlock_irqrestore(&chan->vc.lock, flags); + + if (tx_desc) { + dmaengine_desc_get_callback_invoke(tx_desc, NULL); + dma_run_dependencies(tx_desc); + vchan_vdesc_fini(vd); + } + } while (desc); + + return NULL; +} + +static void pt_cmd_callback(void *data, int err) +{ + struct pt_dma_desc *desc = data; + struct dma_chan *dma_chan; + struct pt_dma_chan *chan; + int ret; + + if (err == -EINPROGRESS) + return; + + dma_chan = desc->vd.tx.chan; + chan = to_pt_chan(dma_chan); + + if (err) + desc->status = DMA_ERROR; + + while (true) { + /* Check for DMA descriptor completion */ + desc = pt_handle_active_desc(chan, desc); + + /* Don't submit cmd if no descriptor or DMA is paused */ + if (!desc) + break; + + ret = pt_dma_start_desc(desc); + if (!ret) + break; + + desc->status = DMA_ERROR; + } +} + +static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan, + unsigned long flags) +{ + struct pt_dma_desc *desc; + + desc = kmem_cache_zalloc(chan->pt->dma_desc_cache, GFP_NOWAIT); + if (!desc) + return NULL; + + vchan_tx_prep(&chan->vc, &desc->vd, flags); + + desc->pt = chan->pt; + desc->pt->cmd_q.int_en = !!(flags & DMA_PREP_INTERRUPT); + desc->issued_to_hw = 0; + desc->status = DMA_IN_PROGRESS; + + return desc; +} + +static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan, + dma_addr_t dst, + dma_addr_t src, + unsigned int len, + unsigned long flags) +{ + struct pt_dma_chan *chan = to_pt_chan(dma_chan); + struct pt_passthru_engine *pt_engine; + struct pt_dma_desc *desc; + struct pt_cmd *pt_cmd; + + desc = pt_alloc_dma_desc(chan, flags); + if (!desc) + return NULL; + + pt_cmd = &desc->pt_cmd; + pt_cmd->pt = chan->pt; + pt_engine = &pt_cmd->passthru; + pt_cmd->engine = PT_ENGINE_PASSTHRU; + pt_engine->src_dma = src; + pt_engine->dst_dma = dst; + pt_engine->src_len = len; + pt_cmd->pt_cmd_callback = pt_cmd_callback; + pt_cmd->data = desc; + + desc->len = len; + + return desc; +} + +static struct dma_async_tx_descriptor * +pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct pt_dma_desc *desc; + + desc = pt_create_desc(dma_chan, dst, src, len, flags); + if (!desc) + return NULL; + + return &desc->vd.tx; +} + +static struct dma_async_tx_descriptor * +pt_prep_dma_interrupt(struct dma_chan *dma_chan, unsigned long flags) +{ + struct pt_dma_chan *chan = to_pt_chan(dma_chan); + struct pt_dma_desc *desc; + + desc = pt_alloc_dma_desc(chan, flags); + if (!desc) + return NULL; + + return &desc->vd.tx; +} + +static void pt_issue_pending(struct dma_chan *dma_chan) +{ + struct pt_dma_chan *chan = to_pt_chan(dma_chan); + struct pt_dma_desc *desc; + unsigned long flags; + bool engine_is_idle = true; + + spin_lock_irqsave(&chan->vc.lock, flags); + + desc = pt_next_dma_desc(chan); + if (desc) + engine_is_idle = false; + + vchan_issue_pending(&chan->vc); + + desc = pt_next_dma_desc(chan); + + spin_unlock_irqrestore(&chan->vc.lock, flags); + + /* If there was nothing active, start processing */ + if (engine_is_idle && desc) + pt_cmd_callback(desc, 0); +} + +static enum dma_status +pt_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pt_device *pt = to_pt_chan(c)->pt; + struct pt_cmd_queue *cmd_q = &pt->cmd_q; + + pt_check_status_trans(pt, cmd_q); + return dma_cookie_status(c, cookie, txstate); +} + +static int pt_pause(struct dma_chan *dma_chan) +{ + struct pt_dma_chan *chan = to_pt_chan(dma_chan); + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + pt_stop_queue(&chan->pt->cmd_q); + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return 0; +} + +static int pt_resume(struct dma_chan *dma_chan) +{ + struct pt_dma_chan *chan = to_pt_chan(dma_chan); + struct pt_dma_desc *desc = NULL; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + pt_start_queue(&chan->pt->cmd_q); + desc = pt_next_dma_desc(chan); + spin_unlock_irqrestore(&chan->vc.lock, flags); + + /* If there was something active, re-start */ + if (desc) + pt_cmd_callback(desc, 0); + + return 0; +} + +static int pt_terminate_all(struct dma_chan *dma_chan) +{ + struct pt_dma_chan *chan = to_pt_chan(dma_chan); + unsigned long flags; + struct pt_cmd_queue *cmd_q = &chan->pt->cmd_q; + LIST_HEAD(head); + + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010); + spin_lock_irqsave(&chan->vc.lock, flags); + vchan_get_all_descriptors(&chan->vc, &head); + spin_unlock_irqrestore(&chan->vc.lock, flags); + + vchan_dma_desc_free_list(&chan->vc, &head); + vchan_free_chan_resources(&chan->vc); + + return 0; +} + +int pt_dmaengine_register(struct pt_device *pt) +{ + struct pt_dma_chan *chan; + struct dma_device *dma_dev = &pt->dma_dev; + char *cmd_cache_name; + char *desc_cache_name; + int ret; + + pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan), + GFP_KERNEL); + if (!pt->pt_dma_chan) + return -ENOMEM; + + cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL, + "%s-dmaengine-cmd-cache", + dev_name(pt->dev)); + if (!cmd_cache_name) + return -ENOMEM; + + desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL, + "%s-dmaengine-desc-cache", + dev_name(pt->dev)); + if (!desc_cache_name) { + ret = -ENOMEM; + goto err_cache; + } + + pt->dma_desc_cache = kmem_cache_create(desc_cache_name, + sizeof(struct pt_dma_desc), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!pt->dma_desc_cache) { + ret = -ENOMEM; + goto err_cache; + } + + dma_dev->dev = pt->dev; + dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES; + dma_dev->dst_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES; + dma_dev->directions = DMA_MEM_TO_MEM; + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + + /* + * PTDMA is intended to be used with the AMD NTB devices, hence + * marking it as DMA_PRIVATE. + */ + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + + INIT_LIST_HEAD(&dma_dev->channels); + + chan = pt->pt_dma_chan; + chan->pt = pt; + + /* Set base and prep routines */ + dma_dev->device_free_chan_resources = pt_free_chan_resources; + dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy; + dma_dev->device_prep_dma_interrupt = pt_prep_dma_interrupt; + dma_dev->device_issue_pending = pt_issue_pending; + dma_dev->device_tx_status = pt_tx_status; + dma_dev->device_pause = pt_pause; + dma_dev->device_resume = pt_resume; + dma_dev->device_terminate_all = pt_terminate_all; + dma_dev->device_synchronize = pt_synchronize; + + chan->vc.desc_free = pt_do_cleanup; + vchan_init(&chan->vc, dma_dev); + + dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_reg; + + return 0; + +err_reg: + kmem_cache_destroy(pt->dma_desc_cache); + +err_cache: + kmem_cache_destroy(pt->dma_cmd_cache); + + return ret; +} + +void pt_dmaengine_unregister(struct pt_device *pt) +{ + struct dma_device *dma_dev = &pt->dma_dev; + + dma_async_device_unregister(dma_dev); + + kmem_cache_destroy(pt->dma_desc_cache); + kmem_cache_destroy(pt->dma_cmd_cache); +} diff --git a/drivers/dma/ptdma/ptdma-pci.c b/drivers/dma/ptdma/ptdma-pci.c new file mode 100644 index 0000000000..22739ff0c3 --- /dev/null +++ b/drivers/dma/ptdma/ptdma-pci.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Passthru DMA device driver + * -- Based on the CCP driver + * + * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. + * + * Author: Sanjay R Mehta + * Author: Tom Lendacky + * Author: Gary R Hook + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ptdma.h" + +struct pt_msix { + int msix_count; + struct msix_entry msix_entry; +}; + +/* + * pt_alloc_struct - allocate and initialize the pt_device struct + * + * @dev: device struct of the PTDMA + */ +static struct pt_device *pt_alloc_struct(struct device *dev) +{ + struct pt_device *pt; + + pt = devm_kzalloc(dev, sizeof(*pt), GFP_KERNEL); + + if (!pt) + return NULL; + pt->dev = dev; + + INIT_LIST_HEAD(&pt->cmd); + + return pt; +} + +static int pt_get_msix_irqs(struct pt_device *pt) +{ + struct pt_msix *pt_msix = pt->pt_msix; + struct device *dev = pt->dev; + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pt_msix->msix_entry.entry = 0; + + ret = pci_enable_msix_range(pdev, &pt_msix->msix_entry, 1, 1); + if (ret < 0) + return ret; + + pt_msix->msix_count = ret; + + pt->pt_irq = pt_msix->msix_entry.vector; + + return 0; +} + +static int pt_get_msi_irq(struct pt_device *pt) +{ + struct device *dev = pt->dev; + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_enable_msi(pdev); + if (ret) + return ret; + + pt->pt_irq = pdev->irq; + + return 0; +} + +static int pt_get_irqs(struct pt_device *pt) +{ + struct device *dev = pt->dev; + int ret; + + ret = pt_get_msix_irqs(pt); + if (!ret) + return 0; + + /* Couldn't get MSI-X vectors, try MSI */ + dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret); + ret = pt_get_msi_irq(pt); + if (!ret) + return 0; + + /* Couldn't get MSI interrupt */ + dev_err(dev, "could not enable MSI (%d)\n", ret); + + return ret; +} + +static void pt_free_irqs(struct pt_device *pt) +{ + struct pt_msix *pt_msix = pt->pt_msix; + struct device *dev = pt->dev; + struct pci_dev *pdev = to_pci_dev(dev); + + if (pt_msix->msix_count) + pci_disable_msix(pdev); + else if (pt->pt_irq) + pci_disable_msi(pdev); + + pt->pt_irq = 0; +} + +static int pt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct pt_device *pt; + struct pt_msix *pt_msix; + struct device *dev = &pdev->dev; + void __iomem * const *iomap_table; + int bar_mask; + int ret = -ENOMEM; + + pt = pt_alloc_struct(dev); + if (!pt) + goto e_err; + + pt_msix = devm_kzalloc(dev, sizeof(*pt_msix), GFP_KERNEL); + if (!pt_msix) + goto e_err; + + pt->pt_msix = pt_msix; + pt->dev_vdata = (struct pt_dev_vdata *)id->driver_data; + if (!pt->dev_vdata) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "pcim_enable_device failed (%d)\n", ret); + goto e_err; + } + + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); + ret = pcim_iomap_regions(pdev, bar_mask, "ptdma"); + if (ret) { + dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret); + goto e_err; + } + + iomap_table = pcim_iomap_table(pdev); + if (!iomap_table) { + dev_err(dev, "pcim_iomap_table failed\n"); + ret = -ENOMEM; + goto e_err; + } + + pt->io_regs = iomap_table[pt->dev_vdata->bar]; + if (!pt->io_regs) { + dev_err(dev, "ioremap failed\n"); + ret = -ENOMEM; + goto e_err; + } + + ret = pt_get_irqs(pt); + if (ret) + goto e_err; + + pci_set_master(pdev); + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", + ret); + goto e_err; + } + } + + dev_set_drvdata(dev, pt); + + if (pt->dev_vdata) + ret = pt_core_init(pt); + + if (ret) + goto e_err; + + return 0; + +e_err: + dev_err(dev, "initialization failed ret = %d\n", ret); + + return ret; +} + +static void pt_pci_remove(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct pt_device *pt = dev_get_drvdata(dev); + + if (!pt) + return; + + if (pt->dev_vdata) + pt_core_destroy(pt); + + pt_free_irqs(pt); +} + +static const struct pt_dev_vdata dev_vdata[] = { + { + .bar = 2, + }, +}; + +static const struct pci_device_id pt_pci_table[] = { + { PCI_VDEVICE(AMD, 0x1498), (kernel_ulong_t)&dev_vdata[0] }, + /* Last entry must be zero */ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, pt_pci_table); + +static struct pci_driver pt_pci_driver = { + .name = "ptdma", + .id_table = pt_pci_table, + .probe = pt_pci_probe, + .remove = pt_pci_remove, +}; + +module_pci_driver(pt_pci_driver); + +MODULE_AUTHOR("Sanjay R Mehta "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("AMD PassThru DMA driver"); diff --git a/drivers/dma/ptdma/ptdma.h b/drivers/dma/ptdma/ptdma.h new file mode 100644 index 0000000000..21b4bf8952 --- /dev/null +++ b/drivers/dma/ptdma/ptdma.h @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Passthru DMA device driver + * -- Based on the CCP driver + * + * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. + * + * Author: Sanjay R Mehta + * Author: Tom Lendacky + * Author: Gary R Hook + */ + +#ifndef __PT_DEV_H__ +#define __PT_DEV_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../virt-dma.h" + +#define MAX_PT_NAME_LEN 16 +#define MAX_DMAPOOL_NAME_LEN 32 + +#define MAX_HW_QUEUES 1 +#define MAX_CMD_QLEN 100 + +#define PT_ENGINE_PASSTHRU 5 + +/* Register Mappings */ +#define IRQ_MASK_REG 0x040 +#define IRQ_STATUS_REG 0x200 + +#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) + +#define CMD_QUEUE_PRIO_OFFSET 0x00 +#define CMD_REQID_CONFIG_OFFSET 0x04 +#define CMD_TIMEOUT_OFFSET 0x08 +#define CMD_PT_VERSION 0x10 + +#define CMD_Q_CONTROL_BASE 0x0000 +#define CMD_Q_TAIL_LO_BASE 0x0004 +#define CMD_Q_HEAD_LO_BASE 0x0008 +#define CMD_Q_INT_ENABLE_BASE 0x000C +#define CMD_Q_INTERRUPT_STATUS_BASE 0x0010 + +#define CMD_Q_STATUS_BASE 0x0100 +#define CMD_Q_INT_STATUS_BASE 0x0104 +#define CMD_Q_DMA_STATUS_BASE 0x0108 +#define CMD_Q_DMA_READ_STATUS_BASE 0x010C +#define CMD_Q_DMA_WRITE_STATUS_BASE 0x0110 +#define CMD_Q_ABORT_BASE 0x0114 +#define CMD_Q_AX_CACHE_BASE 0x0118 + +#define CMD_CONFIG_OFFSET 0x1120 +#define CMD_CLK_GATE_CTL_OFFSET 0x6004 + +#define CMD_DESC_DW0_VAL 0x500012 + +/* Address offset for virtual queue registers */ +#define CMD_Q_STATUS_INCR 0x1000 + +/* Bit masks */ +#define CMD_CONFIG_REQID 0 +#define CMD_TIMEOUT_DISABLE 0 +#define CMD_CLK_DYN_GATING_DIS 0 +#define CMD_CLK_SW_GATE_MODE 0 +#define CMD_CLK_GATE_CTL 0 +#define CMD_QUEUE_PRIO GENMASK(2, 1) +#define CMD_CONFIG_VHB_EN BIT(0) +#define CMD_CLK_DYN_GATING_EN BIT(0) +#define CMD_CLK_HW_GATE_MODE BIT(0) +#define CMD_CLK_GATE_ON_DELAY BIT(12) +#define CMD_CLK_GATE_OFF_DELAY BIT(12) + +#define CMD_CLK_GATE_CONFIG (CMD_CLK_GATE_CTL | \ + CMD_CLK_HW_GATE_MODE | \ + CMD_CLK_GATE_ON_DELAY | \ + CMD_CLK_DYN_GATING_EN | \ + CMD_CLK_GATE_OFF_DELAY) + +#define CMD_Q_LEN 32 +#define CMD_Q_RUN BIT(0) +#define CMD_Q_HALT BIT(1) +#define CMD_Q_MEM_LOCATION BIT(2) +#define CMD_Q_SIZE_MASK GENMASK(4, 0) +#define CMD_Q_SIZE GENMASK(7, 3) +#define CMD_Q_SHIFT GENMASK(1, 0) +#define QUEUE_SIZE_VAL ((ffs(CMD_Q_LEN) - 2) & \ + CMD_Q_SIZE_MASK) +#define Q_PTR_MASK (2 << (QUEUE_SIZE_VAL + 5) - 1) +#define Q_DESC_SIZE sizeof(struct ptdma_desc) +#define Q_SIZE(n) (CMD_Q_LEN * (n)) + +#define INT_COMPLETION BIT(0) +#define INT_ERROR BIT(1) +#define INT_QUEUE_STOPPED BIT(2) +#define INT_EMPTY_QUEUE BIT(3) +#define SUPPORTED_INTERRUPTS (INT_COMPLETION | INT_ERROR) + +/****** Local Storage Block ******/ +#define LSB_START 0 +#define LSB_END 127 +#define LSB_COUNT (LSB_END - LSB_START + 1) + +#define PT_DMAPOOL_MAX_SIZE 64 +#define PT_DMAPOOL_ALIGN BIT(5) + +#define PT_PASSTHRU_BLOCKSIZE 512 + +struct pt_device; + +struct pt_tasklet_data { + struct completion completion; + struct pt_cmd *cmd; +}; + +/* + * struct pt_passthru_engine - pass-through operation + * without performing DMA mapping + * @mask: mask to be applied to data + * @mask_len: length in bytes of mask + * @src_dma: data to be used for this operation + * @dst_dma: data produced by this operation + * @src_len: length in bytes of data used for this operation + * + * Variables required to be set when calling pt_enqueue_cmd(): + * - bit_mod, byte_swap, src, dst, src_len + * - mask, mask_len if bit_mod is not PT_PASSTHRU_BITWISE_NOOP + */ +struct pt_passthru_engine { + dma_addr_t mask; + u32 mask_len; /* In bytes */ + + dma_addr_t src_dma, dst_dma; + u64 src_len; /* In bytes */ +}; + +/* + * struct pt_cmd - PTDMA operation request + * @entry: list element + * @work: work element used for callbacks + * @pt: PT device to be run on + * @ret: operation return code + * @flags: cmd processing flags + * @engine: PTDMA operation to perform (passthru) + * @engine_error: PT engine return code + * @passthru: engine specific structures, refer to specific engine struct below + * @callback: operation completion callback function + * @data: parameter value to be supplied to the callback function + * + * Variables required to be set when calling pt_enqueue_cmd(): + * - engine, callback + * - See the operation structures below for what is required for each + * operation. + */ +struct pt_cmd { + struct list_head entry; + struct work_struct work; + struct pt_device *pt; + int ret; + u32 engine; + u32 engine_error; + struct pt_passthru_engine passthru; + /* Completion callback support */ + void (*pt_cmd_callback)(void *data, int err); + void *data; +}; + +struct pt_dma_desc { + struct virt_dma_desc vd; + struct pt_device *pt; + enum dma_status status; + size_t len; + bool issued_to_hw; + struct pt_cmd pt_cmd; +}; + +struct pt_dma_chan { + struct virt_dma_chan vc; + struct pt_device *pt; +}; + +struct pt_cmd_queue { + struct pt_device *pt; + + /* Queue dma pool */ + struct dma_pool *dma_pool; + + /* Queue base address (not neccessarily aligned)*/ + struct ptdma_desc *qbase; + + /* Aligned queue start address (per requirement) */ + spinlock_t q_lock ____cacheline_aligned; + unsigned int qidx; + + unsigned int qsize; + dma_addr_t qbase_dma; + dma_addr_t qdma_tail; + + unsigned int active; + unsigned int suspended; + + /* Interrupt flag */ + bool int_en; + + /* Register addresses for queue */ + void __iomem *reg_control; + u32 qcontrol; /* Cached control register */ + + /* Status values from job */ + u32 int_status; + u32 q_status; + u32 q_int_status; + u32 cmd_error; + /* Queue Statistics */ + unsigned long total_pt_ops; +} ____cacheline_aligned; + +struct pt_device { + struct list_head entry; + + unsigned int ord; + char name[MAX_PT_NAME_LEN]; + + struct device *dev; + + /* Bus specific device information */ + struct pt_msix *pt_msix; + + struct pt_dev_vdata *dev_vdata; + + unsigned int pt_irq; + + /* I/O area used for device communication */ + void __iomem *io_regs; + + spinlock_t cmd_lock ____cacheline_aligned; + unsigned int cmd_count; + struct list_head cmd; + + /* + * The command queue. This represent the queue available on the + * PTDMA that are available for processing cmds + */ + struct pt_cmd_queue cmd_q; + + /* Support for the DMA Engine capabilities */ + struct dma_device dma_dev; + struct pt_dma_chan *pt_dma_chan; + struct kmem_cache *dma_cmd_cache; + struct kmem_cache *dma_desc_cache; + + wait_queue_head_t lsb_queue; + + /* Device Statistics */ + unsigned long total_interrupts; + + struct pt_tasklet_data tdata; +}; + +/* + * descriptor for PTDMA commands + * 8 32-bit words: + * word 0: function; engine; control bits + * word 1: length of source data + * word 2: low 32 bits of source pointer + * word 3: upper 16 bits of source pointer; source memory type + * word 4: low 32 bits of destination pointer + * word 5: upper 16 bits of destination pointer; destination memory type + * word 6: reserved 32 bits + * word 7: reserved 32 bits + */ + +#define DWORD0_SOC BIT(0) +#define DWORD0_IOC BIT(1) + +struct dword3 { + unsigned int src_hi:16; + unsigned int src_mem:2; + unsigned int lsb_cxt_id:8; + unsigned int rsvd1:5; + unsigned int fixed:1; +}; + +struct dword5 { + unsigned int dst_hi:16; + unsigned int dst_mem:2; + unsigned int rsvd1:13; + unsigned int fixed:1; +}; + +struct ptdma_desc { + u32 dw0; + u32 length; + u32 src_lo; + struct dword3 dw3; + u32 dst_lo; + struct dword5 dw5; + __le32 rsvd1; + __le32 rsvd2; +}; + +/* Structure to hold PT device data */ +struct pt_dev_vdata { + const unsigned int bar; +}; + +int pt_dmaengine_register(struct pt_device *pt); +void pt_dmaengine_unregister(struct pt_device *pt); + +void ptdma_debugfs_setup(struct pt_device *pt); +int pt_core_init(struct pt_device *pt); +void pt_core_destroy(struct pt_device *pt); + +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, + struct pt_passthru_engine *pt_engine); + +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q); +void pt_start_queue(struct pt_cmd_queue *cmd_q); +void pt_stop_queue(struct pt_cmd_queue *cmd_q); + +static inline void pt_core_disable_queue_interrupts(struct pt_device *pt) +{ + iowrite32(0, pt->cmd_q.reg_control + 0x000C); +} + +static inline void pt_core_enable_queue_interrupts(struct pt_device *pt) +{ + iowrite32(SUPPORTED_INTERRUPTS, pt->cmd_q.reg_control + 0x000C); +} +#endif diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c new file mode 100644 index 0000000000..16d342654d --- /dev/null +++ b/drivers/dma/pxa_dma.c @@ -0,0 +1,1467 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2015 Robert Jarzmik + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +#define DCSR(n) (0x0000 + ((n) << 2)) +#define DALGN(n) 0x00a0 +#define DINT 0x00f0 +#define DDADR(n) (0x0200 + ((n) << 4)) +#define DSADR(n) (0x0204 + ((n) << 4)) +#define DTADR(n) (0x0208 + ((n) << 4)) +#define DCMD(n) (0x020c + ((n) << 4)) + +#define PXA_DCSR_RUN BIT(31) /* Run Bit (read / write) */ +#define PXA_DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */ +#define PXA_DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (R/W) */ +#define PXA_DCSR_REQPEND BIT(8) /* Request Pending (read-only) */ +#define PXA_DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */ +#define PXA_DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */ +#define PXA_DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */ +#define PXA_DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */ + +#define PXA_DCSR_EORIRQEN BIT(28) /* End of Receive IRQ Enable (R/W) */ +#define PXA_DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */ +#define PXA_DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */ +#define PXA_DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */ +#define PXA_DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */ +#define PXA_DCSR_CMPST BIT(10) /* The Descriptor Compare Status */ +#define PXA_DCSR_EORINTR BIT(9) /* The end of Receive */ + +#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */ +#define DRCMR_CHLNUM 0x1f /* mask for Channel Number (read / write) */ + +#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */ +#define DDADR_STOP BIT(0) /* Stop (read / write) */ + +#define PXA_DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */ +#define PXA_DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */ +#define PXA_DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */ +#define PXA_DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */ +#define PXA_DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */ +#define PXA_DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */ +#define PXA_DCMD_ENDIAN BIT(18) /* Device Endian-ness. */ +#define PXA_DCMD_BURST8 (1 << 16) /* 8 byte burst */ +#define PXA_DCMD_BURST16 (2 << 16) /* 16 byte burst */ +#define PXA_DCMD_BURST32 (3 << 16) /* 32 byte burst */ +#define PXA_DCMD_WIDTH1 (1 << 14) /* 1 byte width */ +#define PXA_DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */ +#define PXA_DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ +#define PXA_DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ + +#define PDMA_ALIGNMENT 3 +#define PDMA_MAX_DESC_BYTES (PXA_DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1)) + +struct pxad_desc_hw { + u32 ddadr; /* Points to the next descriptor + flags */ + u32 dsadr; /* DSADR value for the current transfer */ + u32 dtadr; /* DTADR value for the current transfer */ + u32 dcmd; /* DCMD value for the current transfer */ +} __aligned(16); + +struct pxad_desc_sw { + struct virt_dma_desc vd; /* Virtual descriptor */ + int nb_desc; /* Number of hw. descriptors */ + size_t len; /* Number of bytes xfered */ + dma_addr_t first; /* First descriptor's addr */ + + /* At least one descriptor has an src/dst address not multiple of 8 */ + bool misaligned; + bool cyclic; + struct dma_pool *desc_pool; /* Channel's used allocator */ + + struct pxad_desc_hw *hw_desc[]; /* DMA coherent descriptors */ +}; + +struct pxad_phy { + int idx; + void __iomem *base; + struct pxad_chan *vchan; +}; + +struct pxad_chan { + struct virt_dma_chan vc; /* Virtual channel */ + u32 drcmr; /* Requestor of the channel */ + enum pxad_chan_prio prio; /* Required priority of phy */ + /* + * At least one desc_sw in submitted or issued transfers on this channel + * has one address such as: addr % 8 != 0. This implies the DALGN + * setting on the phy. + */ + bool misaligned; + struct dma_slave_config cfg; /* Runtime config */ + + /* protected by vc->lock */ + struct pxad_phy *phy; + struct dma_pool *desc_pool; /* Descriptors pool */ + dma_cookie_t bus_error; + + wait_queue_head_t wq_state; +}; + +struct pxad_device { + struct dma_device slave; + int nr_chans; + int nr_requestors; + void __iomem *base; + struct pxad_phy *phys; + spinlock_t phy_lock; /* Phy association */ +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_root; + struct dentry **dbgfs_chan; +#endif +}; + +#define tx_to_pxad_desc(tx) \ + container_of(tx, struct pxad_desc_sw, async_tx) +#define to_pxad_chan(dchan) \ + container_of(dchan, struct pxad_chan, vc.chan) +#define to_pxad_dev(dmadev) \ + container_of(dmadev, struct pxad_device, slave) +#define to_pxad_sw_desc(_vd) \ + container_of((_vd), struct pxad_desc_sw, vd) + +#define _phy_readl_relaxed(phy, _reg) \ + readl_relaxed((phy)->base + _reg((phy)->idx)) +#define phy_readl_relaxed(phy, _reg) \ + ({ \ + u32 _v; \ + _v = readl_relaxed((phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): readl(%s): 0x%08x\n", __func__, #_reg, \ + _v); \ + _v; \ + }) +#define phy_writel(phy, val, _reg) \ + do { \ + writel((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) +#define phy_writel_relaxed(phy, val, _reg) \ + do { \ + writel_relaxed((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel_relaxed(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) + +static unsigned int pxad_drcmr(unsigned int line) +{ + if (line < 64) + return 0x100 + line * 4; + return 0x1000 + line * 4; +} + +static bool pxad_filter_fn(struct dma_chan *chan, void *param); + +/* + * Debug fs + */ +#ifdef CONFIG_DEBUG_FS +#include +#include +#include + +static int requester_chan_show(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i; + u32 drcmr; + + seq_printf(s, "DMA channel %d requester :\n", phy->idx); + for (i = 0; i < 70; i++) { + drcmr = readl_relaxed(phy->base + pxad_drcmr(i)); + if ((drcmr & DRCMR_CHLNUM) == phy->idx) + seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); + } + return 0; +} + +static inline int dbg_burst_from_dcmd(u32 dcmd) +{ + int burst = (dcmd >> 16) & 0x3; + + return burst ? 4 << burst : 0; +} + +static int is_phys_valid(unsigned long addr) +{ + return pfn_valid(__phys_to_pfn(addr)); +} + +#define PXA_DCSR_STR(flag) (dcsr & PXA_DCSR_##flag ? #flag" " : "") +#define PXA_DCMD_STR(flag) (dcmd & PXA_DCMD_##flag ? #flag" " : "") + +static int descriptors_show(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i, max_show = 20, burst, width; + u32 dcmd; + unsigned long phys_desc, ddadr; + struct pxad_desc_hw *desc; + + phys_desc = ddadr = _phy_readl_relaxed(phy, DDADR); + + seq_printf(s, "DMA channel %d descriptors :\n", phy->idx); + seq_printf(s, "[%03d] First descriptor unknown\n", 0); + for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) { + desc = phys_to_virt(phys_desc); + dcmd = desc->dcmd; + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", + i, phys_desc, desc); + seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); + seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); + seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, + dcmd & PXA_DCMD_LENGTH); + phys_desc = desc->ddadr; + } + if (i == max_show) + seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", + i, phys_desc); + else + seq_printf(s, "[%03d] Desc at %08lx is %s\n", + i, phys_desc, phys_desc == DDADR_STOP ? + "DDADR_STOP" : "invalid"); + + return 0; +} + +static int chan_state_show(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + u32 dcsr, dcmd; + int burst, width; + static const char * const str_prio[] = { + "high", "normal", "low", "invalid" + }; + + dcsr = _phy_readl_relaxed(phy, DCSR); + dcmd = _phy_readl_relaxed(phy, DCMD); + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "DMA channel %d\n", phy->idx); + seq_printf(s, "\tPriority : %s\n", + str_prio[(phy->idx & 0xf) / 4]); + seq_printf(s, "\tUnaligned transfer bit: %s\n", + _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ? + "yes" : "no"); + seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC), + PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN), + PXA_DCSR_STR(EORJMPEN), PXA_DCSR_STR(EORSTOPEN), + PXA_DCSR_STR(SETCMPST), PXA_DCSR_STR(CLRCMPST), + PXA_DCSR_STR(CMPST), PXA_DCSR_STR(EORINTR), + PXA_DCSR_STR(REQPEND), PXA_DCSR_STR(STOPSTATE), + PXA_DCSR_STR(ENDINTR), PXA_DCSR_STR(STARTINTR), + PXA_DCSR_STR(BUSERR)); + + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, dcmd & PXA_DCMD_LENGTH); + seq_printf(s, "\tDSADR = %08x\n", _phy_readl_relaxed(phy, DSADR)); + seq_printf(s, "\tDTADR = %08x\n", _phy_readl_relaxed(phy, DTADR)); + seq_printf(s, "\tDDADR = %08x\n", _phy_readl_relaxed(phy, DDADR)); + + return 0; +} + +static int state_show(struct seq_file *s, void *p) +{ + struct pxad_device *pdev = s->private; + + /* basic device status */ + seq_puts(s, "DMA engine status\n"); + seq_printf(s, "\tChannel number: %d\n", pdev->nr_chans); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(chan_state); +DEFINE_SHOW_ATTRIBUTE(descriptors); +DEFINE_SHOW_ATTRIBUTE(requester_chan); + +static struct dentry *pxad_dbg_alloc_chan(struct pxad_device *pdev, + int ch, struct dentry *chandir) +{ + char chan_name[11]; + struct dentry *chan; + void *dt; + + scnprintf(chan_name, sizeof(chan_name), "%d", ch); + chan = debugfs_create_dir(chan_name, chandir); + dt = (void *)&pdev->phys[ch]; + + debugfs_create_file("state", 0400, chan, dt, &chan_state_fops); + debugfs_create_file("descriptors", 0400, chan, dt, &descriptors_fops); + debugfs_create_file("requesters", 0400, chan, dt, &requester_chan_fops); + + return chan; +} + +static void pxad_init_debugfs(struct pxad_device *pdev) +{ + int i; + struct dentry *chandir; + + pdev->dbgfs_chan = + kmalloc_array(pdev->nr_chans, sizeof(struct dentry *), + GFP_KERNEL); + if (!pdev->dbgfs_chan) + return; + + pdev->dbgfs_root = debugfs_create_dir(dev_name(pdev->slave.dev), NULL); + + debugfs_create_file("state", 0400, pdev->dbgfs_root, pdev, &state_fops); + + chandir = debugfs_create_dir("channels", pdev->dbgfs_root); + + for (i = 0; i < pdev->nr_chans; i++) + pdev->dbgfs_chan[i] = pxad_dbg_alloc_chan(pdev, i, chandir); +} + +static void pxad_cleanup_debugfs(struct pxad_device *pdev) +{ + debugfs_remove_recursive(pdev->dbgfs_root); +} +#else +static inline void pxad_init_debugfs(struct pxad_device *pdev) {} +static inline void pxad_cleanup_debugfs(struct pxad_device *pdev) {} +#endif + +static struct pxad_phy *lookup_phy(struct pxad_chan *pchan) +{ + int prio, i; + struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device); + struct pxad_phy *phy, *found = NULL; + unsigned long flags; + + /* + * dma channel priorities + * ch 0 - 3, 16 - 19 <--> (0) + * ch 4 - 7, 20 - 23 <--> (1) + * ch 8 - 11, 24 - 27 <--> (2) + * ch 12 - 15, 28 - 31 <--> (3) + */ + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) { + for (i = 0; i < pdev->nr_chans; i++) { + if (prio != (i & 0xf) >> 2) + continue; + phy = &pdev->phys[i]; + if (!phy->vchan) { + phy->vchan = pchan; + found = phy; + goto out_unlock; + } + } + } + +out_unlock: + spin_unlock_irqrestore(&pdev->phy_lock, flags); + dev_dbg(&pchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, found, + found ? found->idx : -1); + + return found; +} + +static void pxad_free_phy(struct pxad_chan *chan) +{ + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + unsigned long flags; + u32 reg; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): freeing\n", __func__); + if (!chan->phy) + return; + + /* clear the channel mapping in DRCMR */ + if (chan->drcmr <= pdev->nr_requestors) { + reg = pxad_drcmr(chan->drcmr); + writel_relaxed(0, chan->phy->base + reg); + } + + spin_lock_irqsave(&pdev->phy_lock, flags); + chan->phy->vchan = NULL; + chan->phy = NULL; + spin_unlock_irqrestore(&pdev->phy_lock, flags); +} + +static bool is_chan_running(struct pxad_chan *chan) +{ + u32 dcsr; + struct pxad_phy *phy = chan->phy; + + if (!phy) + return false; + dcsr = phy_readl_relaxed(phy, DCSR); + return dcsr & PXA_DCSR_RUN; +} + +static bool is_running_chan_misaligned(struct pxad_chan *chan) +{ + u32 dalgn; + + BUG_ON(!chan->phy); + dalgn = phy_readl_relaxed(chan->phy, DALGN); + return dalgn & (BIT(chan->phy->idx)); +} + +static void phy_enable(struct pxad_phy *phy, bool misaligned) +{ + struct pxad_device *pdev; + u32 reg, dalgn; + + if (!phy->vchan) + return; + + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(); phy=%p(%d) misaligned=%d\n", __func__, + phy, phy->idx, misaligned); + + pdev = to_pxad_dev(phy->vchan->vc.chan.device); + if (phy->vchan->drcmr <= pdev->nr_requestors) { + reg = pxad_drcmr(phy->vchan->drcmr); + writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); + } + + dalgn = phy_readl_relaxed(phy, DALGN); + if (misaligned) + dalgn |= BIT(phy->idx); + else + dalgn &= ~BIT(phy->idx); + phy_writel_relaxed(phy, dalgn, DALGN); + + phy_writel(phy, PXA_DCSR_STOPIRQEN | PXA_DCSR_ENDINTR | + PXA_DCSR_BUSERR | PXA_DCSR_RUN, DCSR); +} + +static void phy_disable(struct pxad_phy *phy) +{ + u32 dcsr; + + if (!phy) + return; + + dcsr = phy_readl_relaxed(phy, DCSR); + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, phy, phy->idx); + phy_writel(phy, dcsr & ~PXA_DCSR_RUN & ~PXA_DCSR_STOPIRQEN, DCSR); +} + +static void pxad_launch_chan(struct pxad_chan *chan, + struct pxad_desc_sw *desc) +{ + dev_dbg(&chan->vc.chan.dev->device, + "%s(): desc=%p\n", __func__, desc); + if (!chan->phy) { + chan->phy = lookup_phy(chan); + if (!chan->phy) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): no free dma channel\n", __func__); + return; + } + } + chan->bus_error = 0; + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + phy_writel(chan->phy, desc->first, DDADR); + phy_enable(chan->phy, chan->misaligned); + wake_up(&chan->wq_state); +} + +static void set_updater_desc(struct pxad_desc_sw *sw_desc, + unsigned long flags) +{ + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr; + + updater->ddadr = DDADR_STOP; + updater->dsadr = dma; + updater->dtadr = dma + 8; + updater->dcmd = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 | + (PXA_DCMD_LENGTH & sizeof(u32)); + if (flags & DMA_PREP_INTERRUPT) + updater->dcmd |= PXA_DCMD_ENDIRQEN; + if (sw_desc->cyclic) + sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first; +} + +static bool is_desc_completed(struct virt_dma_desc *vd) +{ + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + + return updater->dtadr != (updater->dsadr + 8); +} + +static void pxad_desc_chain(struct virt_dma_desc *vd1, + struct virt_dma_desc *vd2) +{ + struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1); + struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2); + dma_addr_t dma_to_chain; + + dma_to_chain = desc2->first; + desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain; +} + +static bool pxad_try_hotchain(struct virt_dma_chan *vc, + struct virt_dma_desc *vd) +{ + struct virt_dma_desc *vd_last_issued = NULL; + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + + /* + * Attempt to hot chain the tx if the phy is still running. This is + * considered successful only if either the channel is still running + * after the chaining, or if the chained transfer is completed after + * having been hot chained. + * A change of alignment is not allowed, and forbids hotchaining. + */ + if (is_chan_running(chan)) { + BUG_ON(list_empty(&vc->desc_issued)); + + if (!is_running_chan_misaligned(chan) && + to_pxad_sw_desc(vd)->misaligned) + return false; + + vd_last_issued = list_entry(vc->desc_issued.prev, + struct virt_dma_desc, node); + pxad_desc_chain(vd_last_issued, vd); + if (is_chan_running(chan) || is_desc_completed(vd)) + return true; + } + + return false; +} + +static unsigned int clear_chan_irq(struct pxad_phy *phy) +{ + u32 dcsr; + u32 dint = readl(phy->base + DINT); + + if (!(dint & BIT(phy->idx))) + return PXA_DCSR_RUN; + + /* clear irq */ + dcsr = phy_readl_relaxed(phy, DCSR); + phy_writel(phy, dcsr, DCSR); + if ((dcsr & PXA_DCSR_BUSERR) && (phy->vchan)) + dev_warn(&phy->vchan->vc.chan.dev->device, + "%s(chan=%p): PXA_DCSR_BUSERR\n", + __func__, &phy->vchan); + + return dcsr & ~PXA_DCSR_RUN; +} + +static irqreturn_t pxad_chan_handler(int irq, void *dev_id) +{ + struct pxad_phy *phy = dev_id; + struct pxad_chan *chan = phy->vchan; + struct virt_dma_desc *vd, *tmp; + unsigned int dcsr; + bool vd_completed; + dma_cookie_t last_started = 0; + + BUG_ON(!chan); + + dcsr = clear_chan_irq(phy); + if (dcsr & PXA_DCSR_RUN) + return IRQ_NONE; + + spin_lock(&chan->vc.lock); + list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) { + vd_completed = is_desc_completed(vd); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): checking txd %p[%x]: completed=%d dcsr=0x%x\n", + __func__, vd, vd->tx.cookie, vd_completed, + dcsr); + last_started = vd->tx.cookie; + if (to_pxad_sw_desc(vd)->cyclic) { + vchan_cyclic_callback(vd); + break; + } + if (vd_completed) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } else { + break; + } + } + + if (dcsr & PXA_DCSR_BUSERR) { + chan->bus_error = last_started; + phy_disable(phy); + } + + if (!chan->bus_error && dcsr & PXA_DCSR_STOPSTATE) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): channel stopped, submitted_empty=%d issued_empty=%d", + __func__, + list_empty(&chan->vc.desc_submitted), + list_empty(&chan->vc.desc_issued)); + phy_writel_relaxed(phy, dcsr & ~PXA_DCSR_STOPIRQEN, DCSR); + + if (list_empty(&chan->vc.desc_issued)) { + chan->misaligned = + !list_empty(&chan->vc.desc_submitted); + } else { + vd = list_first_entry(&chan->vc.desc_issued, + struct virt_dma_desc, node); + pxad_launch_chan(chan, to_pxad_sw_desc(vd)); + } + } + spin_unlock(&chan->vc.lock); + wake_up(&chan->wq_state); + + return IRQ_HANDLED; +} + +static irqreturn_t pxad_int_handler(int irq, void *dev_id) +{ + struct pxad_device *pdev = dev_id; + struct pxad_phy *phy; + u32 dint = readl(pdev->base + DINT); + int i, ret = IRQ_NONE; + + while (dint) { + i = __ffs(dint); + dint &= (dint - 1); + phy = &pdev->phys[i]; + if (pxad_chan_handler(irq, phy) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + + return ret; +} + +static int pxad_alloc_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + + if (chan->desc_pool) + return 1; + + chan->desc_pool = dma_pool_create(dma_chan_name(dchan), + pdev->slave.dev, + sizeof(struct pxad_desc_hw), + __alignof__(struct pxad_desc_hw), + 0); + if (!chan->desc_pool) { + dev_err(&chan->vc.chan.dev->device, + "%s(): unable to allocate descriptor pool\n", + __func__); + return -ENOMEM; + } + + return 1; +} + +static void pxad_free_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + vchan_free_chan_resources(&chan->vc); + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + + chan->drcmr = U32_MAX; + chan->prio = PXAD_PRIO_LOWEST; +} + +static void pxad_free_desc(struct virt_dma_desc *vd) +{ + int i; + dma_addr_t dma; + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + + for (i = sw_desc->nb_desc - 1; i >= 0; i--) { + if (i > 0) + dma = sw_desc->hw_desc[i - 1]->ddadr; + else + dma = sw_desc->first; + dma_pool_free(sw_desc->desc_pool, + sw_desc->hw_desc[i], dma); + } + sw_desc->nb_desc = 0; + kfree(sw_desc); +} + +static struct pxad_desc_sw * +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc) +{ + struct pxad_desc_sw *sw_desc; + dma_addr_t dma; + int i; + + sw_desc = kzalloc(struct_size(sw_desc, hw_desc, nb_hw_desc), + GFP_NOWAIT); + if (!sw_desc) + return NULL; + sw_desc->desc_pool = chan->desc_pool; + + for (i = 0; i < nb_hw_desc; i++) { + sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool, + GFP_NOWAIT, &dma); + if (!sw_desc->hw_desc[i]) { + dev_err(&chan->vc.chan.dev->device, + "%s(): Couldn't allocate the %dth hw_desc from dma_pool %p\n", + __func__, i, sw_desc->desc_pool); + goto err; + } + + if (i == 0) + sw_desc->first = dma; + else + sw_desc->hw_desc[i - 1]->ddadr = dma; + sw_desc->nb_desc++; + } + + return sw_desc; +err: + pxad_free_desc(&sw_desc->vd); + return NULL; +} + +static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct virt_dma_chan *vc = to_virt_chan(tx->chan); + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + struct virt_dma_desc *vd_chained = NULL, + *vd = container_of(tx, struct virt_dma_desc, tx); + dma_cookie_t cookie; + unsigned long flags; + + set_updater_desc(to_pxad_sw_desc(vd), tx->flags); + + spin_lock_irqsave(&vc->lock, flags); + cookie = dma_cookie_assign(tx); + + if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) { + list_move_tail(&vd->node, &vc->desc_issued); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (hot linked)\n", + __func__, vd, cookie); + goto out; + } + + /* + * Fallback to placing the tx in the submitted queue + */ + if (!list_empty(&vc->desc_submitted)) { + vd_chained = list_entry(vc->desc_submitted.prev, + struct virt_dma_desc, node); + /* + * Only chain the descriptors if no new misalignment is + * introduced. If a new misalignment is chained, let the channel + * stop, and be relaunched in misalign mode from the irq + * handler. + */ + if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned) + pxad_desc_chain(vd_chained, vd); + else + vd_chained = NULL; + } + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (%s linked)\n", + __func__, vd, cookie, vd_chained ? "cold" : "not"); + list_move_tail(&vd->node, &vc->desc_submitted); + chan->misaligned |= to_pxad_sw_desc(vd)->misaligned; + +out: + spin_unlock_irqrestore(&vc->lock, flags); + return cookie; +} + +static void pxad_issue_pending(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct virt_dma_desc *vd_first; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (list_empty(&chan->vc.desc_submitted)) + goto out; + + vd_first = list_first_entry(&chan->vc.desc_submitted, + struct virt_dma_desc, node); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]", __func__, vd_first, vd_first->tx.cookie); + + vchan_issue_pending(&chan->vc); + if (!pxad_try_hotchain(&chan->vc, vd_first)) + pxad_launch_chan(chan, to_pxad_sw_desc(vd_first)); +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static inline struct dma_async_tx_descriptor * +pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, + unsigned long tx_flags) +{ + struct dma_async_tx_descriptor *tx; + struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc); + + INIT_LIST_HEAD(&vd->node); + tx = vchan_tx_prep(vc, vd, tx_flags); + tx->tx_submit = pxad_tx_submit; + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vc=%p txd=%p[%x] flags=0x%lx\n", __func__, + vc, vd, vd->tx.cookie, + tx_flags); + + return tx; +} + +static void pxad_get_config(struct pxad_chan *chan, + enum dma_transfer_direction dir, + u32 *dcmd, u32 *dev_src, u32 *dev_dst) +{ + u32 maxburst = 0, dev_addr = 0; + enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + + *dcmd = 0; + if (dir == DMA_DEV_TO_MEM) { + maxburst = chan->cfg.src_maxburst; + width = chan->cfg.src_addr_width; + dev_addr = chan->cfg.src_addr; + *dev_src = dev_addr; + *dcmd |= PXA_DCMD_INCTRGADDR; + if (chan->drcmr <= pdev->nr_requestors) + *dcmd |= PXA_DCMD_FLOWSRC; + } + if (dir == DMA_MEM_TO_DEV) { + maxburst = chan->cfg.dst_maxburst; + width = chan->cfg.dst_addr_width; + dev_addr = chan->cfg.dst_addr; + *dev_dst = dev_addr; + *dcmd |= PXA_DCMD_INCSRCADDR; + if (chan->drcmr <= pdev->nr_requestors) + *dcmd |= PXA_DCMD_FLOWTRG; + } + if (dir == DMA_MEM_TO_MEM) + *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | + PXA_DCMD_INCSRCADDR; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dev_addr=0x%x maxburst=%d width=%d dir=%d\n", + __func__, dev_addr, maxburst, width, dir); + + if (width == DMA_SLAVE_BUSWIDTH_1_BYTE) + *dcmd |= PXA_DCMD_WIDTH1; + else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + *dcmd |= PXA_DCMD_WIDTH2; + else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES) + *dcmd |= PXA_DCMD_WIDTH4; + + if (maxburst == 8) + *dcmd |= PXA_DCMD_BURST8; + else if (maxburst == 16) + *dcmd |= PXA_DCMD_BURST16; + else if (maxburst == 32) + *dcmd |= PXA_DCMD_BURST32; +} + +static struct dma_async_tx_descriptor * +pxad_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw *hw_desc; + u32 dcmd; + unsigned int i, nb_desc = 0; + size_t copy; + + if (!dchan || !len) + return NULL; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n", + __func__, (unsigned long)dma_dst, (unsigned long)dma_src, + len, flags); + pxad_get_config(chan, DMA_MEM_TO_MEM, &dcmd, NULL, NULL); + + nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->len = len; + + if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) || + !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT)) + sw_desc->misaligned = true; + + i = 0; + do { + hw_desc = sw_desc->hw_desc[i++]; + copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES); + hw_desc->dcmd = dcmd | (PXA_DCMD_LENGTH & copy); + hw_desc->dsadr = dma_src; + hw_desc->dtadr = dma_dst; + len -= copy; + dma_src += copy; + dma_dst += copy; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + size_t len, avail; + struct scatterlist *sg; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0, i, j = 0; + + if ((sgl == NULL) || (sg_len == 0)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dir=%d flags=%lx\n", __func__, dir, flags); + + for_each_sg(sgl, sg, sg_len, i) + nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + dma = sg_dma_address(sg); + avail = sg_dma_len(sg); + sw_desc->len += avail; + + do { + len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES); + if (dma & 0x7) + sw_desc->misaligned = true; + + sw_desc->hw_desc[j]->dcmd = + dcmd | (PXA_DCMD_LENGTH & len); + sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma; + sw_desc->hw_desc[j++]->dtadr = dtadr ? dtadr : dma; + + dma += len; + avail -= len; + } while (avail); + } + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_dma_cyclic(struct dma_chan *dchan, + dma_addr_t buf_addr, size_t len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw **phw_desc; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0; + + if (!dchan || !len || !period_len) + return NULL; + if ((dir != DMA_DEV_TO_MEM) && (dir != DMA_MEM_TO_DEV)) { + dev_err(&chan->vc.chan.dev->device, + "Unsupported direction for cyclic DMA\n"); + return NULL; + } + /* the buffer length must be a multiple of period_len */ + if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES || + !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n", + __func__, (unsigned long)buf_addr, len, period_len, dir, flags); + + nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES); + nb_desc *= DIV_ROUND_UP(len, period_len); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->cyclic = true; + sw_desc->len = len; + + phw_desc = sw_desc->hw_desc; + dma = buf_addr; + do { + phw_desc[0]->dsadr = dsadr ? dsadr : dma; + phw_desc[0]->dtadr = dtadr ? dtadr : dma; + phw_desc[0]->dcmd = dcmd; + phw_desc++; + dma += period_len; + len -= period_len; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static int pxad_config(struct dma_chan *dchan, + struct dma_slave_config *cfg) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + if (!dchan) + return -EINVAL; + + chan->cfg = *cfg; + return 0; +} + +static int pxad_terminate_all(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + struct virt_dma_desc *vd = NULL; + unsigned long flags; + struct pxad_phy *phy; + LIST_HEAD(head); + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vchan %p: terminate all\n", __func__, &chan->vc); + + spin_lock_irqsave(&chan->vc.lock, flags); + vchan_get_all_descriptors(&chan->vc, &head); + + list_for_each_entry(vd, &head, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): cancelling txd %p[%x] (completed=%d)", __func__, + vd, vd->tx.cookie, is_desc_completed(vd)); + } + + phy = chan->phy; + if (phy) { + phy_disable(chan->phy); + pxad_free_phy(chan); + chan->phy = NULL; + spin_lock(&pdev->phy_lock); + phy->vchan = NULL; + spin_unlock(&pdev->phy_lock); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + vchan_dma_desc_free_list(&chan->vc, &head); + + return 0; +} + +static unsigned int pxad_residue(struct pxad_chan *chan, + dma_cookie_t cookie) +{ + struct virt_dma_desc *vd = NULL; + struct pxad_desc_sw *sw_desc = NULL; + struct pxad_desc_hw *hw_desc = NULL; + u32 curr, start, len, end, residue = 0; + unsigned long flags; + bool passed = false; + int i; + + /* + * If the channel does not have a phy pointer anymore, it has already + * been completed. Therefore, its residue is 0. + */ + if (!chan->phy) + return 0; + + spin_lock_irqsave(&chan->vc.lock, flags); + + vd = vchan_find_desc(&chan->vc, cookie); + if (!vd) + goto out; + + sw_desc = to_pxad_sw_desc(vd); + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + curr = phy_readl_relaxed(chan->phy, DSADR); + else + curr = phy_readl_relaxed(chan->phy, DTADR); + + /* + * curr has to be actually read before checking descriptor + * completion, so that a curr inside a status updater + * descriptor implies the following test returns true, and + * preventing reordering of curr load and the test. + */ + rmb(); + if (is_desc_completed(vd)) + goto out; + + for (i = 0; i < sw_desc->nb_desc - 1; i++) { + hw_desc = sw_desc->hw_desc[i]; + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + start = hw_desc->dsadr; + else + start = hw_desc->dtadr; + len = hw_desc->dcmd & PXA_DCMD_LENGTH; + end = start + len; + + /* + * 'passed' will be latched once we found the descriptor + * which lies inside the boundaries of the curr + * pointer. All descriptors that occur in the list + * _after_ we found that partially handled descriptor + * are still to be processed and are hence added to the + * residual bytes counter. + */ + + if (passed) { + residue += len; + } else if (curr >= start && curr <= end) { + residue += end - curr; + passed = true; + } + } + if (!passed) + residue = sw_desc->len; + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x] sw_desc=%p: %d\n", + __func__, vd, cookie, sw_desc, residue); + return residue; +} + +static enum dma_status pxad_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + enum dma_status ret; + + if (cookie == chan->bus_error) + return DMA_ERROR; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (likely(txstate && (ret != DMA_ERROR))) + dma_set_residue(txstate, pxad_residue(chan, cookie)); + + return ret; +} + +static void pxad_synchronize(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + wait_event(chan->wq_state, !is_chan_running(chan)); + vchan_synchronize(&chan->vc); +} + +static void pxad_free_channels(struct dma_device *dmadev) +{ + struct pxad_chan *c, *cn; + + list_for_each_entry_safe(c, cn, &dmadev->channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } +} + +static int pxad_remove(struct platform_device *op) +{ + struct pxad_device *pdev = platform_get_drvdata(op); + + pxad_cleanup_debugfs(pdev); + pxad_free_channels(&pdev->slave); + return 0; +} + +static int pxad_init_phys(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nb_phy_chans) +{ + int irq0, irq, nr_irq = 0, i, ret; + struct pxad_phy *phy; + + irq0 = platform_get_irq(op, 0); + if (irq0 < 0) + return irq0; + + pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans, + sizeof(pdev->phys[0]), GFP_KERNEL); + if (!pdev->phys) + return -ENOMEM; + + for (i = 0; i < nb_phy_chans; i++) + if (platform_get_irq_optional(op, i) > 0) + nr_irq++; + + for (i = 0; i < nb_phy_chans; i++) { + phy = &pdev->phys[i]; + phy->base = pdev->base; + phy->idx = i; + irq = platform_get_irq_optional(op, i); + if ((nr_irq > 1) && (irq > 0)) + ret = devm_request_irq(&op->dev, irq, + pxad_chan_handler, + IRQF_SHARED, "pxa-dma", phy); + if ((nr_irq == 1) && (i == 0)) + ret = devm_request_irq(&op->dev, irq0, + pxad_int_handler, + IRQF_SHARED, "pxa-dma", pdev); + if (ret) { + dev_err(pdev->slave.dev, + "%s(): can't request irq %d:%d\n", __func__, + irq, ret); + return ret; + } + } + + return 0; +} + +static const struct of_device_id pxad_dt_ids[] = { + { .compatible = "marvell,pdma-1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, pxad_dt_ids); + +static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pxad_device *d = ofdma->of_dma_data; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) + return NULL; + + to_pxad_chan(chan)->drcmr = dma_spec->args[0]; + to_pxad_chan(chan)->prio = dma_spec->args[1]; + + return chan; +} + +static int pxad_init_dmadev(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nr_phy_chans, + unsigned int nr_requestors) +{ + int ret; + unsigned int i; + struct pxad_chan *c; + + pdev->nr_chans = nr_phy_chans; + pdev->nr_requestors = nr_requestors; + INIT_LIST_HEAD(&pdev->slave.channels); + pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources; + pdev->slave.device_free_chan_resources = pxad_free_chan_resources; + pdev->slave.device_tx_status = pxad_tx_status; + pdev->slave.device_issue_pending = pxad_issue_pending; + pdev->slave.device_config = pxad_config; + pdev->slave.device_synchronize = pxad_synchronize; + pdev->slave.device_terminate_all = pxad_terminate_all; + + if (op->dev.coherent_dma_mask) + dma_set_mask(&op->dev, op->dev.coherent_dma_mask); + else + dma_set_mask(&op->dev, DMA_BIT_MASK(32)); + + ret = pxad_init_phys(op, pdev, nr_phy_chans); + if (ret) + return ret; + + for (i = 0; i < nr_phy_chans; i++) { + c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + + c->drcmr = U32_MAX; + c->prio = PXAD_PRIO_LOWEST; + c->vc.desc_free = pxad_free_desc; + vchan_init(&c->vc, &pdev->slave); + init_waitqueue_head(&c->wq_state); + } + + return dmaenginem_async_device_register(&pdev->slave); +} + +static int pxad_probe(struct platform_device *op) +{ + struct pxad_device *pdev; + const struct of_device_id *of_id; + const struct dma_slave_map *slave_map = NULL; + struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); + int ret, dma_channels = 0, nb_requestors = 0, slave_map_cnt = 0; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES; + + pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + + spin_lock_init(&pdev->phy_lock); + + pdev->base = devm_platform_ioremap_resource(op, 0); + if (IS_ERR(pdev->base)) + return PTR_ERR(pdev->base); + + of_id = of_match_device(pxad_dt_ids, &op->dev); + if (of_id) { + /* Parse new and deprecated dma-channels properties */ + if (of_property_read_u32(op->dev.of_node, "dma-channels", + &dma_channels)) + of_property_read_u32(op->dev.of_node, "#dma-channels", + &dma_channels); + /* Parse new and deprecated dma-requests properties */ + ret = of_property_read_u32(op->dev.of_node, "dma-requests", + &nb_requestors); + if (ret) + ret = of_property_read_u32(op->dev.of_node, "#dma-requests", + &nb_requestors); + if (ret) { + dev_warn(pdev->slave.dev, + "#dma-requests set to default 32 as missing in OF: %d", + ret); + nb_requestors = 32; + } + } else if (pdata && pdata->dma_channels) { + dma_channels = pdata->dma_channels; + nb_requestors = pdata->nb_requestors; + slave_map = pdata->slave_map; + slave_map_cnt = pdata->slave_map_cnt; + } else { + dma_channels = 32; /* default 32 channel */ + } + + dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask); + pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy; + pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg; + pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic; + pdev->slave.filter.map = slave_map; + pdev->slave.filter.mapcnt = slave_map_cnt; + pdev->slave.filter.fn = pxad_filter_fn; + + pdev->slave.copy_align = PDMA_ALIGNMENT; + pdev->slave.src_addr_widths = widths; + pdev->slave.dst_addr_widths = widths; + pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + pdev->slave.descriptor_reuse = true; + + pdev->slave.dev = &op->dev; + ret = pxad_init_dmadev(op, pdev, dma_channels, nb_requestors); + if (ret) { + dev_err(pdev->slave.dev, "unable to register\n"); + return ret; + } + + if (op->dev.of_node) { + /* Device-tree DMA controller registration */ + ret = of_dma_controller_register(op->dev.of_node, + pxad_dma_xlate, pdev); + if (ret < 0) { + dev_err(pdev->slave.dev, + "of_dma_controller_register failed\n"); + return ret; + } + } + + platform_set_drvdata(op, pdev); + pxad_init_debugfs(pdev); + dev_info(pdev->slave.dev, "initialized %d channels on %d requestors\n", + dma_channels, nb_requestors); + return 0; +} + +static const struct platform_device_id pxad_id_table[] = { + { "pxa-dma", }, + { }, +}; + +static struct platform_driver pxad_driver = { + .driver = { + .name = "pxa-dma", + .of_match_table = pxad_dt_ids, + }, + .id_table = pxad_id_table, + .probe = pxad_probe, + .remove = pxad_remove, +}; + +static bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + struct pxad_chan *c = to_pxad_chan(chan); + struct pxad_param *p = param; + + if (chan->device->dev->driver != &pxad_driver.driver) + return false; + + c->drcmr = p->drcmr; + c->prio = p->prio; + + return true; +} + +module_platform_driver(pxad_driver); + +MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver"); +MODULE_AUTHOR("Robert Jarzmik "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/qcom/Kconfig b/drivers/dma/qcom/Kconfig new file mode 100644 index 0000000000..ace75d7b83 --- /dev/null +++ b/drivers/dma/qcom/Kconfig @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0-only +config QCOM_ADM + tristate "Qualcomm ADM support" + depends on (ARCH_QCOM || COMPILE_TEST) && !PHYS_ADDR_T_64BIT + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the Qualcomm Application Data Mover (ADM) DMA + controller, as present on MSM8x60, APQ8064, and IPQ8064 devices. + This controller provides DMA capabilities for both general purpose + and on-chip peripheral devices. + +config QCOM_BAM_DMA + tristate "QCOM BAM DMA support" + depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the QCOM BAM DMA controller. This controller + provides DMA capabilities for a variety of on-chip devices. + +config QCOM_GPI_DMA + tristate "Qualcomm Technologies GPI DMA support" + depends on ARCH_QCOM + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the QCOM GPI DMA controller. This controller + provides DMA capabilities for a variety of peripheral buses such + as I2C, UART, and SPI. By using GPI dmaengine driver, bus drivers + can use a standardize interface that is protocol independent to + transfer data between DDR and peripheral. + +config QCOM_HIDMA_MGMT + tristate "Qualcomm Technologies HIDMA Management support" + depends on HAS_IOMEM + select DMA_ENGINE + help + Enable support for the Qualcomm Technologies HIDMA Management. + Each DMA device requires one management interface driver + for basic initialization before QCOM_HIDMA channel driver can + start managing the channels. In a virtualized environment, + the guest OS would run QCOM_HIDMA channel driver and the + host would run the QCOM_HIDMA_MGMT management driver. + +config QCOM_HIDMA + tristate "Qualcomm Technologies HIDMA Channel support" + depends on HAS_IOMEM + select DMA_ENGINE + help + Enable support for the Qualcomm Technologies HIDMA controller. + The HIDMA controller supports optimized buffer copies + (user to kernel, kernel to kernel, etc.). It only supports + memcpy interface. The core is not intended for general + purpose slave DMA. diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile new file mode 100644 index 0000000000..50f1e70146 --- /dev/null +++ b/drivers/dma/qcom/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_QCOM_ADM) += qcom_adm.o +obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o +obj-$(CONFIG_QCOM_GPI_DMA) += gpi.o +obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o +hdma_mgmt-objs := hidma_mgmt.o hidma_mgmt_sys.o +obj-$(CONFIG_QCOM_HIDMA) += hdma.o +hdma-objs := hidma_ll.o hidma.o hidma_dbg.o diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c new file mode 100644 index 0000000000..4c3eb97203 --- /dev/null +++ b/drivers/dma/qcom/bam_dma.c @@ -0,0 +1,1490 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + */ +/* + * QCOM BAM DMA engine driver + * + * QCOM BAM DMA blocks are distributed amongst a number of the on-chip + * peripherals on the MSM 8x74. The configuration of the channels are dependent + * on the way they are hard wired to that specific peripheral. The peripheral + * device tree entries specify the configuration of each channel. + * + * The DMA controller requires the use of external memory for storage of the + * hardware descriptors for each channel. The descriptor FIFO is accessed as a + * circular buffer and operations are managed according to the offset within the + * FIFO. After pipe/channel reset, all of the pipe registers and internal state + * are back to defaults. + * + * During DMA operations, we write descriptors to the FIFO, being careful to + * handle wrapping and then write the last FIFO offset to that channel's + * P_EVNT_REG register to kick off the transaction. The P_SW_OFSTS register + * indicates the current FIFO offset that is being processed, so there is some + * indication of where the hardware is currently working. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" +#include "../virt-dma.h" + +struct bam_desc_hw { + __le32 addr; /* Buffer physical address */ + __le16 size; /* Buffer size in bytes */ + __le16 flags; +}; + +#define BAM_DMA_AUTOSUSPEND_DELAY 100 + +#define DESC_FLAG_INT BIT(15) +#define DESC_FLAG_EOT BIT(14) +#define DESC_FLAG_EOB BIT(13) +#define DESC_FLAG_NWD BIT(12) +#define DESC_FLAG_CMD BIT(11) + +struct bam_async_desc { + struct virt_dma_desc vd; + + u32 num_desc; + u32 xfer_len; + + /* transaction flags, EOT|EOB|NWD */ + u16 flags; + + struct bam_desc_hw *curr_desc; + + /* list node for the desc in the bam_chan list of descriptors */ + struct list_head desc_node; + enum dma_transfer_direction dir; + size_t length; + struct bam_desc_hw desc[]; +}; + +enum bam_reg { + BAM_CTRL, + BAM_REVISION, + BAM_NUM_PIPES, + BAM_DESC_CNT_TRSHLD, + BAM_IRQ_SRCS, + BAM_IRQ_SRCS_MSK, + BAM_IRQ_SRCS_UNMASKED, + BAM_IRQ_STTS, + BAM_IRQ_CLR, + BAM_IRQ_EN, + BAM_CNFG_BITS, + BAM_IRQ_SRCS_EE, + BAM_IRQ_SRCS_MSK_EE, + BAM_P_CTRL, + BAM_P_RST, + BAM_P_HALT, + BAM_P_IRQ_STTS, + BAM_P_IRQ_CLR, + BAM_P_IRQ_EN, + BAM_P_EVNT_DEST_ADDR, + BAM_P_EVNT_REG, + BAM_P_SW_OFSTS, + BAM_P_DATA_FIFO_ADDR, + BAM_P_DESC_FIFO_ADDR, + BAM_P_EVNT_GEN_TRSHLD, + BAM_P_FIFO_SIZES, +}; + +struct reg_offset_data { + u32 base_offset; + unsigned int pipe_mult, evnt_mult, ee_mult; +}; + +static const struct reg_offset_data bam_v1_3_reg_info[] = { + [BAM_CTRL] = { 0x0F80, 0x00, 0x00, 0x00 }, + [BAM_REVISION] = { 0x0F84, 0x00, 0x00, 0x00 }, + [BAM_NUM_PIPES] = { 0x0FBC, 0x00, 0x00, 0x00 }, + [BAM_DESC_CNT_TRSHLD] = { 0x0F88, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS] = { 0x0F8C, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_MSK] = { 0x0F90, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_UNMASKED] = { 0x0FB0, 0x00, 0x00, 0x00 }, + [BAM_IRQ_STTS] = { 0x0F94, 0x00, 0x00, 0x00 }, + [BAM_IRQ_CLR] = { 0x0F98, 0x00, 0x00, 0x00 }, + [BAM_IRQ_EN] = { 0x0F9C, 0x00, 0x00, 0x00 }, + [BAM_CNFG_BITS] = { 0x0FFC, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_EE] = { 0x1800, 0x00, 0x00, 0x80 }, + [BAM_IRQ_SRCS_MSK_EE] = { 0x1804, 0x00, 0x00, 0x80 }, + [BAM_P_CTRL] = { 0x0000, 0x80, 0x00, 0x00 }, + [BAM_P_RST] = { 0x0004, 0x80, 0x00, 0x00 }, + [BAM_P_HALT] = { 0x0008, 0x80, 0x00, 0x00 }, + [BAM_P_IRQ_STTS] = { 0x0010, 0x80, 0x00, 0x00 }, + [BAM_P_IRQ_CLR] = { 0x0014, 0x80, 0x00, 0x00 }, + [BAM_P_IRQ_EN] = { 0x0018, 0x80, 0x00, 0x00 }, + [BAM_P_EVNT_DEST_ADDR] = { 0x102C, 0x00, 0x40, 0x00 }, + [BAM_P_EVNT_REG] = { 0x1018, 0x00, 0x40, 0x00 }, + [BAM_P_SW_OFSTS] = { 0x1000, 0x00, 0x40, 0x00 }, + [BAM_P_DATA_FIFO_ADDR] = { 0x1024, 0x00, 0x40, 0x00 }, + [BAM_P_DESC_FIFO_ADDR] = { 0x101C, 0x00, 0x40, 0x00 }, + [BAM_P_EVNT_GEN_TRSHLD] = { 0x1028, 0x00, 0x40, 0x00 }, + [BAM_P_FIFO_SIZES] = { 0x1020, 0x00, 0x40, 0x00 }, +}; + +static const struct reg_offset_data bam_v1_4_reg_info[] = { + [BAM_CTRL] = { 0x0000, 0x00, 0x00, 0x00 }, + [BAM_REVISION] = { 0x0004, 0x00, 0x00, 0x00 }, + [BAM_NUM_PIPES] = { 0x003C, 0x00, 0x00, 0x00 }, + [BAM_DESC_CNT_TRSHLD] = { 0x0008, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS] = { 0x000C, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_MSK] = { 0x0010, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_UNMASKED] = { 0x0030, 0x00, 0x00, 0x00 }, + [BAM_IRQ_STTS] = { 0x0014, 0x00, 0x00, 0x00 }, + [BAM_IRQ_CLR] = { 0x0018, 0x00, 0x00, 0x00 }, + [BAM_IRQ_EN] = { 0x001C, 0x00, 0x00, 0x00 }, + [BAM_CNFG_BITS] = { 0x007C, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_EE] = { 0x0800, 0x00, 0x00, 0x80 }, + [BAM_IRQ_SRCS_MSK_EE] = { 0x0804, 0x00, 0x00, 0x80 }, + [BAM_P_CTRL] = { 0x1000, 0x1000, 0x00, 0x00 }, + [BAM_P_RST] = { 0x1004, 0x1000, 0x00, 0x00 }, + [BAM_P_HALT] = { 0x1008, 0x1000, 0x00, 0x00 }, + [BAM_P_IRQ_STTS] = { 0x1010, 0x1000, 0x00, 0x00 }, + [BAM_P_IRQ_CLR] = { 0x1014, 0x1000, 0x00, 0x00 }, + [BAM_P_IRQ_EN] = { 0x1018, 0x1000, 0x00, 0x00 }, + [BAM_P_EVNT_DEST_ADDR] = { 0x182C, 0x00, 0x1000, 0x00 }, + [BAM_P_EVNT_REG] = { 0x1818, 0x00, 0x1000, 0x00 }, + [BAM_P_SW_OFSTS] = { 0x1800, 0x00, 0x1000, 0x00 }, + [BAM_P_DATA_FIFO_ADDR] = { 0x1824, 0x00, 0x1000, 0x00 }, + [BAM_P_DESC_FIFO_ADDR] = { 0x181C, 0x00, 0x1000, 0x00 }, + [BAM_P_EVNT_GEN_TRSHLD] = { 0x1828, 0x00, 0x1000, 0x00 }, + [BAM_P_FIFO_SIZES] = { 0x1820, 0x00, 0x1000, 0x00 }, +}; + +static const struct reg_offset_data bam_v1_7_reg_info[] = { + [BAM_CTRL] = { 0x00000, 0x00, 0x00, 0x00 }, + [BAM_REVISION] = { 0x01000, 0x00, 0x00, 0x00 }, + [BAM_NUM_PIPES] = { 0x01008, 0x00, 0x00, 0x00 }, + [BAM_DESC_CNT_TRSHLD] = { 0x00008, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS] = { 0x03010, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_MSK] = { 0x03014, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_UNMASKED] = { 0x03018, 0x00, 0x00, 0x00 }, + [BAM_IRQ_STTS] = { 0x00014, 0x00, 0x00, 0x00 }, + [BAM_IRQ_CLR] = { 0x00018, 0x00, 0x00, 0x00 }, + [BAM_IRQ_EN] = { 0x0001C, 0x00, 0x00, 0x00 }, + [BAM_CNFG_BITS] = { 0x0007C, 0x00, 0x00, 0x00 }, + [BAM_IRQ_SRCS_EE] = { 0x03000, 0x00, 0x00, 0x1000 }, + [BAM_IRQ_SRCS_MSK_EE] = { 0x03004, 0x00, 0x00, 0x1000 }, + [BAM_P_CTRL] = { 0x13000, 0x1000, 0x00, 0x00 }, + [BAM_P_RST] = { 0x13004, 0x1000, 0x00, 0x00 }, + [BAM_P_HALT] = { 0x13008, 0x1000, 0x00, 0x00 }, + [BAM_P_IRQ_STTS] = { 0x13010, 0x1000, 0x00, 0x00 }, + [BAM_P_IRQ_CLR] = { 0x13014, 0x1000, 0x00, 0x00 }, + [BAM_P_IRQ_EN] = { 0x13018, 0x1000, 0x00, 0x00 }, + [BAM_P_EVNT_DEST_ADDR] = { 0x1382C, 0x00, 0x1000, 0x00 }, + [BAM_P_EVNT_REG] = { 0x13818, 0x00, 0x1000, 0x00 }, + [BAM_P_SW_OFSTS] = { 0x13800, 0x00, 0x1000, 0x00 }, + [BAM_P_DATA_FIFO_ADDR] = { 0x13824, 0x00, 0x1000, 0x00 }, + [BAM_P_DESC_FIFO_ADDR] = { 0x1381C, 0x00, 0x1000, 0x00 }, + [BAM_P_EVNT_GEN_TRSHLD] = { 0x13828, 0x00, 0x1000, 0x00 }, + [BAM_P_FIFO_SIZES] = { 0x13820, 0x00, 0x1000, 0x00 }, +}; + +/* BAM CTRL */ +#define BAM_SW_RST BIT(0) +#define BAM_EN BIT(1) +#define BAM_EN_ACCUM BIT(4) +#define BAM_TESTBUS_SEL_SHIFT 5 +#define BAM_TESTBUS_SEL_MASK 0x3F +#define BAM_DESC_CACHE_SEL_SHIFT 13 +#define BAM_DESC_CACHE_SEL_MASK 0x3 +#define BAM_CACHED_DESC_STORE BIT(15) +#define IBC_DISABLE BIT(16) + +/* BAM REVISION */ +#define REVISION_SHIFT 0 +#define REVISION_MASK 0xFF +#define NUM_EES_SHIFT 8 +#define NUM_EES_MASK 0xF +#define CE_BUFFER_SIZE BIT(13) +#define AXI_ACTIVE BIT(14) +#define USE_VMIDMT BIT(15) +#define SECURED BIT(16) +#define BAM_HAS_NO_BYPASS BIT(17) +#define HIGH_FREQUENCY_BAM BIT(18) +#define INACTIV_TMRS_EXST BIT(19) +#define NUM_INACTIV_TMRS BIT(20) +#define DESC_CACHE_DEPTH_SHIFT 21 +#define DESC_CACHE_DEPTH_1 (0 << DESC_CACHE_DEPTH_SHIFT) +#define DESC_CACHE_DEPTH_2 (1 << DESC_CACHE_DEPTH_SHIFT) +#define DESC_CACHE_DEPTH_3 (2 << DESC_CACHE_DEPTH_SHIFT) +#define DESC_CACHE_DEPTH_4 (3 << DESC_CACHE_DEPTH_SHIFT) +#define CMD_DESC_EN BIT(23) +#define INACTIV_TMR_BASE_SHIFT 24 +#define INACTIV_TMR_BASE_MASK 0xFF + +/* BAM NUM PIPES */ +#define BAM_NUM_PIPES_SHIFT 0 +#define BAM_NUM_PIPES_MASK 0xFF +#define PERIPH_NON_PIPE_GRP_SHIFT 16 +#define PERIPH_NON_PIP_GRP_MASK 0xFF +#define BAM_NON_PIPE_GRP_SHIFT 24 +#define BAM_NON_PIPE_GRP_MASK 0xFF + +/* BAM CNFG BITS */ +#define BAM_PIPE_CNFG BIT(2) +#define BAM_FULL_PIPE BIT(11) +#define BAM_NO_EXT_P_RST BIT(12) +#define BAM_IBC_DISABLE BIT(13) +#define BAM_SB_CLK_REQ BIT(14) +#define BAM_PSM_CSW_REQ BIT(15) +#define BAM_PSM_P_RES BIT(16) +#define BAM_AU_P_RES BIT(17) +#define BAM_SI_P_RES BIT(18) +#define BAM_WB_P_RES BIT(19) +#define BAM_WB_BLK_CSW BIT(20) +#define BAM_WB_CSW_ACK_IDL BIT(21) +#define BAM_WB_RETR_SVPNT BIT(22) +#define BAM_WB_DSC_AVL_P_RST BIT(23) +#define BAM_REG_P_EN BIT(24) +#define BAM_PSM_P_HD_DATA BIT(25) +#define BAM_AU_ACCUMED BIT(26) +#define BAM_CMD_ENABLE BIT(27) + +#define BAM_CNFG_BITS_DEFAULT (BAM_PIPE_CNFG | \ + BAM_NO_EXT_P_RST | \ + BAM_IBC_DISABLE | \ + BAM_SB_CLK_REQ | \ + BAM_PSM_CSW_REQ | \ + BAM_PSM_P_RES | \ + BAM_AU_P_RES | \ + BAM_SI_P_RES | \ + BAM_WB_P_RES | \ + BAM_WB_BLK_CSW | \ + BAM_WB_CSW_ACK_IDL | \ + BAM_WB_RETR_SVPNT | \ + BAM_WB_DSC_AVL_P_RST | \ + BAM_REG_P_EN | \ + BAM_PSM_P_HD_DATA | \ + BAM_AU_ACCUMED | \ + BAM_CMD_ENABLE) + +/* PIPE CTRL */ +#define P_EN BIT(1) +#define P_DIRECTION BIT(3) +#define P_SYS_STRM BIT(4) +#define P_SYS_MODE BIT(5) +#define P_AUTO_EOB BIT(6) +#define P_AUTO_EOB_SEL_SHIFT 7 +#define P_AUTO_EOB_SEL_512 (0 << P_AUTO_EOB_SEL_SHIFT) +#define P_AUTO_EOB_SEL_256 (1 << P_AUTO_EOB_SEL_SHIFT) +#define P_AUTO_EOB_SEL_128 (2 << P_AUTO_EOB_SEL_SHIFT) +#define P_AUTO_EOB_SEL_64 (3 << P_AUTO_EOB_SEL_SHIFT) +#define P_PREFETCH_LIMIT_SHIFT 9 +#define P_PREFETCH_LIMIT_32 (0 << P_PREFETCH_LIMIT_SHIFT) +#define P_PREFETCH_LIMIT_16 (1 << P_PREFETCH_LIMIT_SHIFT) +#define P_PREFETCH_LIMIT_4 (2 << P_PREFETCH_LIMIT_SHIFT) +#define P_WRITE_NWD BIT(11) +#define P_LOCK_GROUP_SHIFT 16 +#define P_LOCK_GROUP_MASK 0x1F + +/* BAM_DESC_CNT_TRSHLD */ +#define CNT_TRSHLD 0xffff +#define DEFAULT_CNT_THRSHLD 0x4 + +/* BAM_IRQ_SRCS */ +#define BAM_IRQ BIT(31) +#define P_IRQ 0x7fffffff + +/* BAM_IRQ_SRCS_MSK */ +#define BAM_IRQ_MSK BAM_IRQ +#define P_IRQ_MSK P_IRQ + +/* BAM_IRQ_STTS */ +#define BAM_TIMER_IRQ BIT(4) +#define BAM_EMPTY_IRQ BIT(3) +#define BAM_ERROR_IRQ BIT(2) +#define BAM_HRESP_ERR_IRQ BIT(1) + +/* BAM_IRQ_CLR */ +#define BAM_TIMER_CLR BIT(4) +#define BAM_EMPTY_CLR BIT(3) +#define BAM_ERROR_CLR BIT(2) +#define BAM_HRESP_ERR_CLR BIT(1) + +/* BAM_IRQ_EN */ +#define BAM_TIMER_EN BIT(4) +#define BAM_EMPTY_EN BIT(3) +#define BAM_ERROR_EN BIT(2) +#define BAM_HRESP_ERR_EN BIT(1) + +/* BAM_P_IRQ_EN */ +#define P_PRCSD_DESC_EN BIT(0) +#define P_TIMER_EN BIT(1) +#define P_WAKE_EN BIT(2) +#define P_OUT_OF_DESC_EN BIT(3) +#define P_ERR_EN BIT(4) +#define P_TRNSFR_END_EN BIT(5) +#define P_DEFAULT_IRQS_EN (P_PRCSD_DESC_EN | P_ERR_EN | P_TRNSFR_END_EN) + +/* BAM_P_SW_OFSTS */ +#define P_SW_OFSTS_MASK 0xffff + +#define BAM_DESC_FIFO_SIZE SZ_32K +#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1) +#define BAM_FIFO_SIZE (SZ_32K - 8) +#define IS_BUSY(chan) (CIRC_SPACE(bchan->tail, bchan->head,\ + MAX_DESCRIPTORS + 1) == 0) + +struct bam_chan { + struct virt_dma_chan vc; + + struct bam_device *bdev; + + /* configuration from device tree */ + u32 id; + + /* runtime configuration */ + struct dma_slave_config slave; + + /* fifo storage */ + struct bam_desc_hw *fifo_virt; + dma_addr_t fifo_phys; + + /* fifo markers */ + unsigned short head; /* start of active descriptor entries */ + unsigned short tail; /* end of active descriptor entries */ + + unsigned int initialized; /* is the channel hw initialized? */ + unsigned int paused; /* is the channel paused? */ + unsigned int reconfigure; /* new slave config? */ + /* list of descriptors currently processed */ + struct list_head desc_list; + + struct list_head node; +}; + +static inline struct bam_chan *to_bam_chan(struct dma_chan *common) +{ + return container_of(common, struct bam_chan, vc.chan); +} + +struct bam_device { + void __iomem *regs; + struct device *dev; + struct dma_device common; + struct bam_chan *channels; + u32 num_channels; + u32 num_ees; + + /* execution environment ID, from DT */ + u32 ee; + bool controlled_remotely; + bool powered_remotely; + u32 active_channels; + + const struct reg_offset_data *layout; + + struct clk *bamclk; + int irq; + + /* dma start transaction tasklet */ + struct tasklet_struct task; +}; + +/** + * bam_addr - returns BAM register address + * @bdev: bam device + * @pipe: pipe instance (ignored when register doesn't have multiple instances) + * @reg: register enum + */ +static inline void __iomem *bam_addr(struct bam_device *bdev, u32 pipe, + enum bam_reg reg) +{ + const struct reg_offset_data r = bdev->layout[reg]; + + return bdev->regs + r.base_offset + + r.pipe_mult * pipe + + r.evnt_mult * pipe + + r.ee_mult * bdev->ee; +} + +/** + * bam_reset() - reset and initialize BAM registers + * @bdev: bam device + */ +static void bam_reset(struct bam_device *bdev) +{ + u32 val; + + /* s/w reset bam */ + /* after reset all pipes are disabled and idle */ + val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL)); + val |= BAM_SW_RST; + writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL)); + val &= ~BAM_SW_RST; + writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL)); + + /* make sure previous stores are visible before enabling BAM */ + wmb(); + + /* enable bam */ + val |= BAM_EN; + writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL)); + + /* set descriptor threshhold, start with 4 bytes */ + writel_relaxed(DEFAULT_CNT_THRSHLD, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + + /* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */ + writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS)); + + /* enable irqs for errors */ + writel_relaxed(BAM_ERROR_EN | BAM_HRESP_ERR_EN, + bam_addr(bdev, 0, BAM_IRQ_EN)); + + /* unmask global bam interrupt */ + writel_relaxed(BAM_IRQ_MSK, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE)); +} + +/** + * bam_reset_channel - Reset individual BAM DMA channel + * @bchan: bam channel + * + * This function resets a specific BAM channel + */ +static void bam_reset_channel(struct bam_chan *bchan) +{ + struct bam_device *bdev = bchan->bdev; + + lockdep_assert_held(&bchan->vc.lock); + + /* reset channel */ + writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_RST)); + writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_RST)); + + /* don't allow cpu to reorder BAM register accesses done after this */ + wmb(); + + /* make sure hw is initialized when channel is used the first time */ + bchan->initialized = 0; +} + +/** + * bam_chan_init_hw - Initialize channel hardware + * @bchan: bam channel + * @dir: DMA transfer direction + * + * This function resets and initializes the BAM channel + */ +static void bam_chan_init_hw(struct bam_chan *bchan, + enum dma_transfer_direction dir) +{ + struct bam_device *bdev = bchan->bdev; + u32 val; + + /* Reset the channel to clear internal state of the FIFO */ + bam_reset_channel(bchan); + + /* + * write out 8 byte aligned address. We have enough space for this + * because we allocated 1 more descriptor (8 bytes) than we can use + */ + writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)), + bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR)); + writel_relaxed(BAM_FIFO_SIZE, + bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES)); + + /* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */ + writel_relaxed(P_DEFAULT_IRQS_EN, + bam_addr(bdev, bchan->id, BAM_P_IRQ_EN)); + + /* unmask the specific pipe and EE combo */ + val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE)); + val |= BIT(bchan->id); + writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE)); + + /* don't allow cpu to reorder the channel enable done below */ + wmb(); + + /* set fixed direction and mode, then enable channel */ + val = P_EN | P_SYS_MODE; + if (dir == DMA_DEV_TO_MEM) + val |= P_DIRECTION; + + writel_relaxed(val, bam_addr(bdev, bchan->id, BAM_P_CTRL)); + + bchan->initialized = 1; + + /* init FIFO pointers */ + bchan->head = 0; + bchan->tail = 0; +} + +/** + * bam_alloc_chan - Allocate channel resources for DMA channel. + * @chan: specified channel + * + * This function allocates the FIFO descriptor memory + */ +static int bam_alloc_chan(struct dma_chan *chan) +{ + struct bam_chan *bchan = to_bam_chan(chan); + struct bam_device *bdev = bchan->bdev; + + if (bchan->fifo_virt) + return 0; + + /* allocate FIFO descriptor space, but only if necessary */ + bchan->fifo_virt = dma_alloc_wc(bdev->dev, BAM_DESC_FIFO_SIZE, + &bchan->fifo_phys, GFP_KERNEL); + + if (!bchan->fifo_virt) { + dev_err(bdev->dev, "Failed to allocate desc fifo\n"); + return -ENOMEM; + } + + if (bdev->active_channels++ == 0 && bdev->powered_remotely) + bam_reset(bdev); + + return 0; +} + +/** + * bam_free_chan - Frees dma resources associated with specific channel + * @chan: specified channel + * + * Free the allocated fifo descriptor memory and channel resources + * + */ +static void bam_free_chan(struct dma_chan *chan) +{ + struct bam_chan *bchan = to_bam_chan(chan); + struct bam_device *bdev = bchan->bdev; + u32 val; + unsigned long flags; + int ret; + + ret = pm_runtime_get_sync(bdev->dev); + if (ret < 0) + return; + + vchan_free_chan_resources(to_virt_chan(chan)); + + if (!list_empty(&bchan->desc_list)) { + dev_err(bchan->bdev->dev, "Cannot free busy channel\n"); + goto err; + } + + spin_lock_irqsave(&bchan->vc.lock, flags); + bam_reset_channel(bchan); + spin_unlock_irqrestore(&bchan->vc.lock, flags); + + dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt, + bchan->fifo_phys); + bchan->fifo_virt = NULL; + + /* mask irq for pipe/channel */ + val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE)); + val &= ~BIT(bchan->id); + writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE)); + + /* disable irq */ + writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_IRQ_EN)); + + if (--bdev->active_channels == 0 && bdev->powered_remotely) { + /* s/w reset bam */ + val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL)); + val |= BAM_SW_RST; + writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL)); + } + +err: + pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); +} + +/** + * bam_slave_config - set slave configuration for channel + * @chan: dma channel + * @cfg: slave configuration + * + * Sets slave configuration for channel + * + */ +static int bam_slave_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct bam_chan *bchan = to_bam_chan(chan); + unsigned long flag; + + spin_lock_irqsave(&bchan->vc.lock, flag); + memcpy(&bchan->slave, cfg, sizeof(*cfg)); + bchan->reconfigure = 1; + spin_unlock_irqrestore(&bchan->vc.lock, flag); + + return 0; +} + +/** + * bam_prep_slave_sg - Prep slave sg transaction + * + * @chan: dma channel + * @sgl: scatter gather list + * @sg_len: length of sg + * @direction: DMA transfer direction + * @flags: DMA flags + * @context: transfer context (unused) + */ +static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct bam_chan *bchan = to_bam_chan(chan); + struct bam_device *bdev = bchan->bdev; + struct bam_async_desc *async_desc; + struct scatterlist *sg; + u32 i; + struct bam_desc_hw *desc; + unsigned int num_alloc = 0; + + + if (!is_slave_direction(direction)) { + dev_err(bdev->dev, "invalid dma direction\n"); + return NULL; + } + + /* calculate number of required entries */ + for_each_sg(sgl, sg, sg_len, i) + num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_FIFO_SIZE); + + /* allocate enough room to accomodate the number of entries */ + async_desc = kzalloc(struct_size(async_desc, desc, num_alloc), + GFP_NOWAIT); + + if (!async_desc) + return NULL; + + if (flags & DMA_PREP_FENCE) + async_desc->flags |= DESC_FLAG_NWD; + + if (flags & DMA_PREP_INTERRUPT) + async_desc->flags |= DESC_FLAG_EOT; + + async_desc->num_desc = num_alloc; + async_desc->curr_desc = async_desc->desc; + async_desc->dir = direction; + + /* fill in temporary descriptors */ + desc = async_desc->desc; + for_each_sg(sgl, sg, sg_len, i) { + unsigned int remainder = sg_dma_len(sg); + unsigned int curr_offset = 0; + + do { + if (flags & DMA_PREP_CMD) + desc->flags |= cpu_to_le16(DESC_FLAG_CMD); + + desc->addr = cpu_to_le32(sg_dma_address(sg) + + curr_offset); + + if (remainder > BAM_FIFO_SIZE) { + desc->size = cpu_to_le16(BAM_FIFO_SIZE); + remainder -= BAM_FIFO_SIZE; + curr_offset += BAM_FIFO_SIZE; + } else { + desc->size = cpu_to_le16(remainder); + remainder = 0; + } + + async_desc->length += le16_to_cpu(desc->size); + desc++; + } while (remainder > 0); + } + + return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags); +} + +/** + * bam_dma_terminate_all - terminate all transactions on a channel + * @chan: bam dma channel + * + * Dequeues and frees all transactions + * No callbacks are done + * + */ +static int bam_dma_terminate_all(struct dma_chan *chan) +{ + struct bam_chan *bchan = to_bam_chan(chan); + struct bam_async_desc *async_desc, *tmp; + unsigned long flag; + LIST_HEAD(head); + + /* remove all transactions, including active transaction */ + spin_lock_irqsave(&bchan->vc.lock, flag); + /* + * If we have transactions queued, then some might be committed to the + * hardware in the desc fifo. The only way to reset the desc fifo is + * to do a hardware reset (either by pipe or the entire block). + * bam_chan_init_hw() will trigger a pipe reset, and also reinit the + * pipe. If the pipe is left disabled (default state after pipe reset) + * and is accessed by a connected hardware engine, a fatal error in + * the BAM will occur. There is a small window where this could happen + * with bam_chan_init_hw(), but it is assumed that the caller has + * stopped activity on any attached hardware engine. Make sure to do + * this first so that the BAM hardware doesn't cause memory corruption + * by accessing freed resources. + */ + if (!list_empty(&bchan->desc_list)) { + async_desc = list_first_entry(&bchan->desc_list, + struct bam_async_desc, desc_node); + bam_chan_init_hw(bchan, async_desc->dir); + } + + list_for_each_entry_safe(async_desc, tmp, + &bchan->desc_list, desc_node) { + list_add(&async_desc->vd.node, &bchan->vc.desc_issued); + list_del(&async_desc->desc_node); + } + + vchan_get_all_descriptors(&bchan->vc, &head); + spin_unlock_irqrestore(&bchan->vc.lock, flag); + + vchan_dma_desc_free_list(&bchan->vc, &head); + + return 0; +} + +/** + * bam_pause - Pause DMA channel + * @chan: dma channel + * + */ +static int bam_pause(struct dma_chan *chan) +{ + struct bam_chan *bchan = to_bam_chan(chan); + struct bam_device *bdev = bchan->bdev; + unsigned long flag; + int ret; + + ret = pm_runtime_get_sync(bdev->dev); + if (ret < 0) + return ret; + + spin_lock_irqsave(&bchan->vc.lock, flag); + writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_HALT)); + bchan->paused = 1; + spin_unlock_irqrestore(&bchan->vc.lock, flag); + pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); + + return 0; +} + +/** + * bam_resume - Resume DMA channel operations + * @chan: dma channel + * + */ +static int bam_resume(struct dma_chan *chan) +{ + struct bam_chan *bchan = to_bam_chan(chan); + struct bam_device *bdev = bchan->bdev; + unsigned long flag; + int ret; + + ret = pm_runtime_get_sync(bdev->dev); + if (ret < 0) + return ret; + + spin_lock_irqsave(&bchan->vc.lock, flag); + writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_HALT)); + bchan->paused = 0; + spin_unlock_irqrestore(&bchan->vc.lock, flag); + pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); + + return 0; +} + +/** + * process_channel_irqs - processes the channel interrupts + * @bdev: bam controller + * + * This function processes the channel interrupts + * + */ +static u32 process_channel_irqs(struct bam_device *bdev) +{ + u32 i, srcs, pipe_stts, offset, avail; + unsigned long flags; + struct bam_async_desc *async_desc, *tmp; + + srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE)); + + /* return early if no pipe/channel interrupts are present */ + if (!(srcs & P_IRQ)) + return srcs; + + for (i = 0; i < bdev->num_channels; i++) { + struct bam_chan *bchan = &bdev->channels[i]; + + if (!(srcs & BIT(i))) + continue; + + /* clear pipe irq */ + pipe_stts = readl_relaxed(bam_addr(bdev, i, BAM_P_IRQ_STTS)); + + writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR)); + + spin_lock_irqsave(&bchan->vc.lock, flags); + + offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) & + P_SW_OFSTS_MASK; + offset /= sizeof(struct bam_desc_hw); + + /* Number of bytes available to read */ + avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1); + + if (offset < bchan->head) + avail--; + + list_for_each_entry_safe(async_desc, tmp, + &bchan->desc_list, desc_node) { + /* Not enough data to read */ + if (avail < async_desc->xfer_len) + break; + + /* manage FIFO */ + bchan->head += async_desc->xfer_len; + bchan->head %= MAX_DESCRIPTORS; + + async_desc->num_desc -= async_desc->xfer_len; + async_desc->curr_desc += async_desc->xfer_len; + avail -= async_desc->xfer_len; + + /* + * if complete, process cookie. Otherwise + * push back to front of desc_issued so that + * it gets restarted by the tasklet + */ + if (!async_desc->num_desc) { + vchan_cookie_complete(&async_desc->vd); + } else { + list_add(&async_desc->vd.node, + &bchan->vc.desc_issued); + } + list_del(&async_desc->desc_node); + } + + spin_unlock_irqrestore(&bchan->vc.lock, flags); + } + + return srcs; +} + +/** + * bam_dma_irq - irq handler for bam controller + * @irq: IRQ of interrupt + * @data: callback data + * + * IRQ handler for the bam controller + */ +static irqreturn_t bam_dma_irq(int irq, void *data) +{ + struct bam_device *bdev = data; + u32 clr_mask = 0, srcs = 0; + int ret; + + srcs |= process_channel_irqs(bdev); + + /* kick off tasklet to start next dma transfer */ + if (srcs & P_IRQ) + tasklet_schedule(&bdev->task); + + ret = pm_runtime_get_sync(bdev->dev); + if (ret < 0) + return IRQ_NONE; + + if (srcs & BAM_IRQ) { + clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS)); + + /* + * don't allow reorder of the various accesses to the BAM + * registers + */ + mb(); + + writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR)); + } + + pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); + + return IRQ_HANDLED; +} + +/** + * bam_tx_status - returns status of transaction + * @chan: dma channel + * @cookie: transaction cookie + * @txstate: DMA transaction state + * + * Return status of dma transaction + */ +static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct bam_chan *bchan = to_bam_chan(chan); + struct bam_async_desc *async_desc; + struct virt_dma_desc *vd; + int ret; + size_t residue = 0; + unsigned int i; + unsigned long flags; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + if (!txstate) + return bchan->paused ? DMA_PAUSED : ret; + + spin_lock_irqsave(&bchan->vc.lock, flags); + vd = vchan_find_desc(&bchan->vc, cookie); + if (vd) { + residue = container_of(vd, struct bam_async_desc, vd)->length; + } else { + list_for_each_entry(async_desc, &bchan->desc_list, desc_node) { + if (async_desc->vd.tx.cookie != cookie) + continue; + + for (i = 0; i < async_desc->num_desc; i++) + residue += le16_to_cpu( + async_desc->curr_desc[i].size); + } + } + + spin_unlock_irqrestore(&bchan->vc.lock, flags); + + dma_set_residue(txstate, residue); + + if (ret == DMA_IN_PROGRESS && bchan->paused) + ret = DMA_PAUSED; + + return ret; +} + +/** + * bam_apply_new_config + * @bchan: bam dma channel + * @dir: DMA direction + */ +static void bam_apply_new_config(struct bam_chan *bchan, + enum dma_transfer_direction dir) +{ + struct bam_device *bdev = bchan->bdev; + u32 maxburst; + + if (!bdev->controlled_remotely) { + if (dir == DMA_DEV_TO_MEM) + maxburst = bchan->slave.src_maxburst; + else + maxburst = bchan->slave.dst_maxburst; + + writel_relaxed(maxburst, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + } + + bchan->reconfigure = 0; +} + +/** + * bam_start_dma - start next transaction + * @bchan: bam dma channel + */ +static void bam_start_dma(struct bam_chan *bchan) +{ + struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc); + struct bam_device *bdev = bchan->bdev; + struct bam_async_desc *async_desc = NULL; + struct bam_desc_hw *desc; + struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt, + sizeof(struct bam_desc_hw)); + int ret; + unsigned int avail; + struct dmaengine_desc_callback cb; + + lockdep_assert_held(&bchan->vc.lock); + + if (!vd) + return; + + ret = pm_runtime_get_sync(bdev->dev); + if (ret < 0) + return; + + while (vd && !IS_BUSY(bchan)) { + list_del(&vd->node); + + async_desc = container_of(vd, struct bam_async_desc, vd); + + /* on first use, initialize the channel hardware */ + if (!bchan->initialized) + bam_chan_init_hw(bchan, async_desc->dir); + + /* apply new slave config changes, if necessary */ + if (bchan->reconfigure) + bam_apply_new_config(bchan, async_desc->dir); + + desc = async_desc->curr_desc; + avail = CIRC_SPACE(bchan->tail, bchan->head, + MAX_DESCRIPTORS + 1); + + if (async_desc->num_desc > avail) + async_desc->xfer_len = avail; + else + async_desc->xfer_len = async_desc->num_desc; + + /* set any special flags on the last descriptor */ + if (async_desc->num_desc == async_desc->xfer_len) + desc[async_desc->xfer_len - 1].flags |= + cpu_to_le16(async_desc->flags); + + vd = vchan_next_desc(&bchan->vc); + + dmaengine_desc_get_callback(&async_desc->vd.tx, &cb); + + /* + * An interrupt is generated at this desc, if + * - FIFO is FULL. + * - No more descriptors to add. + * - If a callback completion was requested for this DESC, + * In this case, BAM will deliver the completion callback + * for this desc and continue processing the next desc. + */ + if (((avail <= async_desc->xfer_len) || !vd || + dmaengine_desc_callback_valid(&cb)) && + !(async_desc->flags & DESC_FLAG_EOT)) + desc[async_desc->xfer_len - 1].flags |= + cpu_to_le16(DESC_FLAG_INT); + + if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) { + u32 partial = MAX_DESCRIPTORS - bchan->tail; + + memcpy(&fifo[bchan->tail], desc, + partial * sizeof(struct bam_desc_hw)); + memcpy(fifo, &desc[partial], + (async_desc->xfer_len - partial) * + sizeof(struct bam_desc_hw)); + } else { + memcpy(&fifo[bchan->tail], desc, + async_desc->xfer_len * + sizeof(struct bam_desc_hw)); + } + + bchan->tail += async_desc->xfer_len; + bchan->tail %= MAX_DESCRIPTORS; + list_add_tail(&async_desc->desc_node, &bchan->desc_list); + } + + /* ensure descriptor writes and dma start not reordered */ + wmb(); + writel_relaxed(bchan->tail * sizeof(struct bam_desc_hw), + bam_addr(bdev, bchan->id, BAM_P_EVNT_REG)); + + pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); +} + +/** + * dma_tasklet - DMA IRQ tasklet + * @t: tasklet argument (bam controller structure) + * + * Sets up next DMA operation and then processes all completed transactions + */ +static void dma_tasklet(struct tasklet_struct *t) +{ + struct bam_device *bdev = from_tasklet(bdev, t, task); + struct bam_chan *bchan; + unsigned long flags; + unsigned int i; + + /* go through the channels and kick off transactions */ + for (i = 0; i < bdev->num_channels; i++) { + bchan = &bdev->channels[i]; + spin_lock_irqsave(&bchan->vc.lock, flags); + + if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan)) + bam_start_dma(bchan); + spin_unlock_irqrestore(&bchan->vc.lock, flags); + } + +} + +/** + * bam_issue_pending - starts pending transactions + * @chan: dma channel + * + * Calls tasklet directly which in turn starts any pending transactions + */ +static void bam_issue_pending(struct dma_chan *chan) +{ + struct bam_chan *bchan = to_bam_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&bchan->vc.lock, flags); + + /* if work pending and idle, start a transaction */ + if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan)) + bam_start_dma(bchan); + + spin_unlock_irqrestore(&bchan->vc.lock, flags); +} + +/** + * bam_dma_free_desc - free descriptor memory + * @vd: virtual descriptor + * + */ +static void bam_dma_free_desc(struct virt_dma_desc *vd) +{ + struct bam_async_desc *async_desc = container_of(vd, + struct bam_async_desc, vd); + + kfree(async_desc); +} + +static struct dma_chan *bam_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *of) +{ + struct bam_device *bdev = container_of(of->of_dma_data, + struct bam_device, common); + unsigned int request; + + if (dma_spec->args_count != 1) + return NULL; + + request = dma_spec->args[0]; + if (request >= bdev->num_channels) + return NULL; + + return dma_get_slave_channel(&(bdev->channels[request].vc.chan)); +} + +/** + * bam_init + * @bdev: bam device + * + * Initialization helper for global bam registers + */ +static int bam_init(struct bam_device *bdev) +{ + u32 val; + + /* read revision and configuration information */ + if (!bdev->num_ees) { + val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)); + bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK; + } + + /* check that configured EE is within range */ + if (bdev->ee >= bdev->num_ees) + return -EINVAL; + + if (!bdev->num_channels) { + val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES)); + bdev->num_channels = val & BAM_NUM_PIPES_MASK; + } + + /* Reset BAM now if fully controlled locally */ + if (!bdev->controlled_remotely && !bdev->powered_remotely) + bam_reset(bdev); + + return 0; +} + +static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan, + u32 index) +{ + bchan->id = index; + bchan->bdev = bdev; + + vchan_init(&bchan->vc, &bdev->common); + bchan->vc.desc_free = bam_dma_free_desc; + INIT_LIST_HEAD(&bchan->desc_list); +} + +static const struct of_device_id bam_of_match[] = { + { .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info }, + { .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info }, + { .compatible = "qcom,bam-v1.7.0", .data = &bam_v1_7_reg_info }, + {} +}; + +MODULE_DEVICE_TABLE(of, bam_of_match); + +static int bam_dma_probe(struct platform_device *pdev) +{ + struct bam_device *bdev; + const struct of_device_id *match; + int ret, i; + + bdev = devm_kzalloc(&pdev->dev, sizeof(*bdev), GFP_KERNEL); + if (!bdev) + return -ENOMEM; + + bdev->dev = &pdev->dev; + + match = of_match_node(bam_of_match, pdev->dev.of_node); + if (!match) { + dev_err(&pdev->dev, "Unsupported BAM module\n"); + return -ENODEV; + } + + bdev->layout = match->data; + + bdev->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(bdev->regs)) + return PTR_ERR(bdev->regs); + + bdev->irq = platform_get_irq(pdev, 0); + if (bdev->irq < 0) + return bdev->irq; + + ret = of_property_read_u32(pdev->dev.of_node, "qcom,ee", &bdev->ee); + if (ret) { + dev_err(bdev->dev, "Execution environment unspecified\n"); + return ret; + } + + bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node, + "qcom,controlled-remotely"); + bdev->powered_remotely = of_property_read_bool(pdev->dev.of_node, + "qcom,powered-remotely"); + + if (bdev->controlled_remotely || bdev->powered_remotely) + bdev->bamclk = devm_clk_get_optional(bdev->dev, "bam_clk"); + else + bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk"); + + if (IS_ERR(bdev->bamclk)) + return PTR_ERR(bdev->bamclk); + + if (!bdev->bamclk) { + ret = of_property_read_u32(pdev->dev.of_node, "num-channels", + &bdev->num_channels); + if (ret) + dev_err(bdev->dev, "num-channels unspecified in dt\n"); + + ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees", + &bdev->num_ees); + if (ret) + dev_err(bdev->dev, "num-ees unspecified in dt\n"); + } + + ret = clk_prepare_enable(bdev->bamclk); + if (ret) { + dev_err(bdev->dev, "failed to prepare/enable clock\n"); + return ret; + } + + ret = bam_init(bdev); + if (ret) + goto err_disable_clk; + + tasklet_setup(&bdev->task, dma_tasklet); + + bdev->channels = devm_kcalloc(bdev->dev, bdev->num_channels, + sizeof(*bdev->channels), GFP_KERNEL); + + if (!bdev->channels) { + ret = -ENOMEM; + goto err_tasklet_kill; + } + + /* allocate and initialize channels */ + INIT_LIST_HEAD(&bdev->common.channels); + + for (i = 0; i < bdev->num_channels; i++) + bam_channel_init(bdev, &bdev->channels[i], i); + + ret = devm_request_irq(bdev->dev, bdev->irq, bam_dma_irq, + IRQF_TRIGGER_HIGH, "bam_dma", bdev); + if (ret) + goto err_bam_channel_exit; + + /* set max dma segment size */ + bdev->common.dev = bdev->dev; + ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE); + if (ret) { + dev_err(bdev->dev, "cannot set maximum segment size\n"); + goto err_bam_channel_exit; + } + + platform_set_drvdata(pdev, bdev); + + /* set capabilities */ + dma_cap_zero(bdev->common.cap_mask); + dma_cap_set(DMA_SLAVE, bdev->common.cap_mask); + + /* initialize dmaengine apis */ + bdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + bdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + bdev->common.src_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES; + bdev->common.dst_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES; + bdev->common.device_alloc_chan_resources = bam_alloc_chan; + bdev->common.device_free_chan_resources = bam_free_chan; + bdev->common.device_prep_slave_sg = bam_prep_slave_sg; + bdev->common.device_config = bam_slave_config; + bdev->common.device_pause = bam_pause; + bdev->common.device_resume = bam_resume; + bdev->common.device_terminate_all = bam_dma_terminate_all; + bdev->common.device_issue_pending = bam_issue_pending; + bdev->common.device_tx_status = bam_tx_status; + bdev->common.dev = bdev->dev; + + ret = dma_async_device_register(&bdev->common); + if (ret) { + dev_err(bdev->dev, "failed to register dma async device\n"); + goto err_bam_channel_exit; + } + + ret = of_dma_controller_register(pdev->dev.of_node, bam_dma_xlate, + &bdev->common); + if (ret) + goto err_unregister_dma; + + pm_runtime_irq_safe(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + return 0; + +err_unregister_dma: + dma_async_device_unregister(&bdev->common); +err_bam_channel_exit: + for (i = 0; i < bdev->num_channels; i++) + tasklet_kill(&bdev->channels[i].vc.task); +err_tasklet_kill: + tasklet_kill(&bdev->task); +err_disable_clk: + clk_disable_unprepare(bdev->bamclk); + + return ret; +} + +static int bam_dma_remove(struct platform_device *pdev) +{ + struct bam_device *bdev = platform_get_drvdata(pdev); + u32 i; + + pm_runtime_force_suspend(&pdev->dev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&bdev->common); + + /* mask all interrupts for this execution environment */ + writel_relaxed(0, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE)); + + devm_free_irq(bdev->dev, bdev->irq, bdev); + + for (i = 0; i < bdev->num_channels; i++) { + bam_dma_terminate_all(&bdev->channels[i].vc.chan); + tasklet_kill(&bdev->channels[i].vc.task); + + if (!bdev->channels[i].fifo_virt) + continue; + + dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, + bdev->channels[i].fifo_virt, + bdev->channels[i].fifo_phys); + } + + tasklet_kill(&bdev->task); + + clk_disable_unprepare(bdev->bamclk); + + return 0; +} + +static int __maybe_unused bam_dma_runtime_suspend(struct device *dev) +{ + struct bam_device *bdev = dev_get_drvdata(dev); + + clk_disable(bdev->bamclk); + + return 0; +} + +static int __maybe_unused bam_dma_runtime_resume(struct device *dev) +{ + struct bam_device *bdev = dev_get_drvdata(dev); + int ret; + + ret = clk_enable(bdev->bamclk); + if (ret < 0) { + dev_err(dev, "clk_enable failed: %d\n", ret); + return ret; + } + + return 0; +} + +static int __maybe_unused bam_dma_suspend(struct device *dev) +{ + struct bam_device *bdev = dev_get_drvdata(dev); + + pm_runtime_force_suspend(dev); + clk_unprepare(bdev->bamclk); + + return 0; +} + +static int __maybe_unused bam_dma_resume(struct device *dev) +{ + struct bam_device *bdev = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare(bdev->bamclk); + if (ret) + return ret; + + pm_runtime_force_resume(dev); + + return 0; +} + +static const struct dev_pm_ops bam_dma_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(bam_dma_suspend, bam_dma_resume) + SET_RUNTIME_PM_OPS(bam_dma_runtime_suspend, bam_dma_runtime_resume, + NULL) +}; + +static struct platform_driver bam_dma_driver = { + .probe = bam_dma_probe, + .remove = bam_dma_remove, + .driver = { + .name = "bam-dma-engine", + .pm = &bam_dma_pm_ops, + .of_match_table = bam_of_match, + }, +}; + +module_platform_driver(bam_dma_driver); + +MODULE_AUTHOR("Andy Gross "); +MODULE_DESCRIPTION("QCOM BAM DMA engine driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c new file mode 100644 index 0000000000..1c93864e0e --- /dev/null +++ b/drivers/dma/qcom/gpi.c @@ -0,0 +1,2319 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2020, Linaro Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../dmaengine.h" +#include "../virt-dma.h" + +#define TRE_TYPE_DMA 0x10 +#define TRE_TYPE_GO 0x20 +#define TRE_TYPE_CONFIG0 0x22 + +/* TRE flags */ +#define TRE_FLAGS_CHAIN BIT(0) +#define TRE_FLAGS_IEOB BIT(8) +#define TRE_FLAGS_IEOT BIT(9) +#define TRE_FLAGS_BEI BIT(10) +#define TRE_FLAGS_LINK BIT(11) +#define TRE_FLAGS_TYPE GENMASK(23, 16) + +/* SPI CONFIG0 WD0 */ +#define TRE_SPI_C0_WORD_SZ GENMASK(4, 0) +#define TRE_SPI_C0_LOOPBACK BIT(8) +#define TRE_SPI_C0_CS BIT(11) +#define TRE_SPI_C0_CPHA BIT(12) +#define TRE_SPI_C0_CPOL BIT(13) +#define TRE_SPI_C0_TX_PACK BIT(24) +#define TRE_SPI_C0_RX_PACK BIT(25) + +/* CONFIG0 WD2 */ +#define TRE_C0_CLK_DIV GENMASK(11, 0) +#define TRE_C0_CLK_SRC GENMASK(19, 16) + +/* SPI GO WD0 */ +#define TRE_SPI_GO_CMD GENMASK(4, 0) +#define TRE_SPI_GO_CS GENMASK(10, 8) +#define TRE_SPI_GO_FRAG BIT(26) + +/* GO WD2 */ +#define TRE_RX_LEN GENMASK(23, 0) + +/* I2C Config0 WD0 */ +#define TRE_I2C_C0_TLOW GENMASK(7, 0) +#define TRE_I2C_C0_THIGH GENMASK(15, 8) +#define TRE_I2C_C0_TCYL GENMASK(23, 16) +#define TRE_I2C_C0_TX_PACK BIT(24) +#define TRE_I2C_C0_RX_PACK BIT(25) + +/* I2C GO WD0 */ +#define TRE_I2C_GO_CMD GENMASK(4, 0) +#define TRE_I2C_GO_ADDR GENMASK(14, 8) +#define TRE_I2C_GO_STRETCH BIT(26) + +/* DMA TRE */ +#define TRE_DMA_LEN GENMASK(23, 0) + +/* Register offsets from gpi-top */ +#define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) +#define GPII_n_CH_k_CNTXT_0_EL_SIZE GENMASK(31, 24) +#define GPII_n_CH_k_CNTXT_0_CHSTATE GENMASK(23, 20) +#define GPII_n_CH_k_CNTXT_0_ERIDX GENMASK(18, 14) +#define GPII_n_CH_k_CNTXT_0_DIR BIT(3) +#define GPII_n_CH_k_CNTXT_0_PROTO GENMASK(2, 0) + +#define GPII_n_CH_k_CNTXT_0(el_size, erindex, dir, chtype_proto) \ + (FIELD_PREP(GPII_n_CH_k_CNTXT_0_EL_SIZE, el_size) | \ + FIELD_PREP(GPII_n_CH_k_CNTXT_0_ERIDX, erindex) | \ + FIELD_PREP(GPII_n_CH_k_CNTXT_0_DIR, dir) | \ + FIELD_PREP(GPII_n_CH_k_CNTXT_0_PROTO, chtype_proto)) + +#define GPI_CHTYPE_DIR_IN (0) +#define GPI_CHTYPE_DIR_OUT (1) + +#define GPI_CHTYPE_PROTO_GPI (0x2) + +#define GPII_n_CH_k_DOORBELL_0_OFFS(n, k) (0x22000 + (0x4000 * (n)) + (0x8 * (k))) +#define GPII_n_CH_CMD_OFFS(n) (0x23008 + (0x4000 * (n))) +#define GPII_n_CH_CMD_OPCODE GENMASK(31, 24) +#define GPII_n_CH_CMD_CHID GENMASK(7, 0) +#define GPII_n_CH_CMD(opcode, chid) \ + (FIELD_PREP(GPII_n_CH_CMD_OPCODE, opcode) | \ + FIELD_PREP(GPII_n_CH_CMD_CHID, chid)) + +#define GPII_n_CH_CMD_ALLOCATE (0) +#define GPII_n_CH_CMD_START (1) +#define GPII_n_CH_CMD_STOP (2) +#define GPII_n_CH_CMD_RESET (9) +#define GPII_n_CH_CMD_DE_ALLOC (10) +#define GPII_n_CH_CMD_UART_SW_STALE (32) +#define GPII_n_CH_CMD_UART_RFR_READY (33) +#define GPII_n_CH_CMD_UART_RFR_NOT_READY (34) + +/* EV Context Array */ +#define GPII_n_EV_CH_k_CNTXT_0_OFFS(n, k) (0x21000 + (0x4000 * (n)) + (0x80 * (k))) +#define GPII_n_EV_k_CNTXT_0_EL_SIZE GENMASK(31, 24) +#define GPII_n_EV_k_CNTXT_0_CHSTATE GENMASK(23, 20) +#define GPII_n_EV_k_CNTXT_0_INTYPE BIT(16) +#define GPII_n_EV_k_CNTXT_0_CHTYPE GENMASK(3, 0) + +#define GPII_n_EV_k_CNTXT_0(el_size, inttype, chtype) \ + (FIELD_PREP(GPII_n_EV_k_CNTXT_0_EL_SIZE, el_size) | \ + FIELD_PREP(GPII_n_EV_k_CNTXT_0_INTYPE, inttype) | \ + FIELD_PREP(GPII_n_EV_k_CNTXT_0_CHTYPE, chtype)) + +#define GPI_INTTYPE_IRQ (1) +#define GPI_CHTYPE_GPI_EV (0x2) + +enum CNTXT_OFFS { + CNTXT_0_CONFIG = 0x0, + CNTXT_1_R_LENGTH = 0x4, + CNTXT_2_RING_BASE_LSB = 0x8, + CNTXT_3_RING_BASE_MSB = 0xC, + CNTXT_4_RING_RP_LSB = 0x10, + CNTXT_5_RING_RP_MSB = 0x14, + CNTXT_6_RING_WP_LSB = 0x18, + CNTXT_7_RING_WP_MSB = 0x1C, + CNTXT_8_RING_INT_MOD = 0x20, + CNTXT_9_RING_INTVEC = 0x24, + CNTXT_10_RING_MSI_LSB = 0x28, + CNTXT_11_RING_MSI_MSB = 0x2C, + CNTXT_12_RING_RP_UPDATE_LSB = 0x30, + CNTXT_13_RING_RP_UPDATE_MSB = 0x34, +}; + +#define GPII_n_EV_CH_k_DOORBELL_0_OFFS(n, k) (0x22100 + (0x4000 * (n)) + (0x8 * (k))) +#define GPII_n_EV_CH_CMD_OFFS(n) (0x23010 + (0x4000 * (n))) +#define GPII_n_EV_CMD_OPCODE GENMASK(31, 24) +#define GPII_n_EV_CMD_CHID GENMASK(7, 0) +#define GPII_n_EV_CMD(opcode, chid) \ + (FIELD_PREP(GPII_n_EV_CMD_OPCODE, opcode) | \ + FIELD_PREP(GPII_n_EV_CMD_CHID, chid)) + +#define GPII_n_EV_CH_CMD_ALLOCATE (0x00) +#define GPII_n_EV_CH_CMD_RESET (0x09) +#define GPII_n_EV_CH_CMD_DE_ALLOC (0x0A) + +#define GPII_n_CNTXT_TYPE_IRQ_OFFS(n) (0x23080 + (0x4000 * (n))) + +/* mask type register */ +#define GPII_n_CNTXT_TYPE_IRQ_MSK_OFFS(n) (0x23088 + (0x4000 * (n))) +#define GPII_n_CNTXT_TYPE_IRQ_MSK_BMSK GENMASK(6, 0) +#define GPII_n_CNTXT_TYPE_IRQ_MSK_GENERAL BIT(6) +#define GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB BIT(3) +#define GPII_n_CNTXT_TYPE_IRQ_MSK_GLOB BIT(2) +#define GPII_n_CNTXT_TYPE_IRQ_MSK_EV_CTRL BIT(1) +#define GPII_n_CNTXT_TYPE_IRQ_MSK_CH_CTRL BIT(0) + +#define GPII_n_CNTXT_SRC_GPII_CH_IRQ_OFFS(n) (0x23090 + (0x4000 * (n))) +#define GPII_n_CNTXT_SRC_EV_CH_IRQ_OFFS(n) (0x23094 + (0x4000 * (n))) + +/* Mask channel control interrupt register */ +#define GPII_n_CNTXT_SRC_CH_IRQ_MSK_OFFS(n) (0x23098 + (0x4000 * (n))) +#define GPII_n_CNTXT_SRC_CH_IRQ_MSK_BMSK GENMASK(1, 0) + +/* Mask event control interrupt register */ +#define GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_OFFS(n) (0x2309C + (0x4000 * (n))) +#define GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_BMSK BIT(0) + +#define GPII_n_CNTXT_SRC_CH_IRQ_CLR_OFFS(n) (0x230A0 + (0x4000 * (n))) +#define GPII_n_CNTXT_SRC_EV_CH_IRQ_CLR_OFFS(n) (0x230A4 + (0x4000 * (n))) + +/* Mask event interrupt register */ +#define GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_OFFS(n) (0x230B8 + (0x4000 * (n))) +#define GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_BMSK BIT(0) + +#define GPII_n_CNTXT_SRC_IEOB_IRQ_CLR_OFFS(n) (0x230C0 + (0x4000 * (n))) +#define GPII_n_CNTXT_GLOB_IRQ_STTS_OFFS(n) (0x23100 + (0x4000 * (n))) +#define GPI_GLOB_IRQ_ERROR_INT_MSK BIT(0) + +/* GPII specific Global - Enable bit register */ +#define GPII_n_CNTXT_GLOB_IRQ_EN_OFFS(n) (0x23108 + (0x4000 * (n))) +#define GPII_n_CNTXT_GLOB_IRQ_CLR_OFFS(n) (0x23110 + (0x4000 * (n))) +#define GPII_n_CNTXT_GPII_IRQ_STTS_OFFS(n) (0x23118 + (0x4000 * (n))) + +/* GPII general interrupt - Enable bit register */ +#define GPII_n_CNTXT_GPII_IRQ_EN_OFFS(n) (0x23120 + (0x4000 * (n))) +#define GPII_n_CNTXT_GPII_IRQ_EN_BMSK GENMASK(3, 0) + +#define GPII_n_CNTXT_GPII_IRQ_CLR_OFFS(n) (0x23128 + (0x4000 * (n))) + +/* GPII Interrupt Type register */ +#define GPII_n_CNTXT_INTSET_OFFS(n) (0x23180 + (0x4000 * (n))) +#define GPII_n_CNTXT_INTSET_BMSK BIT(0) + +#define GPII_n_CNTXT_MSI_BASE_LSB_OFFS(n) (0x23188 + (0x4000 * (n))) +#define GPII_n_CNTXT_MSI_BASE_MSB_OFFS(n) (0x2318C + (0x4000 * (n))) +#define GPII_n_CNTXT_SCRATCH_0_OFFS(n) (0x23400 + (0x4000 * (n))) +#define GPII_n_CNTXT_SCRATCH_1_OFFS(n) (0x23404 + (0x4000 * (n))) + +#define GPII_n_ERROR_LOG_OFFS(n) (0x23200 + (0x4000 * (n))) + +/* QOS Registers */ +#define GPII_n_CH_k_QOS_OFFS(n, k) (0x2005C + (0x4000 * (n)) + (0x80 * (k))) + +/* Scratch registers */ +#define GPII_n_CH_k_SCRATCH_0_OFFS(n, k) (0x20060 + (0x4000 * (n)) + (0x80 * (k))) +#define GPII_n_CH_k_SCRATCH_0_SEID GENMASK(2, 0) +#define GPII_n_CH_k_SCRATCH_0_PROTO GENMASK(7, 4) +#define GPII_n_CH_k_SCRATCH_0_PAIR GENMASK(20, 16) +#define GPII_n_CH_k_SCRATCH_0(pair, proto, seid) \ + (FIELD_PREP(GPII_n_CH_k_SCRATCH_0_PAIR, pair) | \ + FIELD_PREP(GPII_n_CH_k_SCRATCH_0_PROTO, proto) | \ + FIELD_PREP(GPII_n_CH_k_SCRATCH_0_SEID, seid)) +#define GPII_n_CH_k_SCRATCH_1_OFFS(n, k) (0x20064 + (0x4000 * (n)) + (0x80 * (k))) +#define GPII_n_CH_k_SCRATCH_2_OFFS(n, k) (0x20068 + (0x4000 * (n)) + (0x80 * (k))) +#define GPII_n_CH_k_SCRATCH_3_OFFS(n, k) (0x2006C + (0x4000 * (n)) + (0x80 * (k))) + +struct __packed gpi_tre { + u32 dword[4]; +}; + +enum msm_gpi_tce_code { + MSM_GPI_TCE_SUCCESS = 1, + MSM_GPI_TCE_EOT = 2, + MSM_GPI_TCE_EOB = 4, + MSM_GPI_TCE_UNEXP_ERR = 16, +}; + +#define CMD_TIMEOUT_MS (250) + +#define MAX_CHANNELS_PER_GPII (2) +#define GPI_TX_CHAN (0) +#define GPI_RX_CHAN (1) +#define STATE_IGNORE (U32_MAX) +#define EV_FACTOR (2) +#define REQ_OF_DMA_ARGS (5) /* # of arguments required from client */ +#define CHAN_TRES 64 + +struct __packed xfer_compl_event { + u64 ptr; + u32 length:24; + u8 code; + u16 status; + u8 type; + u8 chid; +}; + +struct __packed immediate_data_event { + u8 data_bytes[8]; + u8 length:4; + u8 resvd:4; + u16 tre_index; + u8 code; + u16 status; + u8 type; + u8 chid; +}; + +struct __packed qup_notif_event { + u32 status; + u32 time; + u32 count:24; + u8 resvd; + u16 resvd1; + u8 type; + u8 chid; +}; + +struct __packed gpi_ere { + u32 dword[4]; +}; + +enum GPI_EV_TYPE { + XFER_COMPLETE_EV_TYPE = 0x22, + IMMEDIATE_DATA_EV_TYPE = 0x30, + QUP_NOTIF_EV_TYPE = 0x31, + STALE_EV_TYPE = 0xFF, +}; + +union __packed gpi_event { + struct __packed xfer_compl_event xfer_compl_event; + struct __packed immediate_data_event immediate_data_event; + struct __packed qup_notif_event qup_notif_event; + struct __packed gpi_ere gpi_ere; +}; + +enum gpii_irq_settings { + DEFAULT_IRQ_SETTINGS, + MASK_IEOB_SETTINGS, +}; + +enum gpi_ev_state { + DEFAULT_EV_CH_STATE = 0, + EV_STATE_NOT_ALLOCATED = DEFAULT_EV_CH_STATE, + EV_STATE_ALLOCATED, + MAX_EV_STATES +}; + +static const char *const gpi_ev_state_str[MAX_EV_STATES] = { + [EV_STATE_NOT_ALLOCATED] = "NOT ALLOCATED", + [EV_STATE_ALLOCATED] = "ALLOCATED", +}; + +#define TO_GPI_EV_STATE_STR(_state) (((_state) >= MAX_EV_STATES) ? \ + "INVALID" : gpi_ev_state_str[(_state)]) + +enum gpi_ch_state { + DEFAULT_CH_STATE = 0x0, + CH_STATE_NOT_ALLOCATED = DEFAULT_CH_STATE, + CH_STATE_ALLOCATED = 0x1, + CH_STATE_STARTED = 0x2, + CH_STATE_STOPPED = 0x3, + CH_STATE_STOP_IN_PROC = 0x4, + CH_STATE_ERROR = 0xf, + MAX_CH_STATES +}; + +enum gpi_cmd { + GPI_CH_CMD_BEGIN, + GPI_CH_CMD_ALLOCATE = GPI_CH_CMD_BEGIN, + GPI_CH_CMD_START, + GPI_CH_CMD_STOP, + GPI_CH_CMD_RESET, + GPI_CH_CMD_DE_ALLOC, + GPI_CH_CMD_UART_SW_STALE, + GPI_CH_CMD_UART_RFR_READY, + GPI_CH_CMD_UART_RFR_NOT_READY, + GPI_CH_CMD_END = GPI_CH_CMD_UART_RFR_NOT_READY, + GPI_EV_CMD_BEGIN, + GPI_EV_CMD_ALLOCATE = GPI_EV_CMD_BEGIN, + GPI_EV_CMD_RESET, + GPI_EV_CMD_DEALLOC, + GPI_EV_CMD_END = GPI_EV_CMD_DEALLOC, + GPI_MAX_CMD, +}; + +#define IS_CHAN_CMD(_cmd) ((_cmd) <= GPI_CH_CMD_END) + +static const char *const gpi_cmd_str[GPI_MAX_CMD] = { + [GPI_CH_CMD_ALLOCATE] = "CH ALLOCATE", + [GPI_CH_CMD_START] = "CH START", + [GPI_CH_CMD_STOP] = "CH STOP", + [GPI_CH_CMD_RESET] = "CH_RESET", + [GPI_CH_CMD_DE_ALLOC] = "DE ALLOC", + [GPI_CH_CMD_UART_SW_STALE] = "UART SW STALE", + [GPI_CH_CMD_UART_RFR_READY] = "UART RFR READY", + [GPI_CH_CMD_UART_RFR_NOT_READY] = "UART RFR NOT READY", + [GPI_EV_CMD_ALLOCATE] = "EV ALLOCATE", + [GPI_EV_CMD_RESET] = "EV RESET", + [GPI_EV_CMD_DEALLOC] = "EV DEALLOC", +}; + +#define TO_GPI_CMD_STR(_cmd) (((_cmd) >= GPI_MAX_CMD) ? "INVALID" : \ + gpi_cmd_str[(_cmd)]) + +/* + * @DISABLE_STATE: no register access allowed + * @CONFIG_STATE: client has configured the channel + * @PREP_HARDWARE: register access is allowed + * however, no processing EVENTS + * @ACTIVE_STATE: channels are fully operational + * @PREPARE_TERMINATE: graceful termination of channels + * register access is allowed + * @PAUSE_STATE: channels are active, but not processing any events + */ +enum gpi_pm_state { + DISABLE_STATE, + CONFIG_STATE, + PREPARE_HARDWARE, + ACTIVE_STATE, + PREPARE_TERMINATE, + PAUSE_STATE, + MAX_PM_STATE +}; + +#define REG_ACCESS_VALID(_pm_state) ((_pm_state) >= PREPARE_HARDWARE) + +static const char *const gpi_pm_state_str[MAX_PM_STATE] = { + [DISABLE_STATE] = "DISABLE", + [CONFIG_STATE] = "CONFIG", + [PREPARE_HARDWARE] = "PREPARE HARDWARE", + [ACTIVE_STATE] = "ACTIVE", + [PREPARE_TERMINATE] = "PREPARE TERMINATE", + [PAUSE_STATE] = "PAUSE", +}; + +#define TO_GPI_PM_STR(_state) (((_state) >= MAX_PM_STATE) ? \ + "INVALID" : gpi_pm_state_str[(_state)]) + +static const struct { + enum gpi_cmd gpi_cmd; + u32 opcode; + u32 state; +} gpi_cmd_info[GPI_MAX_CMD] = { + { + GPI_CH_CMD_ALLOCATE, + GPII_n_CH_CMD_ALLOCATE, + CH_STATE_ALLOCATED, + }, + { + GPI_CH_CMD_START, + GPII_n_CH_CMD_START, + CH_STATE_STARTED, + }, + { + GPI_CH_CMD_STOP, + GPII_n_CH_CMD_STOP, + CH_STATE_STOPPED, + }, + { + GPI_CH_CMD_RESET, + GPII_n_CH_CMD_RESET, + CH_STATE_ALLOCATED, + }, + { + GPI_CH_CMD_DE_ALLOC, + GPII_n_CH_CMD_DE_ALLOC, + CH_STATE_NOT_ALLOCATED, + }, + { + GPI_CH_CMD_UART_SW_STALE, + GPII_n_CH_CMD_UART_SW_STALE, + STATE_IGNORE, + }, + { + GPI_CH_CMD_UART_RFR_READY, + GPII_n_CH_CMD_UART_RFR_READY, + STATE_IGNORE, + }, + { + GPI_CH_CMD_UART_RFR_NOT_READY, + GPII_n_CH_CMD_UART_RFR_NOT_READY, + STATE_IGNORE, + }, + { + GPI_EV_CMD_ALLOCATE, + GPII_n_EV_CH_CMD_ALLOCATE, + EV_STATE_ALLOCATED, + }, + { + GPI_EV_CMD_RESET, + GPII_n_EV_CH_CMD_RESET, + EV_STATE_ALLOCATED, + }, + { + GPI_EV_CMD_DEALLOC, + GPII_n_EV_CH_CMD_DE_ALLOC, + EV_STATE_NOT_ALLOCATED, + }, +}; + +struct gpi_ring { + void *pre_aligned; + size_t alloc_size; + phys_addr_t phys_addr; + dma_addr_t dma_handle; + void *base; + void *wp; + void *rp; + u32 len; + u32 el_size; + u32 elements; + bool configured; +}; + +struct gpi_dev { + struct dma_device dma_device; + struct device *dev; + struct resource *res; + void __iomem *regs; + void __iomem *ee_base; /*ee register base address*/ + u32 max_gpii; /* maximum # of gpii instances available per gpi block */ + u32 gpii_mask; /* gpii instances available for apps */ + u32 ev_factor; /* ev ring length factor */ + struct gpii *gpiis; +}; + +struct reg_info { + char *name; + u32 offset; + u32 val; +}; + +struct gchan { + struct virt_dma_chan vc; + u32 chid; + u32 seid; + u32 protocol; + struct gpii *gpii; + enum gpi_ch_state ch_state; + enum gpi_pm_state pm_state; + void __iomem *ch_cntxt_base_reg; + void __iomem *ch_cntxt_db_reg; + void __iomem *ch_cmd_reg; + u32 dir; + struct gpi_ring ch_ring; + void *config; +}; + +struct gpii { + u32 gpii_id; + struct gchan gchan[MAX_CHANNELS_PER_GPII]; + struct gpi_dev *gpi_dev; + int irq; + void __iomem *regs; /* points to gpi top */ + void __iomem *ev_cntxt_base_reg; + void __iomem *ev_cntxt_db_reg; + void __iomem *ev_ring_rp_lsb_reg; + void __iomem *ev_cmd_reg; + void __iomem *ieob_clr_reg; + struct mutex ctrl_lock; + enum gpi_ev_state ev_state; + bool configured_irq; + enum gpi_pm_state pm_state; + rwlock_t pm_lock; + struct gpi_ring ev_ring; + struct tasklet_struct ev_task; /* event processing tasklet */ + struct completion cmd_completion; + enum gpi_cmd gpi_cmd; + u32 cntxt_type_irq_msk; + bool ieob_set; +}; + +#define MAX_TRE 3 + +struct gpi_desc { + struct virt_dma_desc vd; + size_t len; + void *db; /* DB register to program */ + struct gchan *gchan; + struct gpi_tre tre[MAX_TRE]; + u32 num_tre; +}; + +static const u32 GPII_CHAN_DIR[MAX_CHANNELS_PER_GPII] = { + GPI_CHTYPE_DIR_OUT, GPI_CHTYPE_DIR_IN +}; + +static irqreturn_t gpi_handle_irq(int irq, void *data); +static void gpi_ring_recycle_ev_element(struct gpi_ring *ring); +static int gpi_ring_add_element(struct gpi_ring *ring, void **wp); +static void gpi_process_events(struct gpii *gpii); + +static inline struct gchan *to_gchan(struct dma_chan *dma_chan) +{ + return container_of(dma_chan, struct gchan, vc.chan); +} + +static inline struct gpi_desc *to_gpi_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct gpi_desc, vd); +} + +static inline phys_addr_t to_physical(const struct gpi_ring *const ring, + void *addr) +{ + return ring->phys_addr + (addr - ring->base); +} + +static inline void *to_virtual(const struct gpi_ring *const ring, phys_addr_t addr) +{ + return ring->base + (addr - ring->phys_addr); +} + +static inline u32 gpi_read_reg(struct gpii *gpii, void __iomem *addr) +{ + return readl_relaxed(addr); +} + +static inline void gpi_write_reg(struct gpii *gpii, void __iomem *addr, u32 val) +{ + writel_relaxed(val, addr); +} + +/* gpi_write_reg_field - write to specific bit field */ +static inline void gpi_write_reg_field(struct gpii *gpii, void __iomem *addr, + u32 mask, u32 shift, u32 val) +{ + u32 tmp = gpi_read_reg(gpii, addr); + + tmp &= ~mask; + val = tmp | ((val << shift) & mask); + gpi_write_reg(gpii, addr, val); +} + +static __always_inline void +gpi_update_reg(struct gpii *gpii, u32 offset, u32 mask, u32 val) +{ + void __iomem *addr = gpii->regs + offset; + u32 tmp = gpi_read_reg(gpii, addr); + + tmp &= ~mask; + tmp |= u32_encode_bits(val, mask); + + gpi_write_reg(gpii, addr, tmp); +} + +static void gpi_disable_interrupts(struct gpii *gpii) +{ + gpi_update_reg(gpii, GPII_n_CNTXT_TYPE_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_TYPE_IRQ_MSK_BMSK, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_BMSK, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_SRC_CH_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_SRC_CH_IRQ_MSK_BMSK, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_BMSK, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_GLOB_IRQ_EN_OFFS(gpii->gpii_id), + GPII_n_CNTXT_GPII_IRQ_EN_BMSK, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_GPII_IRQ_EN_OFFS(gpii->gpii_id), + GPII_n_CNTXT_GPII_IRQ_EN_BMSK, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_INTSET_OFFS(gpii->gpii_id), + GPII_n_CNTXT_INTSET_BMSK, 0); + + gpii->cntxt_type_irq_msk = 0; + devm_free_irq(gpii->gpi_dev->dev, gpii->irq, gpii); + gpii->configured_irq = false; +} + +/* configure and enable interrupts */ +static int gpi_config_interrupts(struct gpii *gpii, enum gpii_irq_settings settings, bool mask) +{ + const u32 enable = (GPII_n_CNTXT_TYPE_IRQ_MSK_GENERAL | + GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB | + GPII_n_CNTXT_TYPE_IRQ_MSK_GLOB | + GPII_n_CNTXT_TYPE_IRQ_MSK_EV_CTRL | + GPII_n_CNTXT_TYPE_IRQ_MSK_CH_CTRL); + int ret; + + if (!gpii->configured_irq) { + ret = devm_request_irq(gpii->gpi_dev->dev, gpii->irq, + gpi_handle_irq, IRQF_TRIGGER_HIGH, + "gpi-dma", gpii); + if (ret < 0) { + dev_err(gpii->gpi_dev->dev, "error request irq:%d ret:%d\n", + gpii->irq, ret); + return ret; + } + } + + if (settings == MASK_IEOB_SETTINGS) { + /* + * GPII only uses one EV ring per gpii so we can globally + * enable/disable IEOB interrupt + */ + if (mask) + gpii->cntxt_type_irq_msk |= GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB; + else + gpii->cntxt_type_irq_msk &= ~(GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB); + gpi_update_reg(gpii, GPII_n_CNTXT_TYPE_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_TYPE_IRQ_MSK_BMSK, gpii->cntxt_type_irq_msk); + } else { + gpi_update_reg(gpii, GPII_n_CNTXT_TYPE_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_TYPE_IRQ_MSK_BMSK, enable); + gpi_update_reg(gpii, GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_BMSK, + GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_BMSK); + gpi_update_reg(gpii, GPII_n_CNTXT_SRC_CH_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_SRC_CH_IRQ_MSK_BMSK, + GPII_n_CNTXT_SRC_CH_IRQ_MSK_BMSK); + gpi_update_reg(gpii, GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_OFFS(gpii->gpii_id), + GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_BMSK, + GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_BMSK); + gpi_update_reg(gpii, GPII_n_CNTXT_GLOB_IRQ_EN_OFFS(gpii->gpii_id), + GPII_n_CNTXT_GPII_IRQ_EN_BMSK, + GPII_n_CNTXT_GPII_IRQ_EN_BMSK); + gpi_update_reg(gpii, GPII_n_CNTXT_GPII_IRQ_EN_OFFS(gpii->gpii_id), + GPII_n_CNTXT_GPII_IRQ_EN_BMSK, GPII_n_CNTXT_GPII_IRQ_EN_BMSK); + gpi_update_reg(gpii, GPII_n_CNTXT_MSI_BASE_LSB_OFFS(gpii->gpii_id), U32_MAX, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_MSI_BASE_MSB_OFFS(gpii->gpii_id), U32_MAX, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_SCRATCH_0_OFFS(gpii->gpii_id), U32_MAX, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_SCRATCH_1_OFFS(gpii->gpii_id), U32_MAX, 0); + gpi_update_reg(gpii, GPII_n_CNTXT_INTSET_OFFS(gpii->gpii_id), + GPII_n_CNTXT_INTSET_BMSK, 1); + gpi_update_reg(gpii, GPII_n_ERROR_LOG_OFFS(gpii->gpii_id), U32_MAX, 0); + + gpii->cntxt_type_irq_msk = enable; + } + + gpii->configured_irq = true; + return 0; +} + +/* Sends gpii event or channel command */ +static int gpi_send_cmd(struct gpii *gpii, struct gchan *gchan, + enum gpi_cmd gpi_cmd) +{ + u32 chid = MAX_CHANNELS_PER_GPII; + unsigned long timeout; + void __iomem *cmd_reg; + u32 cmd; + + if (gpi_cmd >= GPI_MAX_CMD) + return -EINVAL; + if (IS_CHAN_CMD(gpi_cmd)) + chid = gchan->chid; + + dev_dbg(gpii->gpi_dev->dev, + "sending cmd: %s:%u\n", TO_GPI_CMD_STR(gpi_cmd), chid); + + /* send opcode and wait for completion */ + reinit_completion(&gpii->cmd_completion); + gpii->gpi_cmd = gpi_cmd; + + cmd_reg = IS_CHAN_CMD(gpi_cmd) ? gchan->ch_cmd_reg : gpii->ev_cmd_reg; + cmd = IS_CHAN_CMD(gpi_cmd) ? GPII_n_CH_CMD(gpi_cmd_info[gpi_cmd].opcode, chid) : + GPII_n_EV_CMD(gpi_cmd_info[gpi_cmd].opcode, 0); + gpi_write_reg(gpii, cmd_reg, cmd); + timeout = wait_for_completion_timeout(&gpii->cmd_completion, + msecs_to_jiffies(CMD_TIMEOUT_MS)); + if (!timeout) { + dev_err(gpii->gpi_dev->dev, "cmd: %s completion timeout:%u\n", + TO_GPI_CMD_STR(gpi_cmd), chid); + return -EIO; + } + + /* confirm new ch state is correct , if the cmd is a state change cmd */ + if (gpi_cmd_info[gpi_cmd].state == STATE_IGNORE) + return 0; + + if (IS_CHAN_CMD(gpi_cmd) && gchan->ch_state == gpi_cmd_info[gpi_cmd].state) + return 0; + + if (!IS_CHAN_CMD(gpi_cmd) && gpii->ev_state == gpi_cmd_info[gpi_cmd].state) + return 0; + + return -EIO; +} + +/* program transfer ring DB register */ +static inline void gpi_write_ch_db(struct gchan *gchan, + struct gpi_ring *ring, void *wp) +{ + struct gpii *gpii = gchan->gpii; + phys_addr_t p_wp; + + p_wp = to_physical(ring, wp); + gpi_write_reg(gpii, gchan->ch_cntxt_db_reg, p_wp); +} + +/* program event ring DB register */ +static inline void gpi_write_ev_db(struct gpii *gpii, + struct gpi_ring *ring, void *wp) +{ + phys_addr_t p_wp; + + p_wp = ring->phys_addr + (wp - ring->base); + gpi_write_reg(gpii, gpii->ev_cntxt_db_reg, p_wp); +} + +/* process transfer completion interrupt */ +static void gpi_process_ieob(struct gpii *gpii) +{ + gpi_write_reg(gpii, gpii->ieob_clr_reg, BIT(0)); + + gpi_config_interrupts(gpii, MASK_IEOB_SETTINGS, 0); + tasklet_hi_schedule(&gpii->ev_task); +} + +/* process channel control interrupt */ +static void gpi_process_ch_ctrl_irq(struct gpii *gpii) +{ + u32 gpii_id = gpii->gpii_id; + u32 offset = GPII_n_CNTXT_SRC_GPII_CH_IRQ_OFFS(gpii_id); + u32 ch_irq = gpi_read_reg(gpii, gpii->regs + offset); + struct gchan *gchan; + u32 chid, state; + + /* clear the status */ + offset = GPII_n_CNTXT_SRC_CH_IRQ_CLR_OFFS(gpii_id); + gpi_write_reg(gpii, gpii->regs + offset, (u32)ch_irq); + + for (chid = 0; chid < MAX_CHANNELS_PER_GPII; chid++) { + if (!(BIT(chid) & ch_irq)) + continue; + + gchan = &gpii->gchan[chid]; + state = gpi_read_reg(gpii, gchan->ch_cntxt_base_reg + + CNTXT_0_CONFIG); + state = FIELD_GET(GPII_n_CH_k_CNTXT_0_CHSTATE, state); + + /* + * CH_CMD_DEALLOC cmd always successful. However cmd does + * not change hardware status. So overwriting software state + * to default state. + */ + if (gpii->gpi_cmd == GPI_CH_CMD_DE_ALLOC) + state = DEFAULT_CH_STATE; + gchan->ch_state = state; + + /* + * Triggering complete all if ch_state is not a stop in process. + * Stop in process is a transition state and we will wait for + * stop interrupt before notifying. + */ + if (gchan->ch_state != CH_STATE_STOP_IN_PROC) + complete_all(&gpii->cmd_completion); + } +} + +/* processing gpi general error interrupts */ +static void gpi_process_gen_err_irq(struct gpii *gpii) +{ + u32 gpii_id = gpii->gpii_id; + u32 offset = GPII_n_CNTXT_GPII_IRQ_STTS_OFFS(gpii_id); + u32 irq_stts = gpi_read_reg(gpii, gpii->regs + offset); + + /* clear the status */ + dev_dbg(gpii->gpi_dev->dev, "irq_stts:0x%x\n", irq_stts); + + /* Clear the register */ + offset = GPII_n_CNTXT_GPII_IRQ_CLR_OFFS(gpii_id); + gpi_write_reg(gpii, gpii->regs + offset, irq_stts); +} + +/* processing gpi level error interrupts */ +static void gpi_process_glob_err_irq(struct gpii *gpii) +{ + u32 gpii_id = gpii->gpii_id; + u32 offset = GPII_n_CNTXT_GLOB_IRQ_STTS_OFFS(gpii_id); + u32 irq_stts = gpi_read_reg(gpii, gpii->regs + offset); + + offset = GPII_n_CNTXT_GLOB_IRQ_CLR_OFFS(gpii_id); + gpi_write_reg(gpii, gpii->regs + offset, irq_stts); + + /* only error interrupt should be set */ + if (irq_stts & ~GPI_GLOB_IRQ_ERROR_INT_MSK) { + dev_err(gpii->gpi_dev->dev, "invalid error status:0x%x\n", irq_stts); + return; + } + + offset = GPII_n_ERROR_LOG_OFFS(gpii_id); + gpi_write_reg(gpii, gpii->regs + offset, 0); +} + +/* gpii interrupt handler */ +static irqreturn_t gpi_handle_irq(int irq, void *data) +{ + struct gpii *gpii = data; + u32 gpii_id = gpii->gpii_id; + u32 type, offset; + unsigned long flags; + + read_lock_irqsave(&gpii->pm_lock, flags); + + /* + * States are out of sync to receive interrupt + * while software state is in DISABLE state, bailing out. + */ + if (!REG_ACCESS_VALID(gpii->pm_state)) { + dev_err(gpii->gpi_dev->dev, "receive interrupt while in %s state\n", + TO_GPI_PM_STR(gpii->pm_state)); + goto exit_irq; + } + + offset = GPII_n_CNTXT_TYPE_IRQ_OFFS(gpii->gpii_id); + type = gpi_read_reg(gpii, gpii->regs + offset); + + do { + /* global gpii error */ + if (type & GPII_n_CNTXT_TYPE_IRQ_MSK_GLOB) { + gpi_process_glob_err_irq(gpii); + type &= ~(GPII_n_CNTXT_TYPE_IRQ_MSK_GLOB); + } + + /* transfer complete interrupt */ + if (type & GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB) { + gpi_process_ieob(gpii); + type &= ~GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB; + } + + /* event control irq */ + if (type & GPII_n_CNTXT_TYPE_IRQ_MSK_EV_CTRL) { + u32 ev_state; + u32 ev_ch_irq; + + dev_dbg(gpii->gpi_dev->dev, + "processing EV CTRL interrupt\n"); + offset = GPII_n_CNTXT_SRC_EV_CH_IRQ_OFFS(gpii_id); + ev_ch_irq = gpi_read_reg(gpii, gpii->regs + offset); + + offset = GPII_n_CNTXT_SRC_EV_CH_IRQ_CLR_OFFS + (gpii_id); + gpi_write_reg(gpii, gpii->regs + offset, ev_ch_irq); + ev_state = gpi_read_reg(gpii, gpii->ev_cntxt_base_reg + + CNTXT_0_CONFIG); + ev_state = FIELD_GET(GPII_n_EV_k_CNTXT_0_CHSTATE, ev_state); + + /* + * CMD EV_CMD_DEALLOC is always successful. However + * cmd does not change hardware status. So overwriting + * software state to default state. + */ + if (gpii->gpi_cmd == GPI_EV_CMD_DEALLOC) + ev_state = DEFAULT_EV_CH_STATE; + + gpii->ev_state = ev_state; + dev_dbg(gpii->gpi_dev->dev, "setting EV state to %s\n", + TO_GPI_EV_STATE_STR(gpii->ev_state)); + complete_all(&gpii->cmd_completion); + type &= ~(GPII_n_CNTXT_TYPE_IRQ_MSK_EV_CTRL); + } + + /* channel control irq */ + if (type & GPII_n_CNTXT_TYPE_IRQ_MSK_CH_CTRL) { + dev_dbg(gpii->gpi_dev->dev, "process CH CTRL interrupts\n"); + gpi_process_ch_ctrl_irq(gpii); + type &= ~(GPII_n_CNTXT_TYPE_IRQ_MSK_CH_CTRL); + } + + if (type) { + dev_err(gpii->gpi_dev->dev, "Unhandled interrupt status:0x%x\n", type); + gpi_process_gen_err_irq(gpii); + goto exit_irq; + } + + offset = GPII_n_CNTXT_TYPE_IRQ_OFFS(gpii->gpii_id); + type = gpi_read_reg(gpii, gpii->regs + offset); + } while (type); + +exit_irq: + read_unlock_irqrestore(&gpii->pm_lock, flags); + + return IRQ_HANDLED; +} + +/* process DMA Immediate completion data events */ +static void gpi_process_imed_data_event(struct gchan *gchan, + struct immediate_data_event *imed_event) +{ + struct gpii *gpii = gchan->gpii; + struct gpi_ring *ch_ring = &gchan->ch_ring; + void *tre = ch_ring->base + (ch_ring->el_size * imed_event->tre_index); + struct dmaengine_result result; + struct gpi_desc *gpi_desc; + struct virt_dma_desc *vd; + unsigned long flags; + u32 chid; + + /* + * If channel not active don't process event + */ + if (gchan->pm_state != ACTIVE_STATE) { + dev_err(gpii->gpi_dev->dev, "skipping processing event because ch @ %s state\n", + TO_GPI_PM_STR(gchan->pm_state)); + return; + } + + spin_lock_irqsave(&gchan->vc.lock, flags); + vd = vchan_next_desc(&gchan->vc); + if (!vd) { + struct gpi_ere *gpi_ere; + struct gpi_tre *gpi_tre; + + spin_unlock_irqrestore(&gchan->vc.lock, flags); + dev_dbg(gpii->gpi_dev->dev, "event without a pending descriptor!\n"); + gpi_ere = (struct gpi_ere *)imed_event; + dev_dbg(gpii->gpi_dev->dev, + "Event: %08x %08x %08x %08x\n", + gpi_ere->dword[0], gpi_ere->dword[1], + gpi_ere->dword[2], gpi_ere->dword[3]); + gpi_tre = tre; + dev_dbg(gpii->gpi_dev->dev, + "Pending TRE: %08x %08x %08x %08x\n", + gpi_tre->dword[0], gpi_tre->dword[1], + gpi_tre->dword[2], gpi_tre->dword[3]); + return; + } + gpi_desc = to_gpi_desc(vd); + spin_unlock_irqrestore(&gchan->vc.lock, flags); + + /* + * RP pointed by Event is to last TRE processed, + * we need to update ring rp to tre + 1 + */ + tre += ch_ring->el_size; + if (tre >= (ch_ring->base + ch_ring->len)) + tre = ch_ring->base; + ch_ring->rp = tre; + + /* make sure rp updates are immediately visible to all cores */ + smp_wmb(); + + chid = imed_event->chid; + if (imed_event->code == MSM_GPI_TCE_EOT && gpii->ieob_set) { + if (chid == GPI_RX_CHAN) + goto gpi_free_desc; + else + return; + } + + if (imed_event->code == MSM_GPI_TCE_UNEXP_ERR) + result.result = DMA_TRANS_ABORTED; + else + result.result = DMA_TRANS_NOERROR; + result.residue = gpi_desc->len - imed_event->length; + + dma_cookie_complete(&vd->tx); + dmaengine_desc_get_callback_invoke(&vd->tx, &result); + +gpi_free_desc: + spin_lock_irqsave(&gchan->vc.lock, flags); + list_del(&vd->node); + spin_unlock_irqrestore(&gchan->vc.lock, flags); + kfree(gpi_desc); + gpi_desc = NULL; +} + +/* processing transfer completion events */ +static void gpi_process_xfer_compl_event(struct gchan *gchan, + struct xfer_compl_event *compl_event) +{ + struct gpii *gpii = gchan->gpii; + struct gpi_ring *ch_ring = &gchan->ch_ring; + void *ev_rp = to_virtual(ch_ring, compl_event->ptr); + struct virt_dma_desc *vd; + struct gpi_desc *gpi_desc; + struct dmaengine_result result; + unsigned long flags; + u32 chid; + + /* only process events on active channel */ + if (unlikely(gchan->pm_state != ACTIVE_STATE)) { + dev_err(gpii->gpi_dev->dev, "skipping processing event because ch @ %s state\n", + TO_GPI_PM_STR(gchan->pm_state)); + return; + } + + spin_lock_irqsave(&gchan->vc.lock, flags); + vd = vchan_next_desc(&gchan->vc); + if (!vd) { + struct gpi_ere *gpi_ere; + + spin_unlock_irqrestore(&gchan->vc.lock, flags); + dev_err(gpii->gpi_dev->dev, "Event without a pending descriptor!\n"); + gpi_ere = (struct gpi_ere *)compl_event; + dev_err(gpii->gpi_dev->dev, + "Event: %08x %08x %08x %08x\n", + gpi_ere->dword[0], gpi_ere->dword[1], + gpi_ere->dword[2], gpi_ere->dword[3]); + return; + } + + gpi_desc = to_gpi_desc(vd); + spin_unlock_irqrestore(&gchan->vc.lock, flags); + + /* + * RP pointed by Event is to last TRE processed, + * we need to update ring rp to ev_rp + 1 + */ + ev_rp += ch_ring->el_size; + if (ev_rp >= (ch_ring->base + ch_ring->len)) + ev_rp = ch_ring->base; + ch_ring->rp = ev_rp; + + /* update must be visible to other cores */ + smp_wmb(); + + chid = compl_event->chid; + if (compl_event->code == MSM_GPI_TCE_EOT && gpii->ieob_set) { + if (chid == GPI_RX_CHAN) + goto gpi_free_desc; + else + return; + } + + if (compl_event->code == MSM_GPI_TCE_UNEXP_ERR) { + dev_err(gpii->gpi_dev->dev, "Error in Transaction\n"); + result.result = DMA_TRANS_ABORTED; + } else { + dev_dbg(gpii->gpi_dev->dev, "Transaction Success\n"); + result.result = DMA_TRANS_NOERROR; + } + result.residue = gpi_desc->len - compl_event->length; + dev_dbg(gpii->gpi_dev->dev, "Residue %d\n", result.residue); + + dma_cookie_complete(&vd->tx); + dmaengine_desc_get_callback_invoke(&vd->tx, &result); + +gpi_free_desc: + spin_lock_irqsave(&gchan->vc.lock, flags); + list_del(&vd->node); + spin_unlock_irqrestore(&gchan->vc.lock, flags); + kfree(gpi_desc); + gpi_desc = NULL; +} + +/* process all events */ +static void gpi_process_events(struct gpii *gpii) +{ + struct gpi_ring *ev_ring = &gpii->ev_ring; + phys_addr_t cntxt_rp; + void *rp; + union gpi_event *gpi_event; + struct gchan *gchan; + u32 chid, type; + + cntxt_rp = gpi_read_reg(gpii, gpii->ev_ring_rp_lsb_reg); + rp = to_virtual(ev_ring, cntxt_rp); + + do { + while (rp != ev_ring->rp) { + gpi_event = ev_ring->rp; + chid = gpi_event->xfer_compl_event.chid; + type = gpi_event->xfer_compl_event.type; + + dev_dbg(gpii->gpi_dev->dev, + "Event: CHID:%u, type:%x %08x %08x %08x %08x\n", + chid, type, gpi_event->gpi_ere.dword[0], + gpi_event->gpi_ere.dword[1], gpi_event->gpi_ere.dword[2], + gpi_event->gpi_ere.dword[3]); + + switch (type) { + case XFER_COMPLETE_EV_TYPE: + gchan = &gpii->gchan[chid]; + gpi_process_xfer_compl_event(gchan, + &gpi_event->xfer_compl_event); + break; + case STALE_EV_TYPE: + dev_dbg(gpii->gpi_dev->dev, "stale event, not processing\n"); + break; + case IMMEDIATE_DATA_EV_TYPE: + gchan = &gpii->gchan[chid]; + gpi_process_imed_data_event(gchan, + &gpi_event->immediate_data_event); + break; + case QUP_NOTIF_EV_TYPE: + dev_dbg(gpii->gpi_dev->dev, "QUP_NOTIF_EV_TYPE\n"); + break; + default: + dev_dbg(gpii->gpi_dev->dev, + "not supported event type:0x%x\n", type); + } + gpi_ring_recycle_ev_element(ev_ring); + } + gpi_write_ev_db(gpii, ev_ring, ev_ring->wp); + + /* clear pending IEOB events */ + gpi_write_reg(gpii, gpii->ieob_clr_reg, BIT(0)); + + cntxt_rp = gpi_read_reg(gpii, gpii->ev_ring_rp_lsb_reg); + rp = to_virtual(ev_ring, cntxt_rp); + + } while (rp != ev_ring->rp); +} + +/* processing events using tasklet */ +static void gpi_ev_tasklet(unsigned long data) +{ + struct gpii *gpii = (struct gpii *)data; + + read_lock(&gpii->pm_lock); + if (!REG_ACCESS_VALID(gpii->pm_state)) { + read_unlock(&gpii->pm_lock); + dev_err(gpii->gpi_dev->dev, "not processing any events, pm_state:%s\n", + TO_GPI_PM_STR(gpii->pm_state)); + return; + } + + /* process the events */ + gpi_process_events(gpii); + + /* enable IEOB, switching back to interrupts */ + gpi_config_interrupts(gpii, MASK_IEOB_SETTINGS, 1); + read_unlock(&gpii->pm_lock); +} + +/* marks all pending events for the channel as stale */ +static void gpi_mark_stale_events(struct gchan *gchan) +{ + struct gpii *gpii = gchan->gpii; + struct gpi_ring *ev_ring = &gpii->ev_ring; + u32 cntxt_rp, local_rp; + void *ev_rp; + + cntxt_rp = gpi_read_reg(gpii, gpii->ev_ring_rp_lsb_reg); + + ev_rp = ev_ring->rp; + local_rp = (u32)to_physical(ev_ring, ev_rp); + while (local_rp != cntxt_rp) { + union gpi_event *gpi_event = ev_rp; + u32 chid = gpi_event->xfer_compl_event.chid; + + if (chid == gchan->chid) + gpi_event->xfer_compl_event.type = STALE_EV_TYPE; + ev_rp += ev_ring->el_size; + if (ev_rp >= (ev_ring->base + ev_ring->len)) + ev_rp = ev_ring->base; + cntxt_rp = gpi_read_reg(gpii, gpii->ev_ring_rp_lsb_reg); + local_rp = (u32)to_physical(ev_ring, ev_rp); + } +} + +/* reset sw state and issue channel reset or de-alloc */ +static int gpi_reset_chan(struct gchan *gchan, enum gpi_cmd gpi_cmd) +{ + struct gpii *gpii = gchan->gpii; + struct gpi_ring *ch_ring = &gchan->ch_ring; + unsigned long flags; + LIST_HEAD(list); + int ret; + + ret = gpi_send_cmd(gpii, gchan, gpi_cmd); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error with cmd:%s ret:%d\n", + TO_GPI_CMD_STR(gpi_cmd), ret); + return ret; + } + + /* initialize the local ring ptrs */ + ch_ring->rp = ch_ring->base; + ch_ring->wp = ch_ring->base; + + /* visible to other cores */ + smp_wmb(); + + /* check event ring for any stale events */ + write_lock_irq(&gpii->pm_lock); + gpi_mark_stale_events(gchan); + + /* remove all async descriptors */ + spin_lock_irqsave(&gchan->vc.lock, flags); + vchan_get_all_descriptors(&gchan->vc, &list); + spin_unlock_irqrestore(&gchan->vc.lock, flags); + write_unlock_irq(&gpii->pm_lock); + vchan_dma_desc_free_list(&gchan->vc, &list); + + return 0; +} + +static int gpi_start_chan(struct gchan *gchan) +{ + struct gpii *gpii = gchan->gpii; + int ret; + + ret = gpi_send_cmd(gpii, gchan, GPI_CH_CMD_START); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error with cmd:%s ret:%d\n", + TO_GPI_CMD_STR(GPI_CH_CMD_START), ret); + return ret; + } + + /* gpii CH is active now */ + write_lock_irq(&gpii->pm_lock); + gchan->pm_state = ACTIVE_STATE; + write_unlock_irq(&gpii->pm_lock); + + return 0; +} + +static int gpi_stop_chan(struct gchan *gchan) +{ + struct gpii *gpii = gchan->gpii; + int ret; + + ret = gpi_send_cmd(gpii, gchan, GPI_CH_CMD_STOP); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error with cmd:%s ret:%d\n", + TO_GPI_CMD_STR(GPI_CH_CMD_STOP), ret); + return ret; + } + + return 0; +} + +/* allocate and configure the transfer channel */ +static int gpi_alloc_chan(struct gchan *chan, bool send_alloc_cmd) +{ + struct gpii *gpii = chan->gpii; + struct gpi_ring *ring = &chan->ch_ring; + int ret; + u32 id = gpii->gpii_id; + u32 chid = chan->chid; + u32 pair_chid = !chid; + + if (send_alloc_cmd) { + ret = gpi_send_cmd(gpii, chan, GPI_CH_CMD_ALLOCATE); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error with cmd:%s ret:%d\n", + TO_GPI_CMD_STR(GPI_CH_CMD_ALLOCATE), ret); + return ret; + } + } + + gpi_write_reg(gpii, chan->ch_cntxt_base_reg + CNTXT_0_CONFIG, + GPII_n_CH_k_CNTXT_0(ring->el_size, 0, chan->dir, GPI_CHTYPE_PROTO_GPI)); + gpi_write_reg(gpii, chan->ch_cntxt_base_reg + CNTXT_1_R_LENGTH, ring->len); + gpi_write_reg(gpii, chan->ch_cntxt_base_reg + CNTXT_2_RING_BASE_LSB, ring->phys_addr); + gpi_write_reg(gpii, chan->ch_cntxt_base_reg + CNTXT_3_RING_BASE_MSB, + upper_32_bits(ring->phys_addr)); + gpi_write_reg(gpii, chan->ch_cntxt_db_reg + CNTXT_5_RING_RP_MSB - CNTXT_4_RING_RP_LSB, + upper_32_bits(ring->phys_addr)); + gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_SCRATCH_0_OFFS(id, chid), + GPII_n_CH_k_SCRATCH_0(pair_chid, chan->protocol, chan->seid)); + gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_SCRATCH_1_OFFS(id, chid), 0); + gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_SCRATCH_2_OFFS(id, chid), 0); + gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_SCRATCH_3_OFFS(id, chid), 0); + gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_QOS_OFFS(id, chid), 1); + + /* flush all the writes */ + wmb(); + return 0; +} + +/* allocate and configure event ring */ +static int gpi_alloc_ev_chan(struct gpii *gpii) +{ + struct gpi_ring *ring = &gpii->ev_ring; + void __iomem *base = gpii->ev_cntxt_base_reg; + int ret; + + ret = gpi_send_cmd(gpii, NULL, GPI_EV_CMD_ALLOCATE); + if (ret) { + dev_err(gpii->gpi_dev->dev, "error with cmd:%s ret:%d\n", + TO_GPI_CMD_STR(GPI_EV_CMD_ALLOCATE), ret); + return ret; + } + + /* program event context */ + gpi_write_reg(gpii, base + CNTXT_0_CONFIG, + GPII_n_EV_k_CNTXT_0(ring->el_size, GPI_INTTYPE_IRQ, GPI_CHTYPE_GPI_EV)); + gpi_write_reg(gpii, base + CNTXT_1_R_LENGTH, ring->len); + gpi_write_reg(gpii, base + CNTXT_2_RING_BASE_LSB, lower_32_bits(ring->phys_addr)); + gpi_write_reg(gpii, base + CNTXT_3_RING_BASE_MSB, upper_32_bits(ring->phys_addr)); + gpi_write_reg(gpii, gpii->ev_cntxt_db_reg + CNTXT_5_RING_RP_MSB - CNTXT_4_RING_RP_LSB, + upper_32_bits(ring->phys_addr)); + gpi_write_reg(gpii, base + CNTXT_8_RING_INT_MOD, 0); + gpi_write_reg(gpii, base + CNTXT_10_RING_MSI_LSB, 0); + gpi_write_reg(gpii, base + CNTXT_11_RING_MSI_MSB, 0); + gpi_write_reg(gpii, base + CNTXT_8_RING_INT_MOD, 0); + gpi_write_reg(gpii, base + CNTXT_12_RING_RP_UPDATE_LSB, 0); + gpi_write_reg(gpii, base + CNTXT_13_RING_RP_UPDATE_MSB, 0); + + /* add events to ring */ + ring->wp = (ring->base + ring->len - ring->el_size); + + /* flush all the writes */ + wmb(); + + /* gpii is active now */ + write_lock_irq(&gpii->pm_lock); + gpii->pm_state = ACTIVE_STATE; + write_unlock_irq(&gpii->pm_lock); + gpi_write_ev_db(gpii, ring, ring->wp); + + return 0; +} + +/* calculate # of ERE/TRE available to queue */ +static int gpi_ring_num_elements_avail(const struct gpi_ring * const ring) +{ + int elements = 0; + + if (ring->wp < ring->rp) { + elements = ((ring->rp - ring->wp) / ring->el_size) - 1; + } else { + elements = (ring->rp - ring->base) / ring->el_size; + elements += ((ring->base + ring->len - ring->wp) / ring->el_size) - 1; + } + + return elements; +} + +static int gpi_ring_add_element(struct gpi_ring *ring, void **wp) +{ + if (gpi_ring_num_elements_avail(ring) <= 0) + return -ENOMEM; + + *wp = ring->wp; + ring->wp += ring->el_size; + if (ring->wp >= (ring->base + ring->len)) + ring->wp = ring->base; + + /* visible to other cores */ + smp_wmb(); + + return 0; +} + +static void gpi_ring_recycle_ev_element(struct gpi_ring *ring) +{ + /* Update the WP */ + ring->wp += ring->el_size; + if (ring->wp >= (ring->base + ring->len)) + ring->wp = ring->base; + + /* Update the RP */ + ring->rp += ring->el_size; + if (ring->rp >= (ring->base + ring->len)) + ring->rp = ring->base; + + /* visible to other cores */ + smp_wmb(); +} + +static void gpi_free_ring(struct gpi_ring *ring, + struct gpii *gpii) +{ + dma_free_coherent(gpii->gpi_dev->dev, ring->alloc_size, + ring->pre_aligned, ring->dma_handle); + memset(ring, 0, sizeof(*ring)); +} + +/* allocate memory for transfer and event rings */ +static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, + u32 el_size, struct gpii *gpii) +{ + u64 len = elements * el_size; + int bit; + + /* ring len must be power of 2 */ + bit = find_last_bit((unsigned long *)&len, 32); + if (((1 << bit) - 1) & len) + bit++; + len = 1 << bit; + ring->alloc_size = (len + (len - 1)); + dev_dbg(gpii->gpi_dev->dev, + "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%zu\n", + elements, el_size, (elements * el_size), len, + ring->alloc_size); + + ring->pre_aligned = dma_alloc_coherent(gpii->gpi_dev->dev, + ring->alloc_size, + &ring->dma_handle, GFP_KERNEL); + if (!ring->pre_aligned) { + dev_err(gpii->gpi_dev->dev, "could not alloc size:%zu mem for ring\n", + ring->alloc_size); + return -ENOMEM; + } + + /* align the physical mem */ + ring->phys_addr = (ring->dma_handle + (len - 1)) & ~(len - 1); + ring->base = ring->pre_aligned + (ring->phys_addr - ring->dma_handle); + ring->rp = ring->base; + ring->wp = ring->base; + ring->len = len; + ring->el_size = el_size; + ring->elements = ring->len / ring->el_size; + memset(ring->base, 0, ring->len); + ring->configured = true; + + /* update to other cores */ + smp_wmb(); + + dev_dbg(gpii->gpi_dev->dev, + "phy_pre:%pad phy_alig:%pa len:%u el_size:%u elements:%u\n", + &ring->dma_handle, &ring->phys_addr, ring->len, + ring->el_size, ring->elements); + + return 0; +} + +/* copy tre into transfer ring */ +static void gpi_queue_xfer(struct gpii *gpii, struct gchan *gchan, + struct gpi_tre *gpi_tre, void **wp) +{ + struct gpi_tre *ch_tre; + int ret; + + /* get next tre location we can copy */ + ret = gpi_ring_add_element(&gchan->ch_ring, (void **)&ch_tre); + if (unlikely(ret)) { + dev_err(gpii->gpi_dev->dev, "Error adding ring element to xfer ring\n"); + return; + } + + /* copy the tre info */ + memcpy(ch_tre, gpi_tre, sizeof(*ch_tre)); + *wp = ch_tre; +} + +/* reset and restart transfer channel */ +static int gpi_terminate_all(struct dma_chan *chan) +{ + struct gchan *gchan = to_gchan(chan); + struct gpii *gpii = gchan->gpii; + int schid, echid, i; + int ret = 0; + + mutex_lock(&gpii->ctrl_lock); + + /* + * treat both channels as a group if its protocol is not UART + * STOP, RESET, or START needs to be in lockstep + */ + schid = (gchan->protocol == QCOM_GPI_UART) ? gchan->chid : 0; + echid = (gchan->protocol == QCOM_GPI_UART) ? schid + 1 : MAX_CHANNELS_PER_GPII; + + /* stop the channel */ + for (i = schid; i < echid; i++) { + gchan = &gpii->gchan[i]; + + /* disable ch state so no more TRE processing */ + write_lock_irq(&gpii->pm_lock); + gchan->pm_state = PREPARE_TERMINATE; + write_unlock_irq(&gpii->pm_lock); + + /* send command to Stop the channel */ + ret = gpi_stop_chan(gchan); + } + + /* reset the channels (clears any pending tre) */ + for (i = schid; i < echid; i++) { + gchan = &gpii->gchan[i]; + + ret = gpi_reset_chan(gchan, GPI_CH_CMD_RESET); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error resetting channel ret:%d\n", ret); + goto terminate_exit; + } + + /* reprogram channel CNTXT */ + ret = gpi_alloc_chan(gchan, false); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error alloc_channel ret:%d\n", ret); + goto terminate_exit; + } + } + + /* restart the channels */ + for (i = schid; i < echid; i++) { + gchan = &gpii->gchan[i]; + + ret = gpi_start_chan(gchan); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error Starting Channel ret:%d\n", ret); + goto terminate_exit; + } + } + +terminate_exit: + mutex_unlock(&gpii->ctrl_lock); + return ret; +} + +/* pause dma transfer for all channels */ +static int gpi_pause(struct dma_chan *chan) +{ + struct gchan *gchan = to_gchan(chan); + struct gpii *gpii = gchan->gpii; + int i, ret; + + mutex_lock(&gpii->ctrl_lock); + + /* + * pause/resume are per gpii not per channel, so + * client needs to call pause only once + */ + if (gpii->pm_state == PAUSE_STATE) { + dev_dbg(gpii->gpi_dev->dev, "channel is already paused\n"); + mutex_unlock(&gpii->ctrl_lock); + return 0; + } + + /* send stop command to stop the channels */ + for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) { + ret = gpi_stop_chan(&gpii->gchan[i]); + if (ret) { + mutex_unlock(&gpii->ctrl_lock); + return ret; + } + } + + disable_irq(gpii->irq); + + /* Wait for threads to complete out */ + tasklet_kill(&gpii->ev_task); + + write_lock_irq(&gpii->pm_lock); + gpii->pm_state = PAUSE_STATE; + write_unlock_irq(&gpii->pm_lock); + mutex_unlock(&gpii->ctrl_lock); + + return 0; +} + +/* resume dma transfer */ +static int gpi_resume(struct dma_chan *chan) +{ + struct gchan *gchan = to_gchan(chan); + struct gpii *gpii = gchan->gpii; + int i, ret; + + mutex_lock(&gpii->ctrl_lock); + if (gpii->pm_state == ACTIVE_STATE) { + dev_dbg(gpii->gpi_dev->dev, "channel is already active\n"); + mutex_unlock(&gpii->ctrl_lock); + return 0; + } + + enable_irq(gpii->irq); + + /* send start command to start the channels */ + for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) { + ret = gpi_send_cmd(gpii, &gpii->gchan[i], GPI_CH_CMD_START); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error starting chan, ret:%d\n", ret); + mutex_unlock(&gpii->ctrl_lock); + return ret; + } + } + + write_lock_irq(&gpii->pm_lock); + gpii->pm_state = ACTIVE_STATE; + write_unlock_irq(&gpii->pm_lock); + mutex_unlock(&gpii->ctrl_lock); + + return 0; +} + +static void gpi_desc_free(struct virt_dma_desc *vd) +{ + struct gpi_desc *gpi_desc = to_gpi_desc(vd); + + kfree(gpi_desc); + gpi_desc = NULL; +} + +static int +gpi_peripheral_config(struct dma_chan *chan, struct dma_slave_config *config) +{ + struct gchan *gchan = to_gchan(chan); + + if (!config->peripheral_config) + return -EINVAL; + + gchan->config = krealloc(gchan->config, config->peripheral_size, GFP_NOWAIT); + if (!gchan->config) + return -ENOMEM; + + memcpy(gchan->config, config->peripheral_config, config->peripheral_size); + + return 0; +} + +static int gpi_create_i2c_tre(struct gchan *chan, struct gpi_desc *desc, + struct scatterlist *sgl, enum dma_transfer_direction direction) +{ + struct gpi_i2c_config *i2c = chan->config; + struct device *dev = chan->gpii->gpi_dev->dev; + unsigned int tre_idx = 0; + dma_addr_t address; + struct gpi_tre *tre; + unsigned int i; + + /* first create config tre if applicable */ + if (i2c->set_config) { + tre = &desc->tre[tre_idx]; + tre_idx++; + + tre->dword[0] = u32_encode_bits(i2c->low_count, TRE_I2C_C0_TLOW); + tre->dword[0] |= u32_encode_bits(i2c->high_count, TRE_I2C_C0_THIGH); + tre->dword[0] |= u32_encode_bits(i2c->cycle_count, TRE_I2C_C0_TCYL); + tre->dword[0] |= u32_encode_bits(i2c->pack_enable, TRE_I2C_C0_TX_PACK); + tre->dword[0] |= u32_encode_bits(i2c->pack_enable, TRE_I2C_C0_RX_PACK); + + tre->dword[1] = 0; + + tre->dword[2] = u32_encode_bits(i2c->clk_div, TRE_C0_CLK_DIV); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_CONFIG0, TRE_FLAGS_TYPE); + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN); + } + + /* create the GO tre for Tx */ + if (i2c->op == I2C_WRITE) { + tre = &desc->tre[tre_idx]; + tre_idx++; + + if (i2c->multi_msg) + tre->dword[0] = u32_encode_bits(I2C_READ, TRE_I2C_GO_CMD); + else + tre->dword[0] = u32_encode_bits(i2c->op, TRE_I2C_GO_CMD); + + tre->dword[0] |= u32_encode_bits(i2c->addr, TRE_I2C_GO_ADDR); + tre->dword[0] |= u32_encode_bits(i2c->stretch, TRE_I2C_GO_STRETCH); + + tre->dword[1] = 0; + tre->dword[2] = u32_encode_bits(i2c->rx_len, TRE_RX_LEN); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_GO, TRE_FLAGS_TYPE); + + if (i2c->multi_msg) + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_LINK); + else + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN); + } + + if (i2c->op == I2C_READ || i2c->multi_msg == false) { + /* create the DMA TRE */ + tre = &desc->tre[tre_idx]; + tre_idx++; + + address = sg_dma_address(sgl); + tre->dword[0] = lower_32_bits(address); + tre->dword[1] = upper_32_bits(address); + + tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); + } + + for (i = 0; i < tre_idx; i++) + dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], + desc->tre[i].dword[1], desc->tre[i].dword[2], desc->tre[i].dword[3]); + + return tre_idx; +} + +static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, + struct scatterlist *sgl, enum dma_transfer_direction direction) +{ + struct gpi_spi_config *spi = chan->config; + struct device *dev = chan->gpii->gpi_dev->dev; + unsigned int tre_idx = 0; + dma_addr_t address; + struct gpi_tre *tre; + unsigned int i; + + /* first create config tre if applicable */ + if (direction == DMA_MEM_TO_DEV && spi->set_config) { + tre = &desc->tre[tre_idx]; + tre_idx++; + + tre->dword[0] = u32_encode_bits(spi->word_len, TRE_SPI_C0_WORD_SZ); + tre->dword[0] |= u32_encode_bits(spi->loopback_en, TRE_SPI_C0_LOOPBACK); + tre->dword[0] |= u32_encode_bits(spi->clock_pol_high, TRE_SPI_C0_CPOL); + tre->dword[0] |= u32_encode_bits(spi->data_pol_high, TRE_SPI_C0_CPHA); + tre->dword[0] |= u32_encode_bits(spi->pack_en, TRE_SPI_C0_TX_PACK); + tre->dword[0] |= u32_encode_bits(spi->pack_en, TRE_SPI_C0_RX_PACK); + + tre->dword[1] = 0; + + tre->dword[2] = u32_encode_bits(spi->clk_div, TRE_C0_CLK_DIV); + tre->dword[2] |= u32_encode_bits(spi->clk_src, TRE_C0_CLK_SRC); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_CONFIG0, TRE_FLAGS_TYPE); + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN); + } + + /* create the GO tre for Tx */ + if (direction == DMA_MEM_TO_DEV) { + tre = &desc->tre[tre_idx]; + tre_idx++; + + tre->dword[0] = u32_encode_bits(spi->fragmentation, TRE_SPI_GO_FRAG); + tre->dword[0] |= u32_encode_bits(spi->cs, TRE_SPI_GO_CS); + tre->dword[0] |= u32_encode_bits(spi->cmd, TRE_SPI_GO_CMD); + + tre->dword[1] = 0; + + tre->dword[2] = u32_encode_bits(spi->rx_len, TRE_RX_LEN); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_GO, TRE_FLAGS_TYPE); + if (spi->cmd == SPI_RX) { + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOB); + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_LINK); + } else if (spi->cmd == SPI_TX) { + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN); + } else { /* SPI_DUPLEX */ + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN); + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_LINK); + } + } + + /* create the dma tre */ + tre = &desc->tre[tre_idx]; + tre_idx++; + + address = sg_dma_address(sgl); + tre->dword[0] = lower_32_bits(address); + tre->dword[1] = upper_32_bits(address); + + tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); + if (direction == DMA_MEM_TO_DEV) + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); + + for (i = 0; i < tre_idx; i++) + dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], + desc->tre[i].dword[1], desc->tre[i].dword[2], desc->tre[i].dword[3]); + + return tre_idx; +} + +/* copy tre into transfer ring */ +static struct dma_async_tx_descriptor * +gpi_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct gchan *gchan = to_gchan(chan); + struct gpii *gpii = gchan->gpii; + struct device *dev = gpii->gpi_dev->dev; + struct gpi_ring *ch_ring = &gchan->ch_ring; + struct gpi_desc *gpi_desc; + u32 nr, nr_tre = 0; + u8 set_config; + int i; + + gpii->ieob_set = false; + if (!is_slave_direction(direction)) { + dev_err(gpii->gpi_dev->dev, "invalid dma direction: %d\n", direction); + return NULL; + } + + if (sg_len > 1) { + dev_err(dev, "Multi sg sent, we support only one atm: %d\n", sg_len); + return NULL; + } + + nr_tre = 3; + set_config = *(u32 *)gchan->config; + if (!set_config) + nr_tre = 2; + if (direction == DMA_DEV_TO_MEM) /* rx */ + nr_tre = 1; + + /* calculate # of elements required & available */ + nr = gpi_ring_num_elements_avail(ch_ring); + if (nr < nr_tre) { + dev_err(dev, "not enough space in ring, avail:%u required:%u\n", nr, nr_tre); + return NULL; + } + + gpi_desc = kzalloc(sizeof(*gpi_desc), GFP_NOWAIT); + if (!gpi_desc) + return NULL; + + /* create TREs for xfer */ + if (gchan->protocol == QCOM_GPI_SPI) { + i = gpi_create_spi_tre(gchan, gpi_desc, sgl, direction); + } else if (gchan->protocol == QCOM_GPI_I2C) { + i = gpi_create_i2c_tre(gchan, gpi_desc, sgl, direction); + } else { + dev_err(dev, "invalid peripheral: %d\n", gchan->protocol); + kfree(gpi_desc); + return NULL; + } + + /* set up the descriptor */ + gpi_desc->gchan = gchan; + gpi_desc->len = sg_dma_len(sgl); + gpi_desc->num_tre = i; + + return vchan_tx_prep(&gchan->vc, &gpi_desc->vd, flags); +} + +/* rings transfer ring db to being transfer */ +static void gpi_issue_pending(struct dma_chan *chan) +{ + struct gchan *gchan = to_gchan(chan); + struct gpii *gpii = gchan->gpii; + unsigned long flags, pm_lock_flags; + struct virt_dma_desc *vd = NULL; + struct gpi_desc *gpi_desc; + struct gpi_ring *ch_ring = &gchan->ch_ring; + void *tre, *wp = NULL; + int i; + + read_lock_irqsave(&gpii->pm_lock, pm_lock_flags); + + /* move all submitted discriptors to issued list */ + spin_lock_irqsave(&gchan->vc.lock, flags); + if (vchan_issue_pending(&gchan->vc)) + vd = list_last_entry(&gchan->vc.desc_issued, + struct virt_dma_desc, node); + spin_unlock_irqrestore(&gchan->vc.lock, flags); + + /* nothing to do list is empty */ + if (!vd) { + read_unlock_irqrestore(&gpii->pm_lock, pm_lock_flags); + return; + } + + gpi_desc = to_gpi_desc(vd); + for (i = 0; i < gpi_desc->num_tre; i++) { + tre = &gpi_desc->tre[i]; + gpi_queue_xfer(gpii, gchan, tre, &wp); + } + + gpi_desc->db = ch_ring->wp; + gpi_write_ch_db(gchan, &gchan->ch_ring, gpi_desc->db); + read_unlock_irqrestore(&gpii->pm_lock, pm_lock_flags); +} + +static int gpi_ch_init(struct gchan *gchan) +{ + struct gpii *gpii = gchan->gpii; + const int ev_factor = gpii->gpi_dev->ev_factor; + u32 elements; + int i = 0, ret = 0; + + gchan->pm_state = CONFIG_STATE; + + /* check if both channels are configured before continue */ + for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) + if (gpii->gchan[i].pm_state != CONFIG_STATE) + goto exit_gpi_init; + + /* protocol must be same for both channels */ + if (gpii->gchan[0].protocol != gpii->gchan[1].protocol) { + dev_err(gpii->gpi_dev->dev, "protocol did not match protocol %u != %u\n", + gpii->gchan[0].protocol, gpii->gchan[1].protocol); + ret = -EINVAL; + goto exit_gpi_init; + } + + /* allocate memory for event ring */ + elements = CHAN_TRES << ev_factor; + ret = gpi_alloc_ring(&gpii->ev_ring, elements, + sizeof(union gpi_event), gpii); + if (ret) + goto exit_gpi_init; + + /* configure interrupts */ + write_lock_irq(&gpii->pm_lock); + gpii->pm_state = PREPARE_HARDWARE; + write_unlock_irq(&gpii->pm_lock); + ret = gpi_config_interrupts(gpii, DEFAULT_IRQ_SETTINGS, 0); + if (ret) { + dev_err(gpii->gpi_dev->dev, "error config. interrupts, ret:%d\n", ret); + goto error_config_int; + } + + /* allocate event rings */ + ret = gpi_alloc_ev_chan(gpii); + if (ret) { + dev_err(gpii->gpi_dev->dev, "error alloc_ev_chan:%d\n", ret); + goto error_alloc_ev_ring; + } + + /* Allocate all channels */ + for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) { + ret = gpi_alloc_chan(&gpii->gchan[i], true); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error allocating chan:%d\n", ret); + goto error_alloc_chan; + } + } + + /* start channels */ + for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) { + ret = gpi_start_chan(&gpii->gchan[i]); + if (ret) { + dev_err(gpii->gpi_dev->dev, "Error start chan:%d\n", ret); + goto error_start_chan; + } + } + return ret; + +error_start_chan: + for (i = i - 1; i >= 0; i--) { + gpi_stop_chan(&gpii->gchan[i]); + gpi_send_cmd(gpii, gchan, GPI_CH_CMD_RESET); + } + i = 2; +error_alloc_chan: + for (i = i - 1; i >= 0; i--) + gpi_reset_chan(gchan, GPI_CH_CMD_DE_ALLOC); +error_alloc_ev_ring: + gpi_disable_interrupts(gpii); +error_config_int: + gpi_free_ring(&gpii->ev_ring, gpii); +exit_gpi_init: + return ret; +} + +/* release all channel resources */ +static void gpi_free_chan_resources(struct dma_chan *chan) +{ + struct gchan *gchan = to_gchan(chan); + struct gpii *gpii = gchan->gpii; + enum gpi_pm_state cur_state; + int ret, i; + + mutex_lock(&gpii->ctrl_lock); + + cur_state = gchan->pm_state; + + /* disable ch state so no more TRE processing for this channel */ + write_lock_irq(&gpii->pm_lock); + gchan->pm_state = PREPARE_TERMINATE; + write_unlock_irq(&gpii->pm_lock); + + /* attempt to do graceful hardware shutdown */ + if (cur_state == ACTIVE_STATE) { + gpi_stop_chan(gchan); + + ret = gpi_send_cmd(gpii, gchan, GPI_CH_CMD_RESET); + if (ret) + dev_err(gpii->gpi_dev->dev, "error resetting channel:%d\n", ret); + + gpi_reset_chan(gchan, GPI_CH_CMD_DE_ALLOC); + } + + /* free all allocated memory */ + gpi_free_ring(&gchan->ch_ring, gpii); + vchan_free_chan_resources(&gchan->vc); + kfree(gchan->config); + + write_lock_irq(&gpii->pm_lock); + gchan->pm_state = DISABLE_STATE; + write_unlock_irq(&gpii->pm_lock); + + /* if other rings are still active exit */ + for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) + if (gpii->gchan[i].ch_ring.configured) + goto exit_free; + + /* deallocate EV Ring */ + cur_state = gpii->pm_state; + write_lock_irq(&gpii->pm_lock); + gpii->pm_state = PREPARE_TERMINATE; + write_unlock_irq(&gpii->pm_lock); + + /* wait for threads to complete out */ + tasklet_kill(&gpii->ev_task); + + /* send command to de allocate event ring */ + if (cur_state == ACTIVE_STATE) + gpi_send_cmd(gpii, NULL, GPI_EV_CMD_DEALLOC); + + gpi_free_ring(&gpii->ev_ring, gpii); + + /* disable interrupts */ + if (cur_state == ACTIVE_STATE) + gpi_disable_interrupts(gpii); + + /* set final state to disable */ + write_lock_irq(&gpii->pm_lock); + gpii->pm_state = DISABLE_STATE; + write_unlock_irq(&gpii->pm_lock); + +exit_free: + mutex_unlock(&gpii->ctrl_lock); +} + +/* allocate channel resources */ +static int gpi_alloc_chan_resources(struct dma_chan *chan) +{ + struct gchan *gchan = to_gchan(chan); + struct gpii *gpii = gchan->gpii; + int ret; + + mutex_lock(&gpii->ctrl_lock); + + /* allocate memory for transfer ring */ + ret = gpi_alloc_ring(&gchan->ch_ring, CHAN_TRES, + sizeof(struct gpi_tre), gpii); + if (ret) + goto xfer_alloc_err; + + ret = gpi_ch_init(gchan); + + mutex_unlock(&gpii->ctrl_lock); + + return ret; +xfer_alloc_err: + mutex_unlock(&gpii->ctrl_lock); + + return ret; +} + +static int gpi_find_avail_gpii(struct gpi_dev *gpi_dev, u32 seid) +{ + struct gchan *tx_chan, *rx_chan; + unsigned int gpii; + + /* check if same seid is already configured for another chid */ + for (gpii = 0; gpii < gpi_dev->max_gpii; gpii++) { + if (!((1 << gpii) & gpi_dev->gpii_mask)) + continue; + + tx_chan = &gpi_dev->gpiis[gpii].gchan[GPI_TX_CHAN]; + rx_chan = &gpi_dev->gpiis[gpii].gchan[GPI_RX_CHAN]; + + if (rx_chan->vc.chan.client_count && rx_chan->seid == seid) + return gpii; + if (tx_chan->vc.chan.client_count && tx_chan->seid == seid) + return gpii; + } + + /* no channels configured with same seid, return next avail gpii */ + for (gpii = 0; gpii < gpi_dev->max_gpii; gpii++) { + if (!((1 << gpii) & gpi_dev->gpii_mask)) + continue; + + tx_chan = &gpi_dev->gpiis[gpii].gchan[GPI_TX_CHAN]; + rx_chan = &gpi_dev->gpiis[gpii].gchan[GPI_RX_CHAN]; + + /* check if gpii is configured */ + if (tx_chan->vc.chan.client_count || + rx_chan->vc.chan.client_count) + continue; + + /* found a free gpii */ + return gpii; + } + + /* no gpii instance available to use */ + return -EIO; +} + +/* gpi_of_dma_xlate: open client requested channel */ +static struct dma_chan *gpi_of_dma_xlate(struct of_phandle_args *args, + struct of_dma *of_dma) +{ + struct gpi_dev *gpi_dev = (struct gpi_dev *)of_dma->of_dma_data; + u32 seid, chid; + int gpii; + struct gchan *gchan; + + if (args->args_count < 3) { + dev_err(gpi_dev->dev, "gpii require minimum 2 args, client passed:%d args\n", + args->args_count); + return NULL; + } + + chid = args->args[0]; + if (chid >= MAX_CHANNELS_PER_GPII) { + dev_err(gpi_dev->dev, "gpii channel:%d not valid\n", chid); + return NULL; + } + + seid = args->args[1]; + + /* find next available gpii to use */ + gpii = gpi_find_avail_gpii(gpi_dev, seid); + if (gpii < 0) { + dev_err(gpi_dev->dev, "no available gpii instances\n"); + return NULL; + } + + gchan = &gpi_dev->gpiis[gpii].gchan[chid]; + if (gchan->vc.chan.client_count) { + dev_err(gpi_dev->dev, "gpii:%d chid:%d seid:%d already configured\n", + gpii, chid, gchan->seid); + return NULL; + } + + gchan->seid = seid; + gchan->protocol = args->args[2]; + + return dma_get_slave_channel(&gchan->vc.chan); +} + +static int gpi_probe(struct platform_device *pdev) +{ + struct gpi_dev *gpi_dev; + unsigned int i; + u32 ee_offset; + int ret; + + gpi_dev = devm_kzalloc(&pdev->dev, sizeof(*gpi_dev), GFP_KERNEL); + if (!gpi_dev) + return -ENOMEM; + + gpi_dev->dev = &pdev->dev; + gpi_dev->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &gpi_dev->res); + if (IS_ERR(gpi_dev->regs)) + return PTR_ERR(gpi_dev->regs); + gpi_dev->ee_base = gpi_dev->regs; + + ret = of_property_read_u32(gpi_dev->dev->of_node, "dma-channels", + &gpi_dev->max_gpii); + if (ret) { + dev_err(gpi_dev->dev, "missing 'max-no-gpii' DT node\n"); + return ret; + } + + ret = of_property_read_u32(gpi_dev->dev->of_node, "dma-channel-mask", + &gpi_dev->gpii_mask); + if (ret) { + dev_err(gpi_dev->dev, "missing 'gpii-mask' DT node\n"); + return ret; + } + + ee_offset = (uintptr_t)device_get_match_data(gpi_dev->dev); + gpi_dev->ee_base = gpi_dev->ee_base - ee_offset; + + gpi_dev->ev_factor = EV_FACTOR; + + ret = dma_set_mask(gpi_dev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(gpi_dev->dev, "Error setting dma_mask to 64, ret:%d\n", ret); + return ret; + } + + gpi_dev->gpiis = devm_kzalloc(gpi_dev->dev, sizeof(*gpi_dev->gpiis) * + gpi_dev->max_gpii, GFP_KERNEL); + if (!gpi_dev->gpiis) + return -ENOMEM; + + /* setup all the supported gpii */ + INIT_LIST_HEAD(&gpi_dev->dma_device.channels); + for (i = 0; i < gpi_dev->max_gpii; i++) { + struct gpii *gpii = &gpi_dev->gpiis[i]; + int chan; + + if (!((1 << i) & gpi_dev->gpii_mask)) + continue; + + /* set up ev cntxt register map */ + gpii->ev_cntxt_base_reg = gpi_dev->ee_base + GPII_n_EV_CH_k_CNTXT_0_OFFS(i, 0); + gpii->ev_cntxt_db_reg = gpi_dev->ee_base + GPII_n_EV_CH_k_DOORBELL_0_OFFS(i, 0); + gpii->ev_ring_rp_lsb_reg = gpii->ev_cntxt_base_reg + CNTXT_4_RING_RP_LSB; + gpii->ev_cmd_reg = gpi_dev->ee_base + GPII_n_EV_CH_CMD_OFFS(i); + gpii->ieob_clr_reg = gpi_dev->ee_base + GPII_n_CNTXT_SRC_IEOB_IRQ_CLR_OFFS(i); + + /* set up irq */ + ret = platform_get_irq(pdev, i); + if (ret < 0) + return ret; + gpii->irq = ret; + + /* set up channel specific register info */ + for (chan = 0; chan < MAX_CHANNELS_PER_GPII; chan++) { + struct gchan *gchan = &gpii->gchan[chan]; + + /* set up ch cntxt register map */ + gchan->ch_cntxt_base_reg = gpi_dev->ee_base + + GPII_n_CH_k_CNTXT_0_OFFS(i, chan); + gchan->ch_cntxt_db_reg = gpi_dev->ee_base + + GPII_n_CH_k_DOORBELL_0_OFFS(i, chan); + gchan->ch_cmd_reg = gpi_dev->ee_base + GPII_n_CH_CMD_OFFS(i); + + /* vchan setup */ + vchan_init(&gchan->vc, &gpi_dev->dma_device); + gchan->vc.desc_free = gpi_desc_free; + gchan->chid = chan; + gchan->gpii = gpii; + gchan->dir = GPII_CHAN_DIR[chan]; + } + mutex_init(&gpii->ctrl_lock); + rwlock_init(&gpii->pm_lock); + tasklet_init(&gpii->ev_task, gpi_ev_tasklet, + (unsigned long)gpii); + init_completion(&gpii->cmd_completion); + gpii->gpii_id = i; + gpii->regs = gpi_dev->ee_base; + gpii->gpi_dev = gpi_dev; + } + + platform_set_drvdata(pdev, gpi_dev); + + /* clear and Set capabilities */ + dma_cap_zero(gpi_dev->dma_device.cap_mask); + dma_cap_set(DMA_SLAVE, gpi_dev->dma_device.cap_mask); + + /* configure dmaengine apis */ + gpi_dev->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + gpi_dev->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + gpi_dev->dma_device.src_addr_widths = DMA_SLAVE_BUSWIDTH_8_BYTES; + gpi_dev->dma_device.dst_addr_widths = DMA_SLAVE_BUSWIDTH_8_BYTES; + gpi_dev->dma_device.device_alloc_chan_resources = gpi_alloc_chan_resources; + gpi_dev->dma_device.device_free_chan_resources = gpi_free_chan_resources; + gpi_dev->dma_device.device_tx_status = dma_cookie_status; + gpi_dev->dma_device.device_issue_pending = gpi_issue_pending; + gpi_dev->dma_device.device_prep_slave_sg = gpi_prep_slave_sg; + gpi_dev->dma_device.device_config = gpi_peripheral_config; + gpi_dev->dma_device.device_terminate_all = gpi_terminate_all; + gpi_dev->dma_device.dev = gpi_dev->dev; + gpi_dev->dma_device.device_pause = gpi_pause; + gpi_dev->dma_device.device_resume = gpi_resume; + + /* register with dmaengine framework */ + ret = dma_async_device_register(&gpi_dev->dma_device); + if (ret) { + dev_err(gpi_dev->dev, "async_device_register failed ret:%d", ret); + return ret; + } + + ret = of_dma_controller_register(gpi_dev->dev->of_node, + gpi_of_dma_xlate, gpi_dev); + if (ret) { + dev_err(gpi_dev->dev, "of_dma_controller_reg failed ret:%d", ret); + return ret; + } + + return ret; +} + +static const struct of_device_id gpi_of_match[] = { + { .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 }, + { .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 }, + /* + * Do not grow the list for compatible devices. Instead use + * qcom,sdm845-gpi-dma (for ee_offset = 0x0) or qcom,sm6350-gpi-dma + * (for ee_offset = 0x10000). + */ + { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 }, + { .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 }, + { .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 }, + { .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 }, + { .compatible = "qcom,sm8450-gpi-dma", .data = (void *)0x10000 }, + { }, +}; +MODULE_DEVICE_TABLE(of, gpi_of_match); + +static struct platform_driver gpi_driver = { + .probe = gpi_probe, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = gpi_of_match, + }, +}; + +static int __init gpi_init(void) +{ + return platform_driver_register(&gpi_driver); +} +subsys_initcall(gpi_init) + +MODULE_DESCRIPTION("QCOM GPI DMA engine driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c new file mode 100644 index 0000000000..834ae519c1 --- /dev/null +++ b/drivers/dma/qcom/hidma.c @@ -0,0 +1,973 @@ +/* + * Qualcomm Technologies HIDMA DMA engine interface + * + * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008. + * Copyright (C) Semihalf 2009 + * Copyright (C) Ilya Yanok, Emcraft Systems 2010 + * Copyright (C) Alexander Popov, Promcontroller 2014 + * + * Written by Piotr Ziecik . Hardware description + * (defines, structures and comments) was taken from MPC5121 DMA driver + * written by Hongjun Chen . + * + * Approved as OSADL project by a majority of OSADL members and funded + * by OSADL membership fees in 2009; for details see www.osadl.org. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* Linux Foundation elects GPLv2 license only. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" +#include "hidma.h" + +/* + * Default idle time is 2 seconds. This parameter can + * be overridden by changing the following + * /sys/bus/platform/devices/QCOM8061:/power/autosuspend_delay_ms + * during kernel boot. + */ +#define HIDMA_AUTOSUSPEND_TIMEOUT 2000 +#define HIDMA_ERR_INFO_SW 0xFF +#define HIDMA_ERR_CODE_UNEXPECTED_TERMINATE 0x0 +#define HIDMA_NR_DEFAULT_DESC 10 +#define HIDMA_MSI_INTS 11 + +static inline struct hidma_dev *to_hidma_dev(struct dma_device *dmadev) +{ + return container_of(dmadev, struct hidma_dev, ddev); +} + +static inline +struct hidma_dev *to_hidma_dev_from_lldev(struct hidma_lldev **_lldevp) +{ + return container_of(_lldevp, struct hidma_dev, lldev); +} + +static inline struct hidma_chan *to_hidma_chan(struct dma_chan *dmach) +{ + return container_of(dmach, struct hidma_chan, chan); +} + +static void hidma_free(struct hidma_dev *dmadev) +{ + INIT_LIST_HEAD(&dmadev->ddev.channels); +} + +static unsigned int nr_desc_prm; +module_param(nr_desc_prm, uint, 0644); +MODULE_PARM_DESC(nr_desc_prm, "number of descriptors (default: 0)"); + +enum hidma_cap { + HIDMA_MSI_CAP = 1, + HIDMA_IDENTITY_CAP, +}; + +/* process completed descriptors */ +static void hidma_process_completed(struct hidma_chan *mchan) +{ + struct dma_device *ddev = mchan->chan.device; + struct hidma_dev *mdma = to_hidma_dev(ddev); + struct dma_async_tx_descriptor *desc; + dma_cookie_t last_cookie; + struct hidma_desc *mdesc; + struct hidma_desc *next; + unsigned long irqflags; + struct list_head list; + + INIT_LIST_HEAD(&list); + + /* Get all completed descriptors */ + spin_lock_irqsave(&mchan->lock, irqflags); + list_splice_tail_init(&mchan->completed, &list); + spin_unlock_irqrestore(&mchan->lock, irqflags); + + /* Execute callbacks and run dependencies */ + list_for_each_entry_safe(mdesc, next, &list, node) { + enum dma_status llstat; + struct dmaengine_desc_callback cb; + struct dmaengine_result result; + + desc = &mdesc->desc; + last_cookie = desc->cookie; + + llstat = hidma_ll_status(mdma->lldev, mdesc->tre_ch); + + spin_lock_irqsave(&mchan->lock, irqflags); + if (llstat == DMA_COMPLETE) { + mchan->last_success = last_cookie; + result.result = DMA_TRANS_NOERROR; + } else { + result.result = DMA_TRANS_ABORTED; + } + + dma_cookie_complete(desc); + spin_unlock_irqrestore(&mchan->lock, irqflags); + + dmaengine_desc_get_callback(desc, &cb); + + dma_run_dependencies(desc); + + spin_lock_irqsave(&mchan->lock, irqflags); + list_move(&mdesc->node, &mchan->free); + spin_unlock_irqrestore(&mchan->lock, irqflags); + + dmaengine_desc_callback_invoke(&cb, &result); + } +} + +/* + * Called once for each submitted descriptor. + * PM is locked once for each descriptor that is currently + * in execution. + */ +static void hidma_callback(void *data) +{ + struct hidma_desc *mdesc = data; + struct hidma_chan *mchan = to_hidma_chan(mdesc->desc.chan); + struct dma_device *ddev = mchan->chan.device; + struct hidma_dev *dmadev = to_hidma_dev(ddev); + unsigned long irqflags; + bool queued = false; + + spin_lock_irqsave(&mchan->lock, irqflags); + if (mdesc->node.next) { + /* Delete from the active list, add to completed list */ + list_move_tail(&mdesc->node, &mchan->completed); + queued = true; + + /* calculate the next running descriptor */ + mchan->running = list_first_entry(&mchan->active, + struct hidma_desc, node); + } + spin_unlock_irqrestore(&mchan->lock, irqflags); + + hidma_process_completed(mchan); + + if (queued) { + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + } +} + +static int hidma_chan_init(struct hidma_dev *dmadev, u32 dma_sig) +{ + struct hidma_chan *mchan; + struct dma_device *ddev; + + mchan = devm_kzalloc(dmadev->ddev.dev, sizeof(*mchan), GFP_KERNEL); + if (!mchan) + return -ENOMEM; + + ddev = &dmadev->ddev; + mchan->dma_sig = dma_sig; + mchan->dmadev = dmadev; + mchan->chan.device = ddev; + dma_cookie_init(&mchan->chan); + + INIT_LIST_HEAD(&mchan->free); + INIT_LIST_HEAD(&mchan->prepared); + INIT_LIST_HEAD(&mchan->active); + INIT_LIST_HEAD(&mchan->completed); + INIT_LIST_HEAD(&mchan->queued); + + spin_lock_init(&mchan->lock); + list_add_tail(&mchan->chan.device_node, &ddev->channels); + return 0; +} + +static void hidma_issue_task(struct tasklet_struct *t) +{ + struct hidma_dev *dmadev = from_tasklet(dmadev, t, task); + + pm_runtime_get_sync(dmadev->ddev.dev); + hidma_ll_start(dmadev->lldev); +} + +static void hidma_issue_pending(struct dma_chan *dmach) +{ + struct hidma_chan *mchan = to_hidma_chan(dmach); + struct hidma_dev *dmadev = mchan->dmadev; + unsigned long flags; + struct hidma_desc *qdesc, *next; + int status; + + spin_lock_irqsave(&mchan->lock, flags); + list_for_each_entry_safe(qdesc, next, &mchan->queued, node) { + hidma_ll_queue_request(dmadev->lldev, qdesc->tre_ch); + list_move_tail(&qdesc->node, &mchan->active); + } + + if (!mchan->running) { + struct hidma_desc *desc = list_first_entry(&mchan->active, + struct hidma_desc, + node); + mchan->running = desc; + } + spin_unlock_irqrestore(&mchan->lock, flags); + + /* PM will be released in hidma_callback function. */ + status = pm_runtime_get(dmadev->ddev.dev); + if (status < 0) + tasklet_schedule(&dmadev->task); + else + hidma_ll_start(dmadev->lldev); +} + +static inline bool hidma_txn_is_success(dma_cookie_t cookie, + dma_cookie_t last_success, dma_cookie_t last_used) +{ + if (last_success <= last_used) { + if ((cookie <= last_success) || (cookie > last_used)) + return true; + } else { + if ((cookie <= last_success) && (cookie > last_used)) + return true; + } + return false; +} + +static enum dma_status hidma_tx_status(struct dma_chan *dmach, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct hidma_chan *mchan = to_hidma_chan(dmach); + enum dma_status ret; + + ret = dma_cookie_status(dmach, cookie, txstate); + if (ret == DMA_COMPLETE) { + bool is_success; + + is_success = hidma_txn_is_success(cookie, mchan->last_success, + dmach->cookie); + return is_success ? ret : DMA_ERROR; + } + + if (mchan->paused && (ret == DMA_IN_PROGRESS)) { + unsigned long flags; + dma_cookie_t runcookie; + + spin_lock_irqsave(&mchan->lock, flags); + if (mchan->running) + runcookie = mchan->running->desc.cookie; + else + runcookie = -EINVAL; + + if (runcookie == cookie) + ret = DMA_PAUSED; + + spin_unlock_irqrestore(&mchan->lock, flags); + } + + return ret; +} + +/* + * Submit descriptor to hardware. + * Lock the PM for each descriptor we are sending. + */ +static dma_cookie_t hidma_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct hidma_chan *mchan = to_hidma_chan(txd->chan); + struct hidma_dev *dmadev = mchan->dmadev; + struct hidma_desc *mdesc; + unsigned long irqflags; + dma_cookie_t cookie; + + pm_runtime_get_sync(dmadev->ddev.dev); + if (!hidma_ll_isenabled(dmadev->lldev)) { + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + return -ENODEV; + } + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + + mdesc = container_of(txd, struct hidma_desc, desc); + spin_lock_irqsave(&mchan->lock, irqflags); + + /* Move descriptor to queued */ + list_move_tail(&mdesc->node, &mchan->queued); + + /* Update cookie */ + cookie = dma_cookie_assign(txd); + + spin_unlock_irqrestore(&mchan->lock, irqflags); + + return cookie; +} + +static int hidma_alloc_chan_resources(struct dma_chan *dmach) +{ + struct hidma_chan *mchan = to_hidma_chan(dmach); + struct hidma_dev *dmadev = mchan->dmadev; + struct hidma_desc *mdesc, *tmp; + unsigned long irqflags; + LIST_HEAD(descs); + unsigned int i; + int rc = 0; + + if (mchan->allocated) + return 0; + + /* Alloc descriptors for this channel */ + for (i = 0; i < dmadev->nr_descriptors; i++) { + mdesc = kzalloc(sizeof(struct hidma_desc), GFP_NOWAIT); + if (!mdesc) { + rc = -ENOMEM; + break; + } + dma_async_tx_descriptor_init(&mdesc->desc, dmach); + mdesc->desc.tx_submit = hidma_tx_submit; + + rc = hidma_ll_request(dmadev->lldev, mchan->dma_sig, + "DMA engine", hidma_callback, mdesc, + &mdesc->tre_ch); + if (rc) { + dev_err(dmach->device->dev, + "channel alloc failed at %u\n", i); + kfree(mdesc); + break; + } + list_add_tail(&mdesc->node, &descs); + } + + if (rc) { + /* return the allocated descriptors */ + list_for_each_entry_safe(mdesc, tmp, &descs, node) { + hidma_ll_free(dmadev->lldev, mdesc->tre_ch); + kfree(mdesc); + } + return rc; + } + + spin_lock_irqsave(&mchan->lock, irqflags); + list_splice_tail_init(&descs, &mchan->free); + mchan->allocated = true; + spin_unlock_irqrestore(&mchan->lock, irqflags); + return 1; +} + +static struct dma_async_tx_descriptor * +hidma_prep_dma_memcpy(struct dma_chan *dmach, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct hidma_chan *mchan = to_hidma_chan(dmach); + struct hidma_desc *mdesc = NULL; + struct hidma_dev *mdma = mchan->dmadev; + unsigned long irqflags; + + /* Get free descriptor */ + spin_lock_irqsave(&mchan->lock, irqflags); + if (!list_empty(&mchan->free)) { + mdesc = list_first_entry(&mchan->free, struct hidma_desc, node); + list_del(&mdesc->node); + } + spin_unlock_irqrestore(&mchan->lock, irqflags); + + if (!mdesc) + return NULL; + + mdesc->desc.flags = flags; + hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch, + src, dest, len, flags, + HIDMA_TRE_MEMCPY); + + /* Place descriptor in prepared list */ + spin_lock_irqsave(&mchan->lock, irqflags); + list_add_tail(&mdesc->node, &mchan->prepared); + spin_unlock_irqrestore(&mchan->lock, irqflags); + + return &mdesc->desc; +} + +static struct dma_async_tx_descriptor * +hidma_prep_dma_memset(struct dma_chan *dmach, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct hidma_chan *mchan = to_hidma_chan(dmach); + struct hidma_desc *mdesc = NULL; + struct hidma_dev *mdma = mchan->dmadev; + unsigned long irqflags; + u64 byte_pattern, fill_pattern; + + /* Get free descriptor */ + spin_lock_irqsave(&mchan->lock, irqflags); + if (!list_empty(&mchan->free)) { + mdesc = list_first_entry(&mchan->free, struct hidma_desc, node); + list_del(&mdesc->node); + } + spin_unlock_irqrestore(&mchan->lock, irqflags); + + if (!mdesc) + return NULL; + + byte_pattern = (char)value; + fill_pattern = (byte_pattern << 56) | + (byte_pattern << 48) | + (byte_pattern << 40) | + (byte_pattern << 32) | + (byte_pattern << 24) | + (byte_pattern << 16) | + (byte_pattern << 8) | + byte_pattern; + + mdesc->desc.flags = flags; + hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch, + fill_pattern, dest, len, flags, + HIDMA_TRE_MEMSET); + + /* Place descriptor in prepared list */ + spin_lock_irqsave(&mchan->lock, irqflags); + list_add_tail(&mdesc->node, &mchan->prepared); + spin_unlock_irqrestore(&mchan->lock, irqflags); + + return &mdesc->desc; +} + +static int hidma_terminate_channel(struct dma_chan *chan) +{ + struct hidma_chan *mchan = to_hidma_chan(chan); + struct hidma_dev *dmadev = to_hidma_dev(mchan->chan.device); + struct hidma_desc *tmp, *mdesc; + unsigned long irqflags; + LIST_HEAD(list); + int rc; + + pm_runtime_get_sync(dmadev->ddev.dev); + /* give completed requests a chance to finish */ + hidma_process_completed(mchan); + + spin_lock_irqsave(&mchan->lock, irqflags); + mchan->last_success = 0; + list_splice_init(&mchan->active, &list); + list_splice_init(&mchan->prepared, &list); + list_splice_init(&mchan->completed, &list); + list_splice_init(&mchan->queued, &list); + spin_unlock_irqrestore(&mchan->lock, irqflags); + + /* this suspends the existing transfer */ + rc = hidma_ll_disable(dmadev->lldev); + if (rc) { + dev_err(dmadev->ddev.dev, "channel did not pause\n"); + goto out; + } + + /* return all user requests */ + list_for_each_entry_safe(mdesc, tmp, &list, node) { + struct dma_async_tx_descriptor *txd = &mdesc->desc; + + dma_descriptor_unmap(txd); + dmaengine_desc_get_callback_invoke(txd, NULL); + dma_run_dependencies(txd); + + /* move myself to free_list */ + list_move(&mdesc->node, &mchan->free); + } + + rc = hidma_ll_enable(dmadev->lldev); +out: + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + return rc; +} + +static int hidma_terminate_all(struct dma_chan *chan) +{ + struct hidma_chan *mchan = to_hidma_chan(chan); + struct hidma_dev *dmadev = to_hidma_dev(mchan->chan.device); + int rc; + + rc = hidma_terminate_channel(chan); + if (rc) + return rc; + + /* reinitialize the hardware */ + pm_runtime_get_sync(dmadev->ddev.dev); + rc = hidma_ll_setup(dmadev->lldev); + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + return rc; +} + +static void hidma_free_chan_resources(struct dma_chan *dmach) +{ + struct hidma_chan *mchan = to_hidma_chan(dmach); + struct hidma_dev *mdma = mchan->dmadev; + struct hidma_desc *mdesc, *tmp; + unsigned long irqflags; + LIST_HEAD(descs); + + /* terminate running transactions and free descriptors */ + hidma_terminate_channel(dmach); + + spin_lock_irqsave(&mchan->lock, irqflags); + + /* Move data */ + list_splice_tail_init(&mchan->free, &descs); + + /* Free descriptors */ + list_for_each_entry_safe(mdesc, tmp, &descs, node) { + hidma_ll_free(mdma->lldev, mdesc->tre_ch); + list_del(&mdesc->node); + kfree(mdesc); + } + + mchan->allocated = false; + spin_unlock_irqrestore(&mchan->lock, irqflags); +} + +static int hidma_pause(struct dma_chan *chan) +{ + struct hidma_chan *mchan; + struct hidma_dev *dmadev; + + mchan = to_hidma_chan(chan); + dmadev = to_hidma_dev(mchan->chan.device); + if (!mchan->paused) { + pm_runtime_get_sync(dmadev->ddev.dev); + if (hidma_ll_disable(dmadev->lldev)) + dev_warn(dmadev->ddev.dev, "channel did not stop\n"); + mchan->paused = true; + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + } + return 0; +} + +static int hidma_resume(struct dma_chan *chan) +{ + struct hidma_chan *mchan; + struct hidma_dev *dmadev; + int rc = 0; + + mchan = to_hidma_chan(chan); + dmadev = to_hidma_dev(mchan->chan.device); + if (mchan->paused) { + pm_runtime_get_sync(dmadev->ddev.dev); + rc = hidma_ll_enable(dmadev->lldev); + if (!rc) + mchan->paused = false; + else + dev_err(dmadev->ddev.dev, + "failed to resume the channel"); + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + } + return rc; +} + +static irqreturn_t hidma_chirq_handler(int chirq, void *arg) +{ + struct hidma_lldev *lldev = arg; + + /* + * All interrupts are request driven. + * HW doesn't send an interrupt by itself. + */ + return hidma_ll_inthandler(chirq, lldev); +} + +#ifdef CONFIG_GENERIC_MSI_IRQ +static irqreturn_t hidma_chirq_handler_msi(int chirq, void *arg) +{ + struct hidma_lldev **lldevp = arg; + struct hidma_dev *dmadev = to_hidma_dev_from_lldev(lldevp); + + return hidma_ll_inthandler_msi(chirq, *lldevp, + 1 << (chirq - dmadev->msi_virqbase)); +} +#endif + +static ssize_t hidma_show_values(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hidma_dev *mdev = dev_get_drvdata(dev); + + buf[0] = 0; + + if (strcmp(attr->attr.name, "chid") == 0) + sprintf(buf, "%d\n", mdev->chidx); + + return strlen(buf); +} + +static inline void hidma_sysfs_uninit(struct hidma_dev *dev) +{ + device_remove_file(dev->ddev.dev, dev->chid_attrs); +} + +static struct device_attribute* +hidma_create_sysfs_entry(struct hidma_dev *dev, char *name, int mode) +{ + struct device_attribute *attrs; + char *name_copy; + + attrs = devm_kmalloc(dev->ddev.dev, sizeof(struct device_attribute), + GFP_KERNEL); + if (!attrs) + return NULL; + + name_copy = devm_kstrdup(dev->ddev.dev, name, GFP_KERNEL); + if (!name_copy) + return NULL; + + attrs->attr.name = name_copy; + attrs->attr.mode = mode; + attrs->show = hidma_show_values; + sysfs_attr_init(&attrs->attr); + + return attrs; +} + +static int hidma_sysfs_init(struct hidma_dev *dev) +{ + dev->chid_attrs = hidma_create_sysfs_entry(dev, "chid", S_IRUGO); + if (!dev->chid_attrs) + return -ENOMEM; + + return device_create_file(dev->ddev.dev, dev->chid_attrs); +} + +#ifdef CONFIG_GENERIC_MSI_IRQ +static void hidma_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + struct device *dev = msi_desc_to_dev(desc); + struct hidma_dev *dmadev = dev_get_drvdata(dev); + + if (!desc->msi_index) { + writel(msg->address_lo, dmadev->dev_evca + 0x118); + writel(msg->address_hi, dmadev->dev_evca + 0x11C); + writel(msg->data, dmadev->dev_evca + 0x120); + } +} +#endif + +static void hidma_free_msis(struct hidma_dev *dmadev) +{ +#ifdef CONFIG_GENERIC_MSI_IRQ + struct device *dev = dmadev->ddev.dev; + int i, virq; + + for (i = 0; i < HIDMA_MSI_INTS; i++) { + virq = msi_get_virq(dev, i); + if (virq) + devm_free_irq(dev, virq, &dmadev->lldev); + } + + platform_msi_domain_free_irqs(dev); +#endif +} + +static int hidma_request_msi(struct hidma_dev *dmadev, + struct platform_device *pdev) +{ +#ifdef CONFIG_GENERIC_MSI_IRQ + int rc, i, virq; + + rc = platform_msi_domain_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS, + hidma_write_msi_msg); + if (rc) + return rc; + + for (i = 0; i < HIDMA_MSI_INTS; i++) { + virq = msi_get_virq(&pdev->dev, i); + rc = devm_request_irq(&pdev->dev, virq, + hidma_chirq_handler_msi, + 0, "qcom-hidma-msi", + &dmadev->lldev); + if (rc) + break; + if (!i) + dmadev->msi_virqbase = virq; + } + + if (rc) { + /* free allocated MSI interrupts above */ + for (--i; i >= 0; i--) { + virq = msi_get_virq(&pdev->dev, i); + devm_free_irq(&pdev->dev, virq, &dmadev->lldev); + } + dev_warn(&pdev->dev, + "failed to request MSI irq, falling back to wired IRQ\n"); + } else { + /* Add callback to free MSIs on teardown */ + hidma_ll_setup_irq(dmadev->lldev, true); + } + return rc; +#else + return -EINVAL; +#endif +} + +static bool hidma_test_capability(struct device *dev, enum hidma_cap test_cap) +{ + enum hidma_cap cap; + + cap = (enum hidma_cap) device_get_match_data(dev); + return cap ? ((cap & test_cap) > 0) : 0; +} + +static int hidma_probe(struct platform_device *pdev) +{ + struct hidma_dev *dmadev; + struct resource *trca_resource; + struct resource *evca_resource; + int chirq; + void __iomem *evca; + void __iomem *trca; + int rc; + bool msi; + + pm_runtime_set_autosuspend_delay(&pdev->dev, HIDMA_AUTOSUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + trca = devm_platform_get_and_ioremap_resource(pdev, 0, &trca_resource); + if (IS_ERR(trca)) { + rc = PTR_ERR(trca); + goto bailout; + } + + evca = devm_platform_get_and_ioremap_resource(pdev, 1, &evca_resource); + if (IS_ERR(evca)) { + rc = PTR_ERR(evca); + goto bailout; + } + + /* + * This driver only handles the channel IRQs. + * Common IRQ is handled by the management driver. + */ + chirq = platform_get_irq(pdev, 0); + if (chirq < 0) { + rc = chirq; + goto bailout; + } + + dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL); + if (!dmadev) { + rc = -ENOMEM; + goto bailout; + } + + INIT_LIST_HEAD(&dmadev->ddev.channels); + spin_lock_init(&dmadev->lock); + dmadev->ddev.dev = &pdev->dev; + pm_runtime_get_sync(dmadev->ddev.dev); + + dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask); + dma_cap_set(DMA_MEMSET, dmadev->ddev.cap_mask); + if (WARN_ON(!pdev->dev.dma_mask)) { + rc = -ENXIO; + goto dmafree; + } + + dmadev->dev_evca = evca; + dmadev->evca_resource = evca_resource; + dmadev->dev_trca = trca; + dmadev->trca_resource = trca_resource; + dmadev->ddev.device_prep_dma_memcpy = hidma_prep_dma_memcpy; + dmadev->ddev.device_prep_dma_memset = hidma_prep_dma_memset; + dmadev->ddev.device_alloc_chan_resources = hidma_alloc_chan_resources; + dmadev->ddev.device_free_chan_resources = hidma_free_chan_resources; + dmadev->ddev.device_tx_status = hidma_tx_status; + dmadev->ddev.device_issue_pending = hidma_issue_pending; + dmadev->ddev.device_pause = hidma_pause; + dmadev->ddev.device_resume = hidma_resume; + dmadev->ddev.device_terminate_all = hidma_terminate_all; + dmadev->ddev.copy_align = 8; + + /* + * Determine the MSI capability of the platform. Old HW doesn't + * support MSI. + */ + msi = hidma_test_capability(&pdev->dev, HIDMA_MSI_CAP); + device_property_read_u32(&pdev->dev, "desc-count", + &dmadev->nr_descriptors); + + if (nr_desc_prm) { + dev_info(&pdev->dev, "overriding number of descriptors as %d\n", + nr_desc_prm); + dmadev->nr_descriptors = nr_desc_prm; + } + + if (!dmadev->nr_descriptors) + dmadev->nr_descriptors = HIDMA_NR_DEFAULT_DESC; + + if (hidma_test_capability(&pdev->dev, HIDMA_IDENTITY_CAP)) + dmadev->chidx = readl(dmadev->dev_trca + 0x40); + else + dmadev->chidx = readl(dmadev->dev_trca + 0x28); + + /* Set DMA mask to 64 bits. */ + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) { + dev_warn(&pdev->dev, "unable to set coherent mask to 64"); + goto dmafree; + } + + dmadev->lldev = hidma_ll_init(dmadev->ddev.dev, + dmadev->nr_descriptors, dmadev->dev_trca, + dmadev->dev_evca, dmadev->chidx); + if (!dmadev->lldev) { + rc = -EPROBE_DEFER; + goto dmafree; + } + + platform_set_drvdata(pdev, dmadev); + if (msi) + rc = hidma_request_msi(dmadev, pdev); + + if (!msi || rc) { + hidma_ll_setup_irq(dmadev->lldev, false); + rc = devm_request_irq(&pdev->dev, chirq, hidma_chirq_handler, + 0, "qcom-hidma", dmadev->lldev); + if (rc) + goto uninit; + } + + INIT_LIST_HEAD(&dmadev->ddev.channels); + rc = hidma_chan_init(dmadev, 0); + if (rc) + goto uninit; + + rc = dma_async_device_register(&dmadev->ddev); + if (rc) + goto uninit; + + dmadev->irq = chirq; + tasklet_setup(&dmadev->task, hidma_issue_task); + hidma_debug_init(dmadev); + hidma_sysfs_init(dmadev); + dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n"); + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + return 0; + +uninit: + if (msi) + hidma_free_msis(dmadev); + + hidma_ll_uninit(dmadev->lldev); +dmafree: + if (dmadev) + hidma_free(dmadev); +bailout: + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return rc; +} + +static void hidma_shutdown(struct platform_device *pdev) +{ + struct hidma_dev *dmadev = platform_get_drvdata(pdev); + + dev_info(dmadev->ddev.dev, "HI-DMA engine shutdown\n"); + + pm_runtime_get_sync(dmadev->ddev.dev); + if (hidma_ll_disable(dmadev->lldev)) + dev_warn(dmadev->ddev.dev, "channel did not stop\n"); + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + +} + +static int hidma_remove(struct platform_device *pdev) +{ + struct hidma_dev *dmadev = platform_get_drvdata(pdev); + + pm_runtime_get_sync(dmadev->ddev.dev); + dma_async_device_unregister(&dmadev->ddev); + if (!dmadev->lldev->msi_support) + devm_free_irq(dmadev->ddev.dev, dmadev->irq, dmadev->lldev); + else + hidma_free_msis(dmadev); + + tasklet_kill(&dmadev->task); + hidma_sysfs_uninit(dmadev); + hidma_debug_uninit(dmadev); + hidma_ll_uninit(dmadev->lldev); + hidma_free(dmadev); + + dev_info(&pdev->dev, "HI-DMA engine removed\n"); + pm_runtime_put_sync_suspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return 0; +} + +#if IS_ENABLED(CONFIG_ACPI) +static const struct acpi_device_id hidma_acpi_ids[] = { + {"QCOM8061"}, + {"QCOM8062", HIDMA_MSI_CAP}, + {"QCOM8063", (HIDMA_MSI_CAP | HIDMA_IDENTITY_CAP)}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, hidma_acpi_ids); +#endif + +static const struct of_device_id hidma_match[] = { + {.compatible = "qcom,hidma-1.0",}, + {.compatible = "qcom,hidma-1.1", .data = (void *)(HIDMA_MSI_CAP),}, + {.compatible = "qcom,hidma-1.2", + .data = (void *)(HIDMA_MSI_CAP | HIDMA_IDENTITY_CAP),}, + {}, +}; +MODULE_DEVICE_TABLE(of, hidma_match); + +static struct platform_driver hidma_driver = { + .probe = hidma_probe, + .remove = hidma_remove, + .shutdown = hidma_shutdown, + .driver = { + .name = "hidma", + .of_match_table = hidma_match, + .acpi_match_table = ACPI_PTR(hidma_acpi_ids), + }, +}; + +module_platform_driver(hidma_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h new file mode 100644 index 0000000000..f212466744 --- /dev/null +++ b/drivers/dma/qcom/hidma.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Qualcomm Technologies HIDMA data structures + * + * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. + */ + +#ifndef QCOM_HIDMA_H +#define QCOM_HIDMA_H + +#include +#include +#include + +#define HIDMA_TRE_SIZE 32 /* each TRE is 32 bytes */ +#define HIDMA_TRE_CFG_IDX 0 +#define HIDMA_TRE_LEN_IDX 1 +#define HIDMA_TRE_SRC_LOW_IDX 2 +#define HIDMA_TRE_SRC_HI_IDX 3 +#define HIDMA_TRE_DEST_LOW_IDX 4 +#define HIDMA_TRE_DEST_HI_IDX 5 + +enum tre_type { + HIDMA_TRE_MEMCPY = 3, + HIDMA_TRE_MEMSET = 4, +}; + +struct hidma_tre { + atomic_t allocated; /* if this channel is allocated */ + bool queued; /* flag whether this is pending */ + u16 status; /* status */ + u32 idx; /* index of the tre */ + u32 dma_sig; /* signature of the tre */ + const char *dev_name; /* name of the device */ + void (*callback)(void *data); /* requester callback */ + void *data; /* Data associated with this channel*/ + struct hidma_lldev *lldev; /* lldma device pointer */ + u32 tre_local[HIDMA_TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy */ + u32 tre_index; /* the offset where this was written*/ + u32 int_flags; /* interrupt flags */ + u8 err_info; /* error record in this transfer */ + u8 err_code; /* completion code */ +}; + +struct hidma_lldev { + bool msi_support; /* flag indicating MSI support */ + bool initialized; /* initialized flag */ + u8 trch_state; /* trch_state of the device */ + u8 evch_state; /* evch_state of the device */ + u8 chidx; /* channel index in the core */ + u32 nr_tres; /* max number of configs */ + spinlock_t lock; /* reentrancy */ + struct hidma_tre *trepool; /* trepool of user configs */ + struct device *dev; /* device */ + void __iomem *trca; /* Transfer Channel address */ + void __iomem *evca; /* Event Channel address */ + struct hidma_tre + **pending_tre_list; /* Pointers to pending TREs */ + atomic_t pending_tre_count; /* Number of TREs pending */ + + void *tre_ring; /* TRE ring */ + dma_addr_t tre_dma; /* TRE ring to be shared with HW */ + u32 tre_ring_size; /* Byte size of the ring */ + u32 tre_processed_off; /* last processed TRE */ + + void *evre_ring; /* EVRE ring */ + dma_addr_t evre_dma; /* EVRE ring to be shared with HW */ + u32 evre_ring_size; /* Byte size of the ring */ + u32 evre_processed_off; /* last processed EVRE */ + + u32 tre_write_offset; /* TRE write location */ + struct tasklet_struct task; /* task delivering notifications */ + DECLARE_KFIFO_PTR(handoff_fifo, + struct hidma_tre *); /* pending TREs FIFO */ +}; + +struct hidma_desc { + struct dma_async_tx_descriptor desc; + /* link list node for this channel*/ + struct list_head node; + u32 tre_ch; +}; + +struct hidma_chan { + bool paused; + bool allocated; + char dbg_name[16]; + u32 dma_sig; + dma_cookie_t last_success; + + /* + * active descriptor on this channel + * It is used by the DMA complete notification to + * locate the descriptor that initiated the transfer. + */ + struct hidma_dev *dmadev; + struct hidma_desc *running; + + struct dma_chan chan; + struct list_head free; + struct list_head prepared; + struct list_head queued; + struct list_head active; + struct list_head completed; + + /* Lock for this structure */ + spinlock_t lock; +}; + +struct hidma_dev { + int irq; + int chidx; + u32 nr_descriptors; + int msi_virqbase; + + struct hidma_lldev *lldev; + void __iomem *dev_trca; + struct resource *trca_resource; + void __iomem *dev_evca; + struct resource *evca_resource; + + /* used to protect the pending channel list*/ + spinlock_t lock; + struct dma_device ddev; + + struct dentry *debugfs; + + /* sysfs entry for the channel id */ + struct device_attribute *chid_attrs; + + /* Task delivering issue_pending */ + struct tasklet_struct task; +}; + +int hidma_ll_request(struct hidma_lldev *llhndl, u32 dev_id, + const char *dev_name, + void (*callback)(void *data), void *data, u32 *tre_ch); + +void hidma_ll_free(struct hidma_lldev *llhndl, u32 tre_ch); +enum dma_status hidma_ll_status(struct hidma_lldev *llhndl, u32 tre_ch); +bool hidma_ll_isenabled(struct hidma_lldev *llhndl); +void hidma_ll_queue_request(struct hidma_lldev *llhndl, u32 tre_ch); +void hidma_ll_start(struct hidma_lldev *llhndl); +int hidma_ll_disable(struct hidma_lldev *lldev); +int hidma_ll_enable(struct hidma_lldev *llhndl); +void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch, + dma_addr_t src, dma_addr_t dest, u32 len, u32 flags, u32 txntype); +void hidma_ll_setup_irq(struct hidma_lldev *lldev, bool msi); +int hidma_ll_setup(struct hidma_lldev *lldev); +struct hidma_lldev *hidma_ll_init(struct device *dev, u32 max_channels, + void __iomem *trca, void __iomem *evca, + u8 chidx); +int hidma_ll_uninit(struct hidma_lldev *llhndl); +irqreturn_t hidma_ll_inthandler(int irq, void *arg); +irqreturn_t hidma_ll_inthandler_msi(int irq, void *arg, int cause); +void hidma_cleanup_pending_tre(struct hidma_lldev *llhndl, u8 err_info, + u8 err_code); +void hidma_debug_init(struct hidma_dev *dmadev); +void hidma_debug_uninit(struct hidma_dev *dmadev); +#endif diff --git a/drivers/dma/qcom/hidma_dbg.c b/drivers/dma/qcom/hidma_dbg.c new file mode 100644 index 0000000000..ce87c7937a --- /dev/null +++ b/drivers/dma/qcom/hidma_dbg.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Qualcomm Technologies HIDMA debug file + * + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "hidma.h" + +static void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch) +{ + struct hidma_lldev *lldev = llhndl; + struct hidma_tre *tre; + u32 length; + dma_addr_t src_start; + dma_addr_t dest_start; + u32 *tre_local; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in chstats:%d", tre_ch); + return; + } + tre = &lldev->trepool[tre_ch]; + seq_printf(s, "------Channel %d -----\n", tre_ch); + seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated)); + seq_printf(s, "queued = 0x%x\n", tre->queued); + seq_printf(s, "err_info = 0x%x\n", tre->err_info); + seq_printf(s, "err_code = 0x%x\n", tre->err_code); + seq_printf(s, "status = 0x%x\n", tre->status); + seq_printf(s, "idx = 0x%x\n", tre->idx); + seq_printf(s, "dma_sig = 0x%x\n", tre->dma_sig); + seq_printf(s, "dev_name=%s\n", tre->dev_name); + seq_printf(s, "callback=%p\n", tre->callback); + seq_printf(s, "data=%p\n", tre->data); + seq_printf(s, "tre_index = 0x%x\n", tre->tre_index); + + tre_local = &tre->tre_local[0]; + src_start = tre_local[HIDMA_TRE_SRC_LOW_IDX]; + src_start = ((u64) (tre_local[HIDMA_TRE_SRC_HI_IDX]) << 32) + src_start; + dest_start = tre_local[HIDMA_TRE_DEST_LOW_IDX]; + dest_start += ((u64) (tre_local[HIDMA_TRE_DEST_HI_IDX]) << 32); + length = tre_local[HIDMA_TRE_LEN_IDX]; + + seq_printf(s, "src=%pap\n", &src_start); + seq_printf(s, "dest=%pap\n", &dest_start); + seq_printf(s, "length = 0x%x\n", length); +} + +static void hidma_ll_devstats(struct seq_file *s, void *llhndl) +{ + struct hidma_lldev *lldev = llhndl; + + seq_puts(s, "------Device -----\n"); + seq_printf(s, "lldev init = 0x%x\n", lldev->initialized); + seq_printf(s, "trch_state = 0x%x\n", lldev->trch_state); + seq_printf(s, "evch_state = 0x%x\n", lldev->evch_state); + seq_printf(s, "chidx = 0x%x\n", lldev->chidx); + seq_printf(s, "nr_tres = 0x%x\n", lldev->nr_tres); + seq_printf(s, "trca=%p\n", lldev->trca); + seq_printf(s, "tre_ring=%p\n", lldev->tre_ring); + seq_printf(s, "tre_ring_handle=%pap\n", &lldev->tre_dma); + seq_printf(s, "tre_ring_size = 0x%x\n", lldev->tre_ring_size); + seq_printf(s, "tre_processed_off = 0x%x\n", lldev->tre_processed_off); + seq_printf(s, "pending_tre_count=%d\n", + atomic_read(&lldev->pending_tre_count)); + seq_printf(s, "evca=%p\n", lldev->evca); + seq_printf(s, "evre_ring=%p\n", lldev->evre_ring); + seq_printf(s, "evre_ring_handle=%pap\n", &lldev->evre_dma); + seq_printf(s, "evre_ring_size = 0x%x\n", lldev->evre_ring_size); + seq_printf(s, "evre_processed_off = 0x%x\n", lldev->evre_processed_off); + seq_printf(s, "tre_write_offset = 0x%x\n", lldev->tre_write_offset); +} + +/* + * hidma_chan_show: display HIDMA channel statistics + * + * Display the statistics for the current HIDMA virtual channel device. + */ +static int hidma_chan_show(struct seq_file *s, void *unused) +{ + struct hidma_chan *mchan = s->private; + struct hidma_desc *mdesc; + struct hidma_dev *dmadev = mchan->dmadev; + + pm_runtime_get_sync(dmadev->ddev.dev); + seq_printf(s, "paused=%u\n", mchan->paused); + seq_printf(s, "dma_sig=%u\n", mchan->dma_sig); + seq_puts(s, "prepared\n"); + list_for_each_entry(mdesc, &mchan->prepared, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + seq_puts(s, "active\n"); + list_for_each_entry(mdesc, &mchan->active, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + seq_puts(s, "completed\n"); + list_for_each_entry(mdesc, &mchan->completed, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + hidma_ll_devstats(s, mchan->dmadev->lldev); + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + return 0; +} + +/* + * hidma_dma_show: display HIDMA device info + * + * Display the info for the current HIDMA device. + */ +static int hidma_dma_show(struct seq_file *s, void *unused) +{ + struct hidma_dev *dmadev = s->private; + resource_size_t sz; + + seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors); + seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca); + seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->trca_resource->start); + sz = resource_size(dmadev->trca_resource); + seq_printf(s, "dev_trca_size=%pa\n", &sz); + seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca); + seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->evca_resource->start); + sz = resource_size(dmadev->evca_resource); + seq_printf(s, "dev_evca_size=%pa\n", &sz); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hidma_chan); +DEFINE_SHOW_ATTRIBUTE(hidma_dma); + +void hidma_debug_uninit(struct hidma_dev *dmadev) +{ + debugfs_remove_recursive(dmadev->debugfs); +} + +void hidma_debug_init(struct hidma_dev *dmadev) +{ + int chidx = 0; + struct list_head *position = NULL; + struct dentry *dir; + + dmadev->debugfs = debugfs_create_dir(dev_name(dmadev->ddev.dev), NULL); + + /* walk through the virtual channel list */ + list_for_each(position, &dmadev->ddev.channels) { + struct hidma_chan *chan; + + chan = list_entry(position, struct hidma_chan, + chan.device_node); + sprintf(chan->dbg_name, "chan%d", chidx); + dir = debugfs_create_dir(chan->dbg_name, + dmadev->debugfs); + debugfs_create_file("stats", S_IRUGO, dir, chan, + &hidma_chan_fops); + chidx++; + } + + debugfs_create_file("stats", S_IRUGO, dmadev->debugfs, dmadev, + &hidma_dma_fops); +} diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c new file mode 100644 index 0000000000..53244e0e34 --- /dev/null +++ b/drivers/dma/qcom/hidma_ll.c @@ -0,0 +1,855 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Qualcomm Technologies HIDMA DMA engine low level code + * + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hidma.h" + +#define HIDMA_EVRE_SIZE 16 /* each EVRE is 16 bytes */ + +#define HIDMA_TRCA_CTRLSTS_REG 0x000 +#define HIDMA_TRCA_RING_LOW_REG 0x008 +#define HIDMA_TRCA_RING_HIGH_REG 0x00C +#define HIDMA_TRCA_RING_LEN_REG 0x010 +#define HIDMA_TRCA_DOORBELL_REG 0x400 + +#define HIDMA_EVCA_CTRLSTS_REG 0x000 +#define HIDMA_EVCA_INTCTRL_REG 0x004 +#define HIDMA_EVCA_RING_LOW_REG 0x008 +#define HIDMA_EVCA_RING_HIGH_REG 0x00C +#define HIDMA_EVCA_RING_LEN_REG 0x010 +#define HIDMA_EVCA_WRITE_PTR_REG 0x020 +#define HIDMA_EVCA_DOORBELL_REG 0x400 + +#define HIDMA_EVCA_IRQ_STAT_REG 0x100 +#define HIDMA_EVCA_IRQ_CLR_REG 0x108 +#define HIDMA_EVCA_IRQ_EN_REG 0x110 + +#define HIDMA_EVRE_CFG_IDX 0 + +#define HIDMA_EVRE_ERRINFO_BIT_POS 24 +#define HIDMA_EVRE_CODE_BIT_POS 28 + +#define HIDMA_EVRE_ERRINFO_MASK GENMASK(3, 0) +#define HIDMA_EVRE_CODE_MASK GENMASK(3, 0) + +#define HIDMA_CH_CONTROL_MASK GENMASK(7, 0) +#define HIDMA_CH_STATE_MASK GENMASK(7, 0) +#define HIDMA_CH_STATE_BIT_POS 0x8 + +#define HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS 0 +#define HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS 1 +#define HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9 +#define HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS 10 +#define HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS 11 +#define HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS 14 + +#define ENABLE_IRQS (BIT(HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS) | \ + BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS)) + +#define HIDMA_INCREMENT_ITERATOR(iter, size, ring_size) \ +do { \ + iter += size; \ + if (iter >= ring_size) \ + iter -= ring_size; \ +} while (0) + +#define HIDMA_CH_STATE(val) \ + ((val >> HIDMA_CH_STATE_BIT_POS) & HIDMA_CH_STATE_MASK) + +#define HIDMA_ERR_INT_MASK \ + (BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS)) + +enum ch_command { + HIDMA_CH_DISABLE = 0, + HIDMA_CH_ENABLE = 1, + HIDMA_CH_SUSPEND = 2, + HIDMA_CH_RESET = 9, +}; + +enum ch_state { + HIDMA_CH_DISABLED = 0, + HIDMA_CH_ENABLED = 1, + HIDMA_CH_RUNNING = 2, + HIDMA_CH_SUSPENDED = 3, + HIDMA_CH_STOPPED = 4, +}; + +enum err_code { + HIDMA_EVRE_STATUS_COMPLETE = 1, + HIDMA_EVRE_STATUS_ERROR = 4, +}; + +static int hidma_is_chan_enabled(int state) +{ + switch (state) { + case HIDMA_CH_ENABLED: + case HIDMA_CH_RUNNING: + return true; + default: + return false; + } +} + +void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch) +{ + struct hidma_tre *tre; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch); + return; + } + + tre = &lldev->trepool[tre_ch]; + if (atomic_read(&tre->allocated) != true) { + dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch); + return; + } + + atomic_set(&tre->allocated, 0); +} + +int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name, + void (*callback)(void *data), void *data, u32 *tre_ch) +{ + unsigned int i; + struct hidma_tre *tre; + u32 *tre_local; + + if (!tre_ch || !lldev) + return -EINVAL; + + /* need to have at least one empty spot in the queue */ + for (i = 0; i < lldev->nr_tres - 1; i++) { + if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1)) + break; + } + + if (i == (lldev->nr_tres - 1)) + return -ENOMEM; + + tre = &lldev->trepool[i]; + tre->dma_sig = sig; + tre->dev_name = dev_name; + tre->callback = callback; + tre->data = data; + tre->idx = i; + tre->status = 0; + tre->queued = 0; + tre->err_code = 0; + tre->err_info = 0; + tre->lldev = lldev; + tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_CFG_IDX] = (lldev->chidx & 0xFF) << 8; + tre_local[HIDMA_TRE_CFG_IDX] |= BIT(16); /* set IEOB */ + *tre_ch = i; + if (callback) + callback(data); + return 0; +} + +/* + * Multiple TREs may be queued and waiting in the pending queue. + */ +static void hidma_ll_tre_complete(struct tasklet_struct *t) +{ + struct hidma_lldev *lldev = from_tasklet(lldev, t, task); + struct hidma_tre *tre; + + while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) { + /* call the user if it has been read by the hardware */ + if (tre->callback) + tre->callback(tre->data); + } +} + +static int hidma_post_completed(struct hidma_lldev *lldev, u8 err_info, + u8 err_code) +{ + struct hidma_tre *tre; + unsigned long flags; + u32 tre_iterator; + + spin_lock_irqsave(&lldev->lock, flags); + + tre_iterator = lldev->tre_processed_off; + tre = lldev->pending_tre_list[tre_iterator / HIDMA_TRE_SIZE]; + if (!tre) { + spin_unlock_irqrestore(&lldev->lock, flags); + dev_warn(lldev->dev, "tre_index [%d] and tre out of sync\n", + tre_iterator / HIDMA_TRE_SIZE); + return -EINVAL; + } + lldev->pending_tre_list[tre->tre_index] = NULL; + + /* + * Keep track of pending TREs that SW is expecting to receive + * from HW. We got one now. Decrement our counter. + */ + if (atomic_dec_return(&lldev->pending_tre_count) < 0) { + dev_warn(lldev->dev, "tre count mismatch on completion"); + atomic_set(&lldev->pending_tre_count, 0); + } + + HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE, + lldev->tre_ring_size); + lldev->tre_processed_off = tre_iterator; + spin_unlock_irqrestore(&lldev->lock, flags); + + tre->err_info = err_info; + tre->err_code = err_code; + tre->queued = 0; + + kfifo_put(&lldev->handoff_fifo, tre); + tasklet_schedule(&lldev->task); + + return 0; +} + +/* + * Called to handle the interrupt for the channel. + * Return a positive number if TRE or EVRE were consumed on this run. + * Return a positive number if there are pending TREs or EVREs. + * Return 0 if there is nothing to consume or no pending TREs/EVREs found. + */ +static int hidma_handle_tre_completion(struct hidma_lldev *lldev) +{ + u32 evre_ring_size = lldev->evre_ring_size; + u32 err_info, err_code, evre_write_off; + u32 evre_iterator; + u32 num_completed = 0; + + evre_write_off = readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG); + evre_iterator = lldev->evre_processed_off; + + if ((evre_write_off > evre_ring_size) || + (evre_write_off % HIDMA_EVRE_SIZE)) { + dev_err(lldev->dev, "HW reports invalid EVRE write offset\n"); + return 0; + } + + /* + * By the time control reaches here the number of EVREs and TREs + * may not match. Only consume the ones that hardware told us. + */ + while ((evre_iterator != evre_write_off)) { + u32 *current_evre = lldev->evre_ring + evre_iterator; + u32 cfg; + + cfg = current_evre[HIDMA_EVRE_CFG_IDX]; + err_info = cfg >> HIDMA_EVRE_ERRINFO_BIT_POS; + err_info &= HIDMA_EVRE_ERRINFO_MASK; + err_code = + (cfg >> HIDMA_EVRE_CODE_BIT_POS) & HIDMA_EVRE_CODE_MASK; + + if (hidma_post_completed(lldev, err_info, err_code)) + break; + + HIDMA_INCREMENT_ITERATOR(evre_iterator, HIDMA_EVRE_SIZE, + evre_ring_size); + + /* + * Read the new event descriptor written by the HW. + * As we are processing the delivered events, other events + * get queued to the SW for processing. + */ + evre_write_off = + readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG); + num_completed++; + + /* + * An error interrupt might have arrived while we are processing + * the completed interrupt. + */ + if (!hidma_ll_isenabled(lldev)) + break; + } + + if (num_completed) { + u32 evre_read_off = (lldev->evre_processed_off + + HIDMA_EVRE_SIZE * num_completed); + evre_read_off = evre_read_off % evre_ring_size; + writel(evre_read_off, lldev->evca + HIDMA_EVCA_DOORBELL_REG); + + /* record the last processed tre offset */ + lldev->evre_processed_off = evre_read_off; + } + + return num_completed; +} + +void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info, + u8 err_code) +{ + while (atomic_read(&lldev->pending_tre_count)) { + if (hidma_post_completed(lldev, err_info, err_code)) + break; + } +} + +static int hidma_ll_reset(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_RESET << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + /* + * Delay 10ms after reset to allow DMA logic to quiesce. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED, + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "transfer channel did not reset\n"); + return ret; + } + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_RESET << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + /* + * Delay 10ms after reset to allow DMA logic to quiesce. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED, + 1000, 10000); + if (ret) + return ret; + + lldev->trch_state = HIDMA_CH_DISABLED; + lldev->evch_state = HIDMA_CH_DISABLED; + return 0; +} + +/* + * The interrupt handler for HIDMA will try to consume as many pending + * EVRE from the event queue as possible. Each EVRE has an associated + * TRE that holds the user interface parameters. EVRE reports the + * result of the transaction. Hardware guarantees ordering between EVREs + * and TREs. We use last processed offset to figure out which TRE is + * associated with which EVRE. If two TREs are consumed by HW, the EVREs + * are in order in the event ring. + * + * This handler will do a one pass for consuming EVREs. Other EVREs may + * be delivered while we are working. It will try to consume incoming + * EVREs one more time and return. + * + * For unprocessed EVREs, hardware will trigger another interrupt until + * all the interrupt bits are cleared. + * + * Hardware guarantees that by the time interrupt is observed, all data + * transactions in flight are delivered to their respective places and + * are visible to the CPU. + * + * On demand paging for IOMMU is only supported for PCIe via PRI + * (Page Request Interface) not for HIDMA. All other hardware instances + * including HIDMA work on pinned DMA addresses. + * + * HIDMA is not aware of IOMMU presence since it follows the DMA API. All + * IOMMU latency will be built into the data movement time. By the time + * interrupt happens, IOMMU lookups + data movement has already taken place. + * + * While the first read in a typical PCI endpoint ISR flushes all outstanding + * requests traditionally to the destination, this concept does not apply + * here for this HW. + */ +static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev, int cause) +{ + unsigned long irqflags; + + if (cause & HIDMA_ERR_INT_MASK) { + dev_err(lldev->dev, "error 0x%x, disabling...\n", + cause); + + /* Clear out pending interrupts */ + writel(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + /* No further submissions. */ + hidma_ll_disable(lldev); + + /* Driver completes the txn and intimates the client.*/ + hidma_cleanup_pending_tre(lldev, 0xFF, + HIDMA_EVRE_STATUS_ERROR); + + return; + } + + spin_lock_irqsave(&lldev->lock, irqflags); + writel_relaxed(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + spin_unlock_irqrestore(&lldev->lock, irqflags); + + /* + * Fine tuned for this HW... + * + * This ISR has been designed for this particular hardware. Relaxed + * read and write accessors are used for performance reasons due to + * interrupt delivery guarantees. Do not copy this code blindly and + * expect that to work. + * + * Try to consume as many EVREs as possible. + */ + hidma_handle_tre_completion(lldev); +} + +irqreturn_t hidma_ll_inthandler(int chirq, void *arg) +{ + struct hidma_lldev *lldev = arg; + u32 status; + u32 enable; + u32 cause; + + status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + cause = status & enable; + + while (cause) { + hidma_ll_int_handler_internal(lldev, cause); + + /* + * Another interrupt might have arrived while we are + * processing this one. Read the new cause. + */ + status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + cause = status & enable; + } + + return IRQ_HANDLED; +} + +irqreturn_t hidma_ll_inthandler_msi(int chirq, void *arg, int cause) +{ + struct hidma_lldev *lldev = arg; + + hidma_ll_int_handler_internal(lldev, cause); + return IRQ_HANDLED; +} + +int hidma_ll_enable(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_ENABLE << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + hidma_is_chan_enabled(HIDMA_CH_STATE(val)), + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "event channel did not get enabled\n"); + return ret; + } + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_ENABLE << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + hidma_is_chan_enabled(HIDMA_CH_STATE(val)), + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "transfer channel did not get enabled\n"); + return ret; + } + + lldev->trch_state = HIDMA_CH_ENABLED; + lldev->evch_state = HIDMA_CH_ENABLED; + + /* enable irqs */ + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + return 0; +} + +void hidma_ll_start(struct hidma_lldev *lldev) +{ + unsigned long irqflags; + + spin_lock_irqsave(&lldev->lock, irqflags); + writel(lldev->tre_write_offset, lldev->trca + HIDMA_TRCA_DOORBELL_REG); + spin_unlock_irqrestore(&lldev->lock, irqflags); +} + +bool hidma_ll_isenabled(struct hidma_lldev *lldev) +{ + u32 val; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + lldev->trch_state = HIDMA_CH_STATE(val); + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + lldev->evch_state = HIDMA_CH_STATE(val); + + /* both channels have to be enabled before calling this function */ + if (hidma_is_chan_enabled(lldev->trch_state) && + hidma_is_chan_enabled(lldev->evch_state)) + return true; + + return false; +} + +void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch) +{ + struct hidma_tre *tre; + unsigned long flags; + + tre = &lldev->trepool[tre_ch]; + + /* copy the TRE into its location in the TRE ring */ + spin_lock_irqsave(&lldev->lock, flags); + tre->tre_index = lldev->tre_write_offset / HIDMA_TRE_SIZE; + lldev->pending_tre_list[tre->tre_index] = tre; + memcpy(lldev->tre_ring + lldev->tre_write_offset, + &tre->tre_local[0], HIDMA_TRE_SIZE); + tre->err_code = 0; + tre->err_info = 0; + tre->queued = 1; + atomic_inc(&lldev->pending_tre_count); + lldev->tre_write_offset = (lldev->tre_write_offset + HIDMA_TRE_SIZE) + % lldev->tre_ring_size; + spin_unlock_irqrestore(&lldev->lock, flags); +} + +/* + * Note that even though we stop this channel if there is a pending transaction + * in flight it will complete and follow the callback. This request will + * prevent further requests to be made. + */ +int hidma_ll_disable(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + /* The channel needs to be in working state */ + if (!hidma_ll_isenabled(lldev)) + return 0; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_SUSPEND << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + /* + * Start the wait right after the suspend is confirmed. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED, + 1000, 10000); + if (ret) + return ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_SUSPEND << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + /* + * Start the wait right after the suspend is confirmed + * Delay up to 10ms after reset to allow DMA logic to quiesce. + */ + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED, + 1000, 10000); + if (ret) + return ret; + + lldev->trch_state = HIDMA_CH_SUSPENDED; + lldev->evch_state = HIDMA_CH_SUSPENDED; + + /* disable interrupts */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return 0; +} + +void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch, + dma_addr_t src, dma_addr_t dest, u32 len, + u32 flags, u32 txntype) +{ + struct hidma_tre *tre; + u32 *tre_local; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in transfer params:%d", + tre_ch); + return; + } + + tre = &lldev->trepool[tre_ch]; + if (atomic_read(&tre->allocated) != true) { + dev_err(lldev->dev, "trying to set params on an unused TRE:%d", + tre_ch); + return; + } + + tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_CFG_IDX] &= ~GENMASK(7, 0); + tre_local[HIDMA_TRE_CFG_IDX] |= txntype; + tre_local[HIDMA_TRE_LEN_IDX] = len; + tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src); + tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src); + tre_local[HIDMA_TRE_DEST_LOW_IDX] = lower_32_bits(dest); + tre_local[HIDMA_TRE_DEST_HI_IDX] = upper_32_bits(dest); + tre->int_flags = flags; +} + +/* + * Called during initialization and after an error condition + * to restore hardware state. + */ +int hidma_ll_setup(struct hidma_lldev *lldev) +{ + int rc; + u64 addr; + u32 val; + u32 nr_tres = lldev->nr_tres; + + atomic_set(&lldev->pending_tre_count, 0); + lldev->tre_processed_off = 0; + lldev->evre_processed_off = 0; + lldev->tre_write_offset = 0; + + /* disable interrupts */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + /* clear all pending interrupts */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + rc = hidma_ll_reset(lldev); + if (rc) + return rc; + + /* + * Clear all pending interrupts again. + * Otherwise, we observe reset complete interrupts. + */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + /* disable interrupts again after reset */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + addr = lldev->tre_dma; + writel(lower_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_LOW_REG); + writel(upper_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_HIGH_REG); + writel(lldev->tre_ring_size, lldev->trca + HIDMA_TRCA_RING_LEN_REG); + + addr = lldev->evre_dma; + writel(lower_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_LOW_REG); + writel(upper_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_HIGH_REG); + writel(HIDMA_EVRE_SIZE * nr_tres, + lldev->evca + HIDMA_EVCA_RING_LEN_REG); + + /* configure interrupts */ + hidma_ll_setup_irq(lldev, lldev->msi_support); + + rc = hidma_ll_enable(lldev); + if (rc) + return rc; + + return rc; +} + +void hidma_ll_setup_irq(struct hidma_lldev *lldev, bool msi) +{ + u32 val; + + lldev->msi_support = msi; + + /* disable interrupts again after reset */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + /* support IRQ by default */ + val = readl(lldev->evca + HIDMA_EVCA_INTCTRL_REG); + val &= ~0xF; + if (!lldev->msi_support) + val = val | 0x1; + writel(val, lldev->evca + HIDMA_EVCA_INTCTRL_REG); + + /* clear all pending interrupts and enable them */ + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); +} + +struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres, + void __iomem *trca, void __iomem *evca, + u8 chidx) +{ + u32 required_bytes; + struct hidma_lldev *lldev; + int rc; + size_t sz; + + if (!trca || !evca || !dev || !nr_tres) + return NULL; + + /* need at least four TREs */ + if (nr_tres < 4) + return NULL; + + /* need an extra space */ + nr_tres += 1; + + lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL); + if (!lldev) + return NULL; + + lldev->evca = evca; + lldev->trca = trca; + lldev->dev = dev; + sz = sizeof(struct hidma_tre); + lldev->trepool = devm_kcalloc(lldev->dev, nr_tres, sz, GFP_KERNEL); + if (!lldev->trepool) + return NULL; + + required_bytes = sizeof(lldev->pending_tre_list[0]); + lldev->pending_tre_list = devm_kcalloc(dev, nr_tres, required_bytes, + GFP_KERNEL); + if (!lldev->pending_tre_list) + return NULL; + + sz = (HIDMA_TRE_SIZE + 1) * nr_tres; + lldev->tre_ring = dmam_alloc_coherent(dev, sz, &lldev->tre_dma, + GFP_KERNEL); + if (!lldev->tre_ring) + return NULL; + + lldev->tre_ring_size = HIDMA_TRE_SIZE * nr_tres; + lldev->nr_tres = nr_tres; + + /* the TRE ring has to be TRE_SIZE aligned */ + if (!IS_ALIGNED(lldev->tre_dma, HIDMA_TRE_SIZE)) { + u8 tre_ring_shift; + + tre_ring_shift = lldev->tre_dma % HIDMA_TRE_SIZE; + tre_ring_shift = HIDMA_TRE_SIZE - tre_ring_shift; + lldev->tre_dma += tre_ring_shift; + lldev->tre_ring += tre_ring_shift; + } + + sz = (HIDMA_EVRE_SIZE + 1) * nr_tres; + lldev->evre_ring = dmam_alloc_coherent(dev, sz, &lldev->evre_dma, + GFP_KERNEL); + if (!lldev->evre_ring) + return NULL; + + lldev->evre_ring_size = HIDMA_EVRE_SIZE * nr_tres; + + /* the EVRE ring has to be EVRE_SIZE aligned */ + if (!IS_ALIGNED(lldev->evre_dma, HIDMA_EVRE_SIZE)) { + u8 evre_ring_shift; + + evre_ring_shift = lldev->evre_dma % HIDMA_EVRE_SIZE; + evre_ring_shift = HIDMA_EVRE_SIZE - evre_ring_shift; + lldev->evre_dma += evre_ring_shift; + lldev->evre_ring += evre_ring_shift; + } + lldev->nr_tres = nr_tres; + lldev->chidx = chidx; + + sz = nr_tres * sizeof(struct hidma_tre *); + rc = kfifo_alloc(&lldev->handoff_fifo, sz, GFP_KERNEL); + if (rc) + return NULL; + + rc = hidma_ll_setup(lldev); + if (rc) + return NULL; + + spin_lock_init(&lldev->lock); + tasklet_setup(&lldev->task, hidma_ll_tre_complete); + lldev->initialized = 1; + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return lldev; +} + +int hidma_ll_uninit(struct hidma_lldev *lldev) +{ + u32 required_bytes; + int rc = 0; + u32 val; + + if (!lldev) + return -ENODEV; + + if (!lldev->initialized) + return 0; + + lldev->initialized = 0; + + required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres; + tasklet_kill(&lldev->task); + memset(lldev->trepool, 0, required_bytes); + lldev->trepool = NULL; + atomic_set(&lldev->pending_tre_count, 0); + lldev->tre_write_offset = 0; + + rc = hidma_ll_reset(lldev); + + /* + * Clear all pending interrupts again. + * Otherwise, we observe reset complete interrupts. + */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return rc; +} + +enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch) +{ + enum dma_status ret = DMA_ERROR; + struct hidma_tre *tre; + unsigned long flags; + u8 err_code; + + spin_lock_irqsave(&lldev->lock, flags); + + tre = &lldev->trepool[tre_ch]; + err_code = tre->err_code; + + if (err_code & HIDMA_EVRE_STATUS_COMPLETE) + ret = DMA_COMPLETE; + else if (err_code & HIDMA_EVRE_STATUS_ERROR) + ret = DMA_ERROR; + else + ret = DMA_IN_PROGRESS; + spin_unlock_irqrestore(&lldev->lock, flags); + + return ret; +} diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c new file mode 100644 index 0000000000..1d675f3125 --- /dev/null +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Qualcomm Technologies HIDMA DMA engine Management interface + * + * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hidma_mgmt.h" + +#define HIDMA_QOS_N_OFFSET 0x700 +#define HIDMA_CFG_OFFSET 0x400 +#define HIDMA_MAX_BUS_REQ_LEN_OFFSET 0x41C +#define HIDMA_MAX_XACTIONS_OFFSET 0x420 +#define HIDMA_HW_VERSION_OFFSET 0x424 +#define HIDMA_CHRESET_TIMEOUT_OFFSET 0x418 + +#define HIDMA_MAX_WR_XACTIONS_MASK GENMASK(4, 0) +#define HIDMA_MAX_RD_XACTIONS_MASK GENMASK(4, 0) +#define HIDMA_WEIGHT_MASK GENMASK(6, 0) +#define HIDMA_MAX_BUS_REQ_LEN_MASK GENMASK(15, 0) +#define HIDMA_CHRESET_TIMEOUT_MASK GENMASK(19, 0) + +#define HIDMA_MAX_WR_XACTIONS_BIT_POS 16 +#define HIDMA_MAX_BUS_WR_REQ_BIT_POS 16 +#define HIDMA_WRR_BIT_POS 8 +#define HIDMA_PRIORITY_BIT_POS 15 + +#define HIDMA_AUTOSUSPEND_TIMEOUT 2000 +#define HIDMA_MAX_CHANNEL_WEIGHT 15 + +static unsigned int max_write_request; +module_param(max_write_request, uint, 0644); +MODULE_PARM_DESC(max_write_request, + "maximum write burst (default: ACPI/DT value)"); + +static unsigned int max_read_request; +module_param(max_read_request, uint, 0644); +MODULE_PARM_DESC(max_read_request, + "maximum read burst (default: ACPI/DT value)"); + +static unsigned int max_wr_xactions; +module_param(max_wr_xactions, uint, 0644); +MODULE_PARM_DESC(max_wr_xactions, + "maximum number of write transactions (default: ACPI/DT value)"); + +static unsigned int max_rd_xactions; +module_param(max_rd_xactions, uint, 0644); +MODULE_PARM_DESC(max_rd_xactions, + "maximum number of read transactions (default: ACPI/DT value)"); + +int hidma_mgmt_setup(struct hidma_mgmt_dev *mgmtdev) +{ + unsigned int i; + u32 val; + + if (!is_power_of_2(mgmtdev->max_write_request) || + (mgmtdev->max_write_request < 128) || + (mgmtdev->max_write_request > 1024)) { + dev_err(&mgmtdev->pdev->dev, "invalid write request %d\n", + mgmtdev->max_write_request); + return -EINVAL; + } + + if (!is_power_of_2(mgmtdev->max_read_request) || + (mgmtdev->max_read_request < 128) || + (mgmtdev->max_read_request > 1024)) { + dev_err(&mgmtdev->pdev->dev, "invalid read request %d\n", + mgmtdev->max_read_request); + return -EINVAL; + } + + if (mgmtdev->max_wr_xactions > HIDMA_MAX_WR_XACTIONS_MASK) { + dev_err(&mgmtdev->pdev->dev, + "max_wr_xactions cannot be bigger than %ld\n", + HIDMA_MAX_WR_XACTIONS_MASK); + return -EINVAL; + } + + if (mgmtdev->max_rd_xactions > HIDMA_MAX_RD_XACTIONS_MASK) { + dev_err(&mgmtdev->pdev->dev, + "max_rd_xactions cannot be bigger than %ld\n", + HIDMA_MAX_RD_XACTIONS_MASK); + return -EINVAL; + } + + for (i = 0; i < mgmtdev->dma_channels; i++) { + if (mgmtdev->priority[i] > 1) { + dev_err(&mgmtdev->pdev->dev, + "priority can be 0 or 1\n"); + return -EINVAL; + } + + if (mgmtdev->weight[i] > HIDMA_MAX_CHANNEL_WEIGHT) { + dev_err(&mgmtdev->pdev->dev, + "max value of weight can be %d.\n", + HIDMA_MAX_CHANNEL_WEIGHT); + return -EINVAL; + } + + /* weight needs to be at least one */ + if (mgmtdev->weight[i] == 0) + mgmtdev->weight[i] = 1; + } + + pm_runtime_get_sync(&mgmtdev->pdev->dev); + val = readl(mgmtdev->virtaddr + HIDMA_MAX_BUS_REQ_LEN_OFFSET); + val &= ~(HIDMA_MAX_BUS_REQ_LEN_MASK << HIDMA_MAX_BUS_WR_REQ_BIT_POS); + val |= mgmtdev->max_write_request << HIDMA_MAX_BUS_WR_REQ_BIT_POS; + val &= ~HIDMA_MAX_BUS_REQ_LEN_MASK; + val |= mgmtdev->max_read_request; + writel(val, mgmtdev->virtaddr + HIDMA_MAX_BUS_REQ_LEN_OFFSET); + + val = readl(mgmtdev->virtaddr + HIDMA_MAX_XACTIONS_OFFSET); + val &= ~(HIDMA_MAX_WR_XACTIONS_MASK << HIDMA_MAX_WR_XACTIONS_BIT_POS); + val |= mgmtdev->max_wr_xactions << HIDMA_MAX_WR_XACTIONS_BIT_POS; + val &= ~HIDMA_MAX_RD_XACTIONS_MASK; + val |= mgmtdev->max_rd_xactions; + writel(val, mgmtdev->virtaddr + HIDMA_MAX_XACTIONS_OFFSET); + + mgmtdev->hw_version = + readl(mgmtdev->virtaddr + HIDMA_HW_VERSION_OFFSET); + mgmtdev->hw_version_major = (mgmtdev->hw_version >> 28) & 0xF; + mgmtdev->hw_version_minor = (mgmtdev->hw_version >> 16) & 0xF; + + for (i = 0; i < mgmtdev->dma_channels; i++) { + u32 weight = mgmtdev->weight[i]; + u32 priority = mgmtdev->priority[i]; + + val = readl(mgmtdev->virtaddr + HIDMA_QOS_N_OFFSET + (4 * i)); + val &= ~(1 << HIDMA_PRIORITY_BIT_POS); + val |= (priority & 0x1) << HIDMA_PRIORITY_BIT_POS; + val &= ~(HIDMA_WEIGHT_MASK << HIDMA_WRR_BIT_POS); + val |= (weight & HIDMA_WEIGHT_MASK) << HIDMA_WRR_BIT_POS; + writel(val, mgmtdev->virtaddr + HIDMA_QOS_N_OFFSET + (4 * i)); + } + + val = readl(mgmtdev->virtaddr + HIDMA_CHRESET_TIMEOUT_OFFSET); + val &= ~HIDMA_CHRESET_TIMEOUT_MASK; + val |= mgmtdev->chreset_timeout_cycles & HIDMA_CHRESET_TIMEOUT_MASK; + writel(val, mgmtdev->virtaddr + HIDMA_CHRESET_TIMEOUT_OFFSET); + + pm_runtime_mark_last_busy(&mgmtdev->pdev->dev); + pm_runtime_put_autosuspend(&mgmtdev->pdev->dev); + return 0; +} +EXPORT_SYMBOL_GPL(hidma_mgmt_setup); + +static int hidma_mgmt_probe(struct platform_device *pdev) +{ + struct hidma_mgmt_dev *mgmtdev; + struct resource *res; + void __iomem *virtaddr; + int irq; + int rc; + u32 val; + + pm_runtime_set_autosuspend_delay(&pdev->dev, HIDMA_AUTOSUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + virtaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(virtaddr)) { + rc = PTR_ERR(virtaddr); + goto out; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + rc = irq; + goto out; + } + + mgmtdev = devm_kzalloc(&pdev->dev, sizeof(*mgmtdev), GFP_KERNEL); + if (!mgmtdev) { + rc = -ENOMEM; + goto out; + } + + mgmtdev->pdev = pdev; + mgmtdev->addrsize = resource_size(res); + mgmtdev->virtaddr = virtaddr; + + rc = device_property_read_u32(&pdev->dev, "dma-channels", + &mgmtdev->dma_channels); + if (rc) { + dev_err(&pdev->dev, "number of channels missing\n"); + goto out; + } + + rc = device_property_read_u32(&pdev->dev, + "channel-reset-timeout-cycles", + &mgmtdev->chreset_timeout_cycles); + if (rc) { + dev_err(&pdev->dev, "channel reset timeout missing\n"); + goto out; + } + + rc = device_property_read_u32(&pdev->dev, "max-write-burst-bytes", + &mgmtdev->max_write_request); + if (rc) { + dev_err(&pdev->dev, "max-write-burst-bytes missing\n"); + goto out; + } + + if (max_write_request && + (max_write_request != mgmtdev->max_write_request)) { + dev_info(&pdev->dev, "overriding max-write-burst-bytes: %d\n", + max_write_request); + mgmtdev->max_write_request = max_write_request; + } else + max_write_request = mgmtdev->max_write_request; + + rc = device_property_read_u32(&pdev->dev, "max-read-burst-bytes", + &mgmtdev->max_read_request); + if (rc) { + dev_err(&pdev->dev, "max-read-burst-bytes missing\n"); + goto out; + } + if (max_read_request && + (max_read_request != mgmtdev->max_read_request)) { + dev_info(&pdev->dev, "overriding max-read-burst-bytes: %d\n", + max_read_request); + mgmtdev->max_read_request = max_read_request; + } else + max_read_request = mgmtdev->max_read_request; + + rc = device_property_read_u32(&pdev->dev, "max-write-transactions", + &mgmtdev->max_wr_xactions); + if (rc) { + dev_err(&pdev->dev, "max-write-transactions missing\n"); + goto out; + } + if (max_wr_xactions && + (max_wr_xactions != mgmtdev->max_wr_xactions)) { + dev_info(&pdev->dev, "overriding max-write-transactions: %d\n", + max_wr_xactions); + mgmtdev->max_wr_xactions = max_wr_xactions; + } else + max_wr_xactions = mgmtdev->max_wr_xactions; + + rc = device_property_read_u32(&pdev->dev, "max-read-transactions", + &mgmtdev->max_rd_xactions); + if (rc) { + dev_err(&pdev->dev, "max-read-transactions missing\n"); + goto out; + } + if (max_rd_xactions && + (max_rd_xactions != mgmtdev->max_rd_xactions)) { + dev_info(&pdev->dev, "overriding max-read-transactions: %d\n", + max_rd_xactions); + mgmtdev->max_rd_xactions = max_rd_xactions; + } else + max_rd_xactions = mgmtdev->max_rd_xactions; + + mgmtdev->priority = devm_kcalloc(&pdev->dev, + mgmtdev->dma_channels, + sizeof(*mgmtdev->priority), + GFP_KERNEL); + if (!mgmtdev->priority) { + rc = -ENOMEM; + goto out; + } + + mgmtdev->weight = devm_kcalloc(&pdev->dev, + mgmtdev->dma_channels, + sizeof(*mgmtdev->weight), GFP_KERNEL); + if (!mgmtdev->weight) { + rc = -ENOMEM; + goto out; + } + + rc = hidma_mgmt_setup(mgmtdev); + if (rc) { + dev_err(&pdev->dev, "setup failed\n"); + goto out; + } + + /* start the HW */ + val = readl(mgmtdev->virtaddr + HIDMA_CFG_OFFSET); + val |= 1; + writel(val, mgmtdev->virtaddr + HIDMA_CFG_OFFSET); + + rc = hidma_mgmt_init_sys(mgmtdev); + if (rc) { + dev_err(&pdev->dev, "sysfs setup failed\n"); + goto out; + } + + dev_info(&pdev->dev, + "HW rev: %d.%d @ %pa with %d physical channels\n", + mgmtdev->hw_version_major, mgmtdev->hw_version_minor, + &res->start, mgmtdev->dma_channels); + + platform_set_drvdata(pdev, mgmtdev); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + return 0; +out: + pm_runtime_put_sync_suspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return rc; +} + +#if IS_ENABLED(CONFIG_ACPI) +static const struct acpi_device_id hidma_mgmt_acpi_ids[] = { + {"QCOM8060"}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, hidma_mgmt_acpi_ids); +#endif + +static const struct of_device_id hidma_mgmt_match[] = { + {.compatible = "qcom,hidma-mgmt-1.0",}, + {}, +}; +MODULE_DEVICE_TABLE(of, hidma_mgmt_match); + +static struct platform_driver hidma_mgmt_driver = { + .probe = hidma_mgmt_probe, + .driver = { + .name = "hidma-mgmt", + .of_match_table = hidma_mgmt_match, + .acpi_match_table = ACPI_PTR(hidma_mgmt_acpi_ids), + }, +}; + +#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ) +static int object_counter; + +static int __init hidma_mgmt_of_populate_channels(struct device_node *np) +{ + struct platform_device *pdev_parent = of_find_device_by_node(np); + struct platform_device_info pdevinfo; + struct device_node *child; + struct resource *res; + int ret = 0; + + /* allocate a resource array */ + res = kcalloc(3, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + for_each_available_child_of_node(np, child) { + struct platform_device *new_pdev; + + ret = of_address_to_resource(child, 0, &res[0]); + if (!ret) + goto out; + + ret = of_address_to_resource(child, 1, &res[1]); + if (!ret) + goto out; + + ret = of_irq_to_resource(child, 0, &res[2]); + if (ret <= 0) + goto out; + + memset(&pdevinfo, 0, sizeof(pdevinfo)); + pdevinfo.fwnode = &child->fwnode; + pdevinfo.parent = pdev_parent ? &pdev_parent->dev : NULL; + pdevinfo.name = child->name; + pdevinfo.id = object_counter++; + pdevinfo.res = res; + pdevinfo.num_res = 3; + pdevinfo.data = NULL; + pdevinfo.size_data = 0; + pdevinfo.dma_mask = DMA_BIT_MASK(64); + new_pdev = platform_device_register_full(&pdevinfo); + if (IS_ERR(new_pdev)) { + ret = PTR_ERR(new_pdev); + goto out; + } + new_pdev->dev.of_node = child; + of_dma_configure(&new_pdev->dev, child, true); + /* + * It is assumed that calling of_msi_configure is safe on + * platforms with or without MSI support. + */ + of_msi_configure(&new_pdev->dev, child); + } + + kfree(res); + + return ret; + +out: + of_node_put(child); + kfree(res); + + return ret; +} +#endif + +static int __init hidma_mgmt_init(void) +{ +#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ) + struct device_node *child; + + for_each_matching_node(child, hidma_mgmt_match) { + /* device tree based firmware here */ + hidma_mgmt_of_populate_channels(child); + } +#endif + /* + * We do not check for return value here, as it is assumed that + * platform_driver_register must not fail. The reason for this is that + * the (potential) hidma_mgmt_of_populate_channels calls above are not + * cleaned up if it does fail, and to do this work is quite + * complicated. In particular, various calls of of_address_to_resource, + * of_irq_to_resource, platform_device_register_full, of_dma_configure, + * and of_msi_configure which then call other functions and so on, must + * be cleaned up - this is not a trivial exercise. + * + * Currently, this module is not intended to be unloaded, and there is + * no module_exit function defined which does the needed cleanup. For + * this reason, we have to assume success here. + */ + platform_driver_register(&hidma_mgmt_driver); + + return 0; +} +module_init(hidma_mgmt_init); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/qcom/hidma_mgmt.h b/drivers/dma/qcom/hidma_mgmt.h new file mode 100644 index 0000000000..30e8095988 --- /dev/null +++ b/drivers/dma/qcom/hidma_mgmt.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Qualcomm Technologies HIDMA Management common header + * + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + */ + +struct hidma_mgmt_dev { + u8 hw_version_major; + u8 hw_version_minor; + + u32 max_wr_xactions; + u32 max_rd_xactions; + u32 max_write_request; + u32 max_read_request; + u32 dma_channels; + u32 chreset_timeout_cycles; + u32 hw_version; + u32 *priority; + u32 *weight; + + /* Hardware device constants */ + void __iomem *virtaddr; + resource_size_t addrsize; + + struct kobject **chroots; + struct platform_device *pdev; +}; + +int hidma_mgmt_init_sys(struct hidma_mgmt_dev *dev); +int hidma_mgmt_setup(struct hidma_mgmt_dev *mgmtdev); diff --git a/drivers/dma/qcom/hidma_mgmt_sys.c b/drivers/dma/qcom/hidma_mgmt_sys.c new file mode 100644 index 0000000000..930eae0a62 --- /dev/null +++ b/drivers/dma/qcom/hidma_mgmt_sys.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Qualcomm Technologies HIDMA Management SYS interface + * + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "hidma_mgmt.h" + +struct hidma_chan_attr { + struct hidma_mgmt_dev *mdev; + int index; + struct kobj_attribute attr; +}; + +struct hidma_mgmt_fileinfo { + char *name; + int mode; + int (*get)(struct hidma_mgmt_dev *mdev); + int (*set)(struct hidma_mgmt_dev *mdev, u64 val); +}; + +#define IMPLEMENT_GETSET(name) \ +static int get_##name(struct hidma_mgmt_dev *mdev) \ +{ \ + return mdev->name; \ +} \ +static int set_##name(struct hidma_mgmt_dev *mdev, u64 val) \ +{ \ + u64 tmp; \ + int rc; \ + \ + tmp = mdev->name; \ + mdev->name = val; \ + rc = hidma_mgmt_setup(mdev); \ + if (rc) \ + mdev->name = tmp; \ + return rc; \ +} + +#define DECLARE_ATTRIBUTE(name, mode) \ + {#name, mode, get_##name, set_##name} + +IMPLEMENT_GETSET(hw_version_major) +IMPLEMENT_GETSET(hw_version_minor) +IMPLEMENT_GETSET(max_wr_xactions) +IMPLEMENT_GETSET(max_rd_xactions) +IMPLEMENT_GETSET(max_write_request) +IMPLEMENT_GETSET(max_read_request) +IMPLEMENT_GETSET(dma_channels) +IMPLEMENT_GETSET(chreset_timeout_cycles) + +static int set_priority(struct hidma_mgmt_dev *mdev, unsigned int i, u64 val) +{ + u64 tmp; + int rc; + + if (i >= mdev->dma_channels) + return -EINVAL; + + tmp = mdev->priority[i]; + mdev->priority[i] = val; + rc = hidma_mgmt_setup(mdev); + if (rc) + mdev->priority[i] = tmp; + return rc; +} + +static int set_weight(struct hidma_mgmt_dev *mdev, unsigned int i, u64 val) +{ + u64 tmp; + int rc; + + if (i >= mdev->dma_channels) + return -EINVAL; + + tmp = mdev->weight[i]; + mdev->weight[i] = val; + rc = hidma_mgmt_setup(mdev); + if (rc) + mdev->weight[i] = tmp; + return rc; +} + +static struct hidma_mgmt_fileinfo hidma_mgmt_files[] = { + DECLARE_ATTRIBUTE(hw_version_major, S_IRUGO), + DECLARE_ATTRIBUTE(hw_version_minor, S_IRUGO), + DECLARE_ATTRIBUTE(dma_channels, S_IRUGO), + DECLARE_ATTRIBUTE(chreset_timeout_cycles, S_IRUGO), + DECLARE_ATTRIBUTE(max_wr_xactions, S_IRUGO), + DECLARE_ATTRIBUTE(max_rd_xactions, S_IRUGO), + DECLARE_ATTRIBUTE(max_write_request, S_IRUGO), + DECLARE_ATTRIBUTE(max_read_request, S_IRUGO), +}; + +static ssize_t show_values(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hidma_mgmt_dev *mdev = dev_get_drvdata(dev); + unsigned int i; + + buf[0] = 0; + + for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) { + if (strcmp(attr->attr.name, hidma_mgmt_files[i].name) == 0) { + sprintf(buf, "%d\n", hidma_mgmt_files[i].get(mdev)); + break; + } + } + return strlen(buf); +} + +static ssize_t set_values(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hidma_mgmt_dev *mdev = dev_get_drvdata(dev); + unsigned long tmp; + unsigned int i; + int rc; + + rc = kstrtoul(buf, 0, &tmp); + if (rc) + return rc; + + for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) { + if (strcmp(attr->attr.name, hidma_mgmt_files[i].name) == 0) { + rc = hidma_mgmt_files[i].set(mdev, tmp); + if (rc) + return rc; + + break; + } + } + return count; +} + +static ssize_t show_values_channel(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hidma_chan_attr *chattr; + struct hidma_mgmt_dev *mdev; + + buf[0] = 0; + chattr = container_of(attr, struct hidma_chan_attr, attr); + mdev = chattr->mdev; + if (strcmp(attr->attr.name, "priority") == 0) + sprintf(buf, "%d\n", mdev->priority[chattr->index]); + else if (strcmp(attr->attr.name, "weight") == 0) + sprintf(buf, "%d\n", mdev->weight[chattr->index]); + + return strlen(buf); +} + +static ssize_t set_values_channel(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + struct hidma_chan_attr *chattr; + struct hidma_mgmt_dev *mdev; + unsigned long tmp; + int rc; + + chattr = container_of(attr, struct hidma_chan_attr, attr); + mdev = chattr->mdev; + + rc = kstrtoul(buf, 0, &tmp); + if (rc) + return rc; + + if (strcmp(attr->attr.name, "priority") == 0) { + rc = set_priority(mdev, chattr->index, tmp); + if (rc) + return rc; + } else if (strcmp(attr->attr.name, "weight") == 0) { + rc = set_weight(mdev, chattr->index, tmp); + if (rc) + return rc; + } + return count; +} + +static int create_sysfs_entry(struct hidma_mgmt_dev *dev, char *name, int mode) +{ + struct device_attribute *attrs; + char *name_copy; + + attrs = devm_kmalloc(&dev->pdev->dev, + sizeof(struct device_attribute), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + name_copy = devm_kstrdup(&dev->pdev->dev, name, GFP_KERNEL); + if (!name_copy) + return -ENOMEM; + + attrs->attr.name = name_copy; + attrs->attr.mode = mode; + attrs->show = show_values; + attrs->store = set_values; + sysfs_attr_init(&attrs->attr); + + return device_create_file(&dev->pdev->dev, attrs); +} + +static int create_sysfs_entry_channel(struct hidma_mgmt_dev *mdev, char *name, + int mode, int index, + struct kobject *parent) +{ + struct hidma_chan_attr *chattr; + char *name_copy; + + chattr = devm_kmalloc(&mdev->pdev->dev, sizeof(*chattr), GFP_KERNEL); + if (!chattr) + return -ENOMEM; + + name_copy = devm_kstrdup(&mdev->pdev->dev, name, GFP_KERNEL); + if (!name_copy) + return -ENOMEM; + + chattr->mdev = mdev; + chattr->index = index; + chattr->attr.attr.name = name_copy; + chattr->attr.attr.mode = mode; + chattr->attr.show = show_values_channel; + chattr->attr.store = set_values_channel; + sysfs_attr_init(&chattr->attr.attr); + + return sysfs_create_file(parent, &chattr->attr.attr); +} + +int hidma_mgmt_init_sys(struct hidma_mgmt_dev *mdev) +{ + unsigned int i; + int rc; + int required; + struct kobject *chanops; + + required = sizeof(*mdev->chroots) * mdev->dma_channels; + mdev->chroots = devm_kmalloc(&mdev->pdev->dev, required, GFP_KERNEL); + if (!mdev->chroots) + return -ENOMEM; + + chanops = kobject_create_and_add("chanops", &mdev->pdev->dev.kobj); + if (!chanops) + return -ENOMEM; + + /* create each channel directory here */ + for (i = 0; i < mdev->dma_channels; i++) { + char name[20]; + + snprintf(name, sizeof(name), "chan%d", i); + mdev->chroots[i] = kobject_create_and_add(name, chanops); + if (!mdev->chroots[i]) + return -ENOMEM; + } + + /* populate common parameters */ + for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) { + rc = create_sysfs_entry(mdev, hidma_mgmt_files[i].name, + hidma_mgmt_files[i].mode); + if (rc) + return rc; + } + + /* populate parameters that are per channel */ + for (i = 0; i < mdev->dma_channels; i++) { + rc = create_sysfs_entry_channel(mdev, "priority", + (S_IRUGO | S_IWUGO), i, + mdev->chroots[i]); + if (rc) + return rc; + + rc = create_sysfs_entry_channel(mdev, "weight", + (S_IRUGO | S_IWUGO), i, + mdev->chroots[i]); + if (rc) + return rc; + } + + return 0; +} +EXPORT_SYMBOL_GPL(hidma_mgmt_init_sys); diff --git a/drivers/dma/qcom/qcom_adm.c b/drivers/dma/qcom/qcom_adm.c new file mode 100644 index 0000000000..d56caf1681 --- /dev/null +++ b/drivers/dma/qcom/qcom_adm.c @@ -0,0 +1,953 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" +#include "../virt-dma.h" + +/* ADM registers - calculated from channel number and security domain */ +#define ADM_CHAN_MULTI 0x4 +#define ADM_CI_MULTI 0x4 +#define ADM_CRCI_MULTI 0x4 +#define ADM_EE_MULTI 0x800 +#define ADM_CHAN_OFFS(chan) (ADM_CHAN_MULTI * (chan)) +#define ADM_EE_OFFS(ee) (ADM_EE_MULTI * (ee)) +#define ADM_CHAN_EE_OFFS(chan, ee) (ADM_CHAN_OFFS(chan) + ADM_EE_OFFS(ee)) +#define ADM_CHAN_OFFS(chan) (ADM_CHAN_MULTI * (chan)) +#define ADM_CI_OFFS(ci) (ADM_CHAN_OFF(ci)) +#define ADM_CH_CMD_PTR(chan, ee) (ADM_CHAN_EE_OFFS(chan, ee)) +#define ADM_CH_RSLT(chan, ee) (0x40 + ADM_CHAN_EE_OFFS(chan, ee)) +#define ADM_CH_FLUSH_STATE0(chan, ee) (0x80 + ADM_CHAN_EE_OFFS(chan, ee)) +#define ADM_CH_STATUS_SD(chan, ee) (0x200 + ADM_CHAN_EE_OFFS(chan, ee)) +#define ADM_CH_CONF(chan) (0x240 + ADM_CHAN_OFFS(chan)) +#define ADM_CH_RSLT_CONF(chan, ee) (0x300 + ADM_CHAN_EE_OFFS(chan, ee)) +#define ADM_SEC_DOMAIN_IRQ_STATUS(ee) (0x380 + ADM_EE_OFFS(ee)) +#define ADM_CI_CONF(ci) (0x390 + (ci) * ADM_CI_MULTI) +#define ADM_GP_CTL 0x3d8 +#define ADM_CRCI_CTL(crci, ee) (0x400 + (crci) * ADM_CRCI_MULTI + \ + ADM_EE_OFFS(ee)) + +/* channel status */ +#define ADM_CH_STATUS_VALID BIT(1) + +/* channel result */ +#define ADM_CH_RSLT_VALID BIT(31) +#define ADM_CH_RSLT_ERR BIT(3) +#define ADM_CH_RSLT_FLUSH BIT(2) +#define ADM_CH_RSLT_TPD BIT(1) + +/* channel conf */ +#define ADM_CH_CONF_SHADOW_EN BIT(12) +#define ADM_CH_CONF_MPU_DISABLE BIT(11) +#define ADM_CH_CONF_PERM_MPU_CONF BIT(9) +#define ADM_CH_CONF_FORCE_RSLT_EN BIT(7) +#define ADM_CH_CONF_SEC_DOMAIN(ee) ((((ee) & 0x3) << 4) | (((ee) & 0x4) << 11)) + +/* channel result conf */ +#define ADM_CH_RSLT_CONF_FLUSH_EN BIT(1) +#define ADM_CH_RSLT_CONF_IRQ_EN BIT(0) + +/* CRCI CTL */ +#define ADM_CRCI_CTL_MUX_SEL BIT(18) +#define ADM_CRCI_CTL_RST BIT(17) + +/* CI configuration */ +#define ADM_CI_RANGE_END(x) ((x) << 24) +#define ADM_CI_RANGE_START(x) ((x) << 16) +#define ADM_CI_BURST_4_WORDS BIT(2) +#define ADM_CI_BURST_8_WORDS BIT(3) + +/* GP CTL */ +#define ADM_GP_CTL_LP_EN BIT(12) +#define ADM_GP_CTL_LP_CNT(x) ((x) << 8) + +/* Command pointer list entry */ +#define ADM_CPLE_LP BIT(31) +#define ADM_CPLE_CMD_PTR_LIST BIT(29) + +/* Command list entry */ +#define ADM_CMD_LC BIT(31) +#define ADM_CMD_DST_CRCI(n) (((n) & 0xf) << 7) +#define ADM_CMD_SRC_CRCI(n) (((n) & 0xf) << 3) + +#define ADM_CMD_TYPE_SINGLE 0x0 +#define ADM_CMD_TYPE_BOX 0x3 + +#define ADM_CRCI_MUX_SEL BIT(4) +#define ADM_DESC_ALIGN 8 +#define ADM_MAX_XFER (SZ_64K - 1) +#define ADM_MAX_ROWS (SZ_64K - 1) +#define ADM_MAX_CHANNELS 16 + +struct adm_desc_hw_box { + u32 cmd; + u32 src_addr; + u32 dst_addr; + u32 row_len; + u32 num_rows; + u32 row_offset; +}; + +struct adm_desc_hw_single { + u32 cmd; + u32 src_addr; + u32 dst_addr; + u32 len; +}; + +struct adm_async_desc { + struct virt_dma_desc vd; + struct adm_device *adev; + + size_t length; + enum dma_transfer_direction dir; + dma_addr_t dma_addr; + size_t dma_len; + + void *cpl; + dma_addr_t cp_addr; + u32 crci; + u32 mux; + u32 blk_size; +}; + +struct adm_chan { + struct virt_dma_chan vc; + struct adm_device *adev; + + /* parsed from DT */ + u32 id; /* channel id */ + + struct adm_async_desc *curr_txd; + struct dma_slave_config slave; + u32 crci; + u32 mux; + struct list_head node; + + int error; + int initialized; +}; + +static inline struct adm_chan *to_adm_chan(struct dma_chan *common) +{ + return container_of(common, struct adm_chan, vc.chan); +} + +struct adm_device { + void __iomem *regs; + struct device *dev; + struct dma_device common; + struct device_dma_parameters dma_parms; + struct adm_chan *channels; + + u32 ee; + + struct clk *core_clk; + struct clk *iface_clk; + + struct reset_control *clk_reset; + struct reset_control *c0_reset; + struct reset_control *c1_reset; + struct reset_control *c2_reset; + int irq; +}; + +/** + * adm_free_chan - Frees dma resources associated with the specific channel + * + * @chan: dma channel + * + * Free all allocated descriptors associated with this channel + */ +static void adm_free_chan(struct dma_chan *chan) +{ + /* free all queued descriptors */ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +/** + * adm_get_blksize - Get block size from burst value + * + * @burst: Burst size of transaction + */ +static int adm_get_blksize(unsigned int burst) +{ + int ret; + + switch (burst) { + case 16: + case 32: + case 64: + case 128: + ret = ffs(burst >> 4) - 1; + break; + case 192: + ret = 4; + break; + case 256: + ret = 5; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/** + * adm_process_fc_descriptors - Process descriptors for flow controlled xfers + * + * @achan: ADM channel + * @desc: Descriptor memory pointer + * @sg: Scatterlist entry + * @crci: CRCI value + * @burst: Burst size of transaction + * @direction: DMA transfer direction + */ +static void *adm_process_fc_descriptors(struct adm_chan *achan, void *desc, + struct scatterlist *sg, u32 crci, + u32 burst, + enum dma_transfer_direction direction) +{ + struct adm_desc_hw_box *box_desc = NULL; + struct adm_desc_hw_single *single_desc; + u32 remainder = sg_dma_len(sg); + u32 rows, row_offset, crci_cmd; + u32 mem_addr = sg_dma_address(sg); + u32 *incr_addr = &mem_addr; + u32 *src, *dst; + + if (direction == DMA_DEV_TO_MEM) { + crci_cmd = ADM_CMD_SRC_CRCI(crci); + row_offset = burst; + src = &achan->slave.src_addr; + dst = &mem_addr; + } else { + crci_cmd = ADM_CMD_DST_CRCI(crci); + row_offset = burst << 16; + src = &mem_addr; + dst = &achan->slave.dst_addr; + } + + while (remainder >= burst) { + box_desc = desc; + box_desc->cmd = ADM_CMD_TYPE_BOX | crci_cmd; + box_desc->row_offset = row_offset; + box_desc->src_addr = *src; + box_desc->dst_addr = *dst; + + rows = remainder / burst; + rows = min_t(u32, rows, ADM_MAX_ROWS); + box_desc->num_rows = rows << 16 | rows; + box_desc->row_len = burst << 16 | burst; + + *incr_addr += burst * rows; + remainder -= burst * rows; + desc += sizeof(*box_desc); + } + + /* if leftover bytes, do one single descriptor */ + if (remainder) { + single_desc = desc; + single_desc->cmd = ADM_CMD_TYPE_SINGLE | crci_cmd; + single_desc->len = remainder; + single_desc->src_addr = *src; + single_desc->dst_addr = *dst; + desc += sizeof(*single_desc); + + if (sg_is_last(sg)) + single_desc->cmd |= ADM_CMD_LC; + } else { + if (box_desc && sg_is_last(sg)) + box_desc->cmd |= ADM_CMD_LC; + } + + return desc; +} + +/** + * adm_process_non_fc_descriptors - Process descriptors for non-fc xfers + * + * @achan: ADM channel + * @desc: Descriptor memory pointer + * @sg: Scatterlist entry + * @direction: DMA transfer direction + */ +static void *adm_process_non_fc_descriptors(struct adm_chan *achan, void *desc, + struct scatterlist *sg, + enum dma_transfer_direction direction) +{ + struct adm_desc_hw_single *single_desc; + u32 remainder = sg_dma_len(sg); + u32 mem_addr = sg_dma_address(sg); + u32 *incr_addr = &mem_addr; + u32 *src, *dst; + + if (direction == DMA_DEV_TO_MEM) { + src = &achan->slave.src_addr; + dst = &mem_addr; + } else { + src = &mem_addr; + dst = &achan->slave.dst_addr; + } + + do { + single_desc = desc; + single_desc->cmd = ADM_CMD_TYPE_SINGLE; + single_desc->src_addr = *src; + single_desc->dst_addr = *dst; + single_desc->len = (remainder > ADM_MAX_XFER) ? + ADM_MAX_XFER : remainder; + + remainder -= single_desc->len; + *incr_addr += single_desc->len; + desc += sizeof(*single_desc); + } while (remainder); + + /* set last command if this is the end of the whole transaction */ + if (sg_is_last(sg)) + single_desc->cmd |= ADM_CMD_LC; + + return desc; +} + +/** + * adm_prep_slave_sg - Prep slave sg transaction + * + * @chan: dma channel + * @sgl: scatter gather list + * @sg_len: length of sg + * @direction: DMA transfer direction + * @flags: DMA flags + * @context: transfer context (unused) + */ +static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, + void *context) +{ + struct adm_chan *achan = to_adm_chan(chan); + struct adm_device *adev = achan->adev; + struct adm_async_desc *async_desc; + struct scatterlist *sg; + dma_addr_t cple_addr; + u32 i, burst; + u32 single_count = 0, box_count = 0, crci = 0; + void *desc; + u32 *cple; + int blk_size = 0; + + if (!is_slave_direction(direction)) { + dev_err(adev->dev, "invalid dma direction\n"); + return NULL; + } + + /* + * get burst value from slave configuration + */ + burst = (direction == DMA_MEM_TO_DEV) ? + achan->slave.dst_maxburst : + achan->slave.src_maxburst; + + /* if using flow control, validate burst and crci values */ + if (achan->slave.device_fc) { + blk_size = adm_get_blksize(burst); + if (blk_size < 0) { + dev_err(adev->dev, "invalid burst value: %d\n", + burst); + return NULL; + } + + crci = achan->crci & 0xf; + if (!crci || achan->crci > 0x1f) { + dev_err(adev->dev, "invalid crci value\n"); + return NULL; + } + } + + /* iterate through sgs and compute allocation size of structures */ + for_each_sg(sgl, sg, sg_len, i) { + if (achan->slave.device_fc) { + box_count += DIV_ROUND_UP(sg_dma_len(sg) / burst, + ADM_MAX_ROWS); + if (sg_dma_len(sg) % burst) + single_count++; + } else { + single_count += DIV_ROUND_UP(sg_dma_len(sg), + ADM_MAX_XFER); + } + } + + async_desc = kzalloc(sizeof(*async_desc), GFP_NOWAIT); + if (!async_desc) { + dev_err(adev->dev, "not enough memory for async_desc struct\n"); + return NULL; + } + + async_desc->mux = achan->mux ? ADM_CRCI_CTL_MUX_SEL : 0; + async_desc->crci = crci; + async_desc->blk_size = blk_size; + async_desc->dma_len = single_count * sizeof(struct adm_desc_hw_single) + + box_count * sizeof(struct adm_desc_hw_box) + + sizeof(*cple) + 2 * ADM_DESC_ALIGN; + + async_desc->cpl = kzalloc(async_desc->dma_len, GFP_NOWAIT); + if (!async_desc->cpl) { + dev_err(adev->dev, "not enough memory for cpl struct\n"); + goto free; + } + + async_desc->adev = adev; + + /* both command list entry and descriptors must be 8 byte aligned */ + cple = PTR_ALIGN(async_desc->cpl, ADM_DESC_ALIGN); + desc = PTR_ALIGN(cple + 1, ADM_DESC_ALIGN); + + for_each_sg(sgl, sg, sg_len, i) { + async_desc->length += sg_dma_len(sg); + + if (achan->slave.device_fc) + desc = adm_process_fc_descriptors(achan, desc, sg, crci, + burst, direction); + else + desc = adm_process_non_fc_descriptors(achan, desc, sg, + direction); + } + + async_desc->dma_addr = dma_map_single(adev->dev, async_desc->cpl, + async_desc->dma_len, + DMA_TO_DEVICE); + if (dma_mapping_error(adev->dev, async_desc->dma_addr)) { + dev_err(adev->dev, "dma mapping error for cpl\n"); + goto free; + } + + cple_addr = async_desc->dma_addr + ((void *)cple - async_desc->cpl); + + /* init cmd list */ + dma_sync_single_for_cpu(adev->dev, cple_addr, sizeof(*cple), + DMA_TO_DEVICE); + *cple = ADM_CPLE_LP; + *cple |= (async_desc->dma_addr + ADM_DESC_ALIGN) >> 3; + dma_sync_single_for_device(adev->dev, cple_addr, sizeof(*cple), + DMA_TO_DEVICE); + + return vchan_tx_prep(&achan->vc, &async_desc->vd, flags); + +free: + kfree(async_desc); + return NULL; +} + +/** + * adm_terminate_all - terminate all transactions on a channel + * @chan: dma channel + * + * Dequeues and frees all transactions, aborts current transaction + * No callbacks are done + * + */ +static int adm_terminate_all(struct dma_chan *chan) +{ + struct adm_chan *achan = to_adm_chan(chan); + struct adm_device *adev = achan->adev; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&achan->vc.lock, flags); + vchan_get_all_descriptors(&achan->vc, &head); + + /* send flush command to terminate current transaction */ + writel_relaxed(0x0, + adev->regs + ADM_CH_FLUSH_STATE0(achan->id, adev->ee)); + + spin_unlock_irqrestore(&achan->vc.lock, flags); + + vchan_dma_desc_free_list(&achan->vc, &head); + + return 0; +} + +static int adm_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) +{ + struct adm_chan *achan = to_adm_chan(chan); + struct qcom_adm_peripheral_config *config = cfg->peripheral_config; + unsigned long flag; + + spin_lock_irqsave(&achan->vc.lock, flag); + memcpy(&achan->slave, cfg, sizeof(struct dma_slave_config)); + if (cfg->peripheral_size == sizeof(*config)) + achan->crci = config->crci; + spin_unlock_irqrestore(&achan->vc.lock, flag); + + return 0; +} + +/** + * adm_start_dma - start next transaction + * @achan: ADM dma channel + */ +static void adm_start_dma(struct adm_chan *achan) +{ + struct virt_dma_desc *vd = vchan_next_desc(&achan->vc); + struct adm_device *adev = achan->adev; + struct adm_async_desc *async_desc; + + lockdep_assert_held(&achan->vc.lock); + + if (!vd) + return; + + list_del(&vd->node); + + /* write next command list out to the CMD FIFO */ + async_desc = container_of(vd, struct adm_async_desc, vd); + achan->curr_txd = async_desc; + + /* reset channel error */ + achan->error = 0; + + if (!achan->initialized) { + /* enable interrupts */ + writel(ADM_CH_CONF_SHADOW_EN | + ADM_CH_CONF_PERM_MPU_CONF | + ADM_CH_CONF_MPU_DISABLE | + ADM_CH_CONF_SEC_DOMAIN(adev->ee), + adev->regs + ADM_CH_CONF(achan->id)); + + writel(ADM_CH_RSLT_CONF_IRQ_EN | ADM_CH_RSLT_CONF_FLUSH_EN, + adev->regs + ADM_CH_RSLT_CONF(achan->id, adev->ee)); + + achan->initialized = 1; + } + + /* set the crci block size if this transaction requires CRCI */ + if (async_desc->crci) { + writel(async_desc->mux | async_desc->blk_size, + adev->regs + ADM_CRCI_CTL(async_desc->crci, adev->ee)); + } + + /* make sure IRQ enable doesn't get reordered */ + wmb(); + + /* write next command list out to the CMD FIFO */ + writel(ALIGN(async_desc->dma_addr, ADM_DESC_ALIGN) >> 3, + adev->regs + ADM_CH_CMD_PTR(achan->id, adev->ee)); +} + +/** + * adm_dma_irq - irq handler for ADM controller + * @irq: IRQ of interrupt + * @data: callback data + * + * IRQ handler for the bam controller + */ +static irqreturn_t adm_dma_irq(int irq, void *data) +{ + struct adm_device *adev = data; + u32 srcs, i; + struct adm_async_desc *async_desc; + unsigned long flags; + + srcs = readl_relaxed(adev->regs + + ADM_SEC_DOMAIN_IRQ_STATUS(adev->ee)); + + for (i = 0; i < ADM_MAX_CHANNELS; i++) { + struct adm_chan *achan = &adev->channels[i]; + u32 status, result; + + if (srcs & BIT(i)) { + status = readl_relaxed(adev->regs + + ADM_CH_STATUS_SD(i, adev->ee)); + + /* if no result present, skip */ + if (!(status & ADM_CH_STATUS_VALID)) + continue; + + result = readl_relaxed(adev->regs + + ADM_CH_RSLT(i, adev->ee)); + + /* no valid results, skip */ + if (!(result & ADM_CH_RSLT_VALID)) + continue; + + /* flag error if transaction was flushed or failed */ + if (result & (ADM_CH_RSLT_ERR | ADM_CH_RSLT_FLUSH)) + achan->error = 1; + + spin_lock_irqsave(&achan->vc.lock, flags); + async_desc = achan->curr_txd; + + achan->curr_txd = NULL; + + if (async_desc) { + vchan_cookie_complete(&async_desc->vd); + + /* kick off next DMA */ + adm_start_dma(achan); + } + + spin_unlock_irqrestore(&achan->vc.lock, flags); + } + } + + return IRQ_HANDLED; +} + +/** + * adm_tx_status - returns status of transaction + * @chan: dma channel + * @cookie: transaction cookie + * @txstate: DMA transaction state + * + * Return status of dma transaction + */ +static enum dma_status adm_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct adm_chan *achan = to_adm_chan(chan); + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + size_t residue = 0; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&achan->vc.lock, flags); + + vd = vchan_find_desc(&achan->vc, cookie); + if (vd) + residue = container_of(vd, struct adm_async_desc, vd)->length; + + spin_unlock_irqrestore(&achan->vc.lock, flags); + + /* + * residue is either the full length if it is in the issued list, or 0 + * if it is in progress. We have no reliable way of determining + * anything inbetween + */ + dma_set_residue(txstate, residue); + + if (achan->error) + return DMA_ERROR; + + return ret; +} + +/** + * adm_issue_pending - starts pending transactions + * @chan: dma channel + * + * Issues all pending transactions and starts DMA + */ +static void adm_issue_pending(struct dma_chan *chan) +{ + struct adm_chan *achan = to_adm_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&achan->vc.lock, flags); + + if (vchan_issue_pending(&achan->vc) && !achan->curr_txd) + adm_start_dma(achan); + spin_unlock_irqrestore(&achan->vc.lock, flags); +} + +/** + * adm_dma_free_desc - free descriptor memory + * @vd: virtual descriptor + * + */ +static void adm_dma_free_desc(struct virt_dma_desc *vd) +{ + struct adm_async_desc *async_desc = container_of(vd, + struct adm_async_desc, vd); + + dma_unmap_single(async_desc->adev->dev, async_desc->dma_addr, + async_desc->dma_len, DMA_TO_DEVICE); + kfree(async_desc->cpl); + kfree(async_desc); +} + +static void adm_channel_init(struct adm_device *adev, struct adm_chan *achan, + u32 index) +{ + achan->id = index; + achan->adev = adev; + + vchan_init(&achan->vc, &adev->common); + achan->vc.desc_free = adm_dma_free_desc; +} + +/** + * adm_dma_xlate + * @dma_spec: pointer to DMA specifier as found in the device tree + * @ofdma: pointer to DMA controller data + * + * This can use either 1-cell or 2-cell formats, the first cell + * identifies the slave device, while the optional second cell + * contains the crci value. + * + * Returns pointer to appropriate dma channel on success or NULL on error. + */ +static struct dma_chan *adm_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_device *dev = ofdma->of_dma_data; + struct dma_chan *chan, *candidate = NULL; + struct adm_chan *achan; + + if (!dev || dma_spec->args_count > 2) + return NULL; + + list_for_each_entry(chan, &dev->channels, device_node) + if (chan->chan_id == dma_spec->args[0]) { + candidate = chan; + break; + } + + if (!candidate) + return NULL; + + achan = to_adm_chan(candidate); + if (dma_spec->args_count == 2) + achan->crci = dma_spec->args[1]; + else + achan->crci = 0; + + return dma_get_slave_channel(candidate); +} + +static int adm_dma_probe(struct platform_device *pdev) +{ + struct adm_device *adev; + int ret; + u32 i; + + adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + + adev->dev = &pdev->dev; + + adev->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(adev->regs)) + return PTR_ERR(adev->regs); + + adev->irq = platform_get_irq(pdev, 0); + if (adev->irq < 0) + return adev->irq; + + ret = of_property_read_u32(pdev->dev.of_node, "qcom,ee", &adev->ee); + if (ret) { + dev_err(adev->dev, "Execution environment unspecified\n"); + return ret; + } + + adev->core_clk = devm_clk_get(adev->dev, "core"); + if (IS_ERR(adev->core_clk)) + return PTR_ERR(adev->core_clk); + + adev->iface_clk = devm_clk_get(adev->dev, "iface"); + if (IS_ERR(adev->iface_clk)) + return PTR_ERR(adev->iface_clk); + + adev->clk_reset = devm_reset_control_get_exclusive(&pdev->dev, "clk"); + if (IS_ERR(adev->clk_reset)) { + dev_err(adev->dev, "failed to get ADM0 reset\n"); + return PTR_ERR(adev->clk_reset); + } + + adev->c0_reset = devm_reset_control_get_exclusive(&pdev->dev, "c0"); + if (IS_ERR(adev->c0_reset)) { + dev_err(adev->dev, "failed to get ADM0 C0 reset\n"); + return PTR_ERR(adev->c0_reset); + } + + adev->c1_reset = devm_reset_control_get_exclusive(&pdev->dev, "c1"); + if (IS_ERR(adev->c1_reset)) { + dev_err(adev->dev, "failed to get ADM0 C1 reset\n"); + return PTR_ERR(adev->c1_reset); + } + + adev->c2_reset = devm_reset_control_get_exclusive(&pdev->dev, "c2"); + if (IS_ERR(adev->c2_reset)) { + dev_err(adev->dev, "failed to get ADM0 C2 reset\n"); + return PTR_ERR(adev->c2_reset); + } + + ret = clk_prepare_enable(adev->core_clk); + if (ret) { + dev_err(adev->dev, "failed to prepare/enable core clock\n"); + return ret; + } + + ret = clk_prepare_enable(adev->iface_clk); + if (ret) { + dev_err(adev->dev, "failed to prepare/enable iface clock\n"); + goto err_disable_core_clk; + } + + reset_control_assert(adev->clk_reset); + reset_control_assert(adev->c0_reset); + reset_control_assert(adev->c1_reset); + reset_control_assert(adev->c2_reset); + + udelay(2); + + reset_control_deassert(adev->clk_reset); + reset_control_deassert(adev->c0_reset); + reset_control_deassert(adev->c1_reset); + reset_control_deassert(adev->c2_reset); + + adev->channels = devm_kcalloc(adev->dev, ADM_MAX_CHANNELS, + sizeof(*adev->channels), GFP_KERNEL); + + if (!adev->channels) { + ret = -ENOMEM; + goto err_disable_clks; + } + + /* allocate and initialize channels */ + INIT_LIST_HEAD(&adev->common.channels); + + for (i = 0; i < ADM_MAX_CHANNELS; i++) + adm_channel_init(adev, &adev->channels[i], i); + + /* reset CRCIs */ + for (i = 0; i < 16; i++) + writel(ADM_CRCI_CTL_RST, adev->regs + + ADM_CRCI_CTL(i, adev->ee)); + + /* configure client interfaces */ + writel(ADM_CI_RANGE_START(0x40) | ADM_CI_RANGE_END(0xb0) | + ADM_CI_BURST_8_WORDS, adev->regs + ADM_CI_CONF(0)); + writel(ADM_CI_RANGE_START(0x2a) | ADM_CI_RANGE_END(0x2c) | + ADM_CI_BURST_8_WORDS, adev->regs + ADM_CI_CONF(1)); + writel(ADM_CI_RANGE_START(0x12) | ADM_CI_RANGE_END(0x28) | + ADM_CI_BURST_8_WORDS, adev->regs + ADM_CI_CONF(2)); + writel(ADM_GP_CTL_LP_EN | ADM_GP_CTL_LP_CNT(0xf), + adev->regs + ADM_GP_CTL); + + ret = devm_request_irq(adev->dev, adev->irq, adm_dma_irq, + 0, "adm_dma", adev); + if (ret) + goto err_disable_clks; + + platform_set_drvdata(pdev, adev); + + adev->common.dev = adev->dev; + adev->common.dev->dma_parms = &adev->dma_parms; + + /* set capabilities */ + dma_cap_zero(adev->common.cap_mask); + dma_cap_set(DMA_SLAVE, adev->common.cap_mask); + dma_cap_set(DMA_PRIVATE, adev->common.cap_mask); + + /* initialize dmaengine apis */ + adev->common.directions = BIT(DMA_DEV_TO_MEM | DMA_MEM_TO_DEV); + adev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + adev->common.src_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES; + adev->common.dst_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES; + adev->common.device_free_chan_resources = adm_free_chan; + adev->common.device_prep_slave_sg = adm_prep_slave_sg; + adev->common.device_issue_pending = adm_issue_pending; + adev->common.device_tx_status = adm_tx_status; + adev->common.device_terminate_all = adm_terminate_all; + adev->common.device_config = adm_slave_config; + + ret = dma_async_device_register(&adev->common); + if (ret) { + dev_err(adev->dev, "failed to register dma async device\n"); + goto err_disable_clks; + } + + ret = of_dma_controller_register(pdev->dev.of_node, adm_dma_xlate, + &adev->common); + if (ret) + goto err_unregister_dma; + + return 0; + +err_unregister_dma: + dma_async_device_unregister(&adev->common); +err_disable_clks: + clk_disable_unprepare(adev->iface_clk); +err_disable_core_clk: + clk_disable_unprepare(adev->core_clk); + + return ret; +} + +static int adm_dma_remove(struct platform_device *pdev) +{ + struct adm_device *adev = platform_get_drvdata(pdev); + struct adm_chan *achan; + u32 i; + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&adev->common); + + for (i = 0; i < ADM_MAX_CHANNELS; i++) { + achan = &adev->channels[i]; + + /* mask IRQs for this channel/EE pair */ + writel(0, adev->regs + ADM_CH_RSLT_CONF(achan->id, adev->ee)); + + tasklet_kill(&adev->channels[i].vc.task); + adm_terminate_all(&adev->channels[i].vc.chan); + } + + devm_free_irq(adev->dev, adev->irq, adev); + + clk_disable_unprepare(adev->core_clk); + clk_disable_unprepare(adev->iface_clk); + + return 0; +} + +static const struct of_device_id adm_of_match[] = { + { .compatible = "qcom,adm", }, + {} +}; +MODULE_DEVICE_TABLE(of, adm_of_match); + +static struct platform_driver adm_dma_driver = { + .probe = adm_dma_probe, + .remove = adm_dma_remove, + .driver = { + .name = "adm-dma-engine", + .of_match_table = adm_of_match, + }, +}; + +module_platform_driver(adm_dma_driver); + +MODULE_AUTHOR("Andy Gross "); +MODULE_DESCRIPTION("QCOM ADM DMA engine driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c new file mode 100644 index 0000000000..a29c13cae7 --- /dev/null +++ b/drivers/dma/sa11x0-dma.c @@ -0,0 +1,1102 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SA11x0 DMAengine support + * + * Copyright (C) 2012 Russell King + * Derived in part from arch/arm/mach-sa1100/dma.c, + * Copyright (C) 2000, 2001 by Nicolas Pitre + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +#define NR_PHY_CHAN 6 +#define DMA_ALIGN 3 +#define DMA_MAX_SIZE 0x1fff +#define DMA_CHUNK_SIZE 0x1000 + +#define DMA_DDAR 0x00 +#define DMA_DCSR_S 0x04 +#define DMA_DCSR_C 0x08 +#define DMA_DCSR_R 0x0c +#define DMA_DBSA 0x10 +#define DMA_DBTA 0x14 +#define DMA_DBSB 0x18 +#define DMA_DBTB 0x1c +#define DMA_SIZE 0x20 + +#define DCSR_RUN (1 << 0) +#define DCSR_IE (1 << 1) +#define DCSR_ERROR (1 << 2) +#define DCSR_DONEA (1 << 3) +#define DCSR_STRTA (1 << 4) +#define DCSR_DONEB (1 << 5) +#define DCSR_STRTB (1 << 6) +#define DCSR_BIU (1 << 7) + +#define DDAR_RW (1 << 0) /* 0 = W, 1 = R */ +#define DDAR_E (1 << 1) /* 0 = LE, 1 = BE */ +#define DDAR_BS (1 << 2) /* 0 = BS4, 1 = BS8 */ +#define DDAR_DW (1 << 3) /* 0 = 8b, 1 = 16b */ +#define DDAR_Ser0UDCTr (0x0 << 4) +#define DDAR_Ser0UDCRc (0x1 << 4) +#define DDAR_Ser1SDLCTr (0x2 << 4) +#define DDAR_Ser1SDLCRc (0x3 << 4) +#define DDAR_Ser1UARTTr (0x4 << 4) +#define DDAR_Ser1UARTRc (0x5 << 4) +#define DDAR_Ser2ICPTr (0x6 << 4) +#define DDAR_Ser2ICPRc (0x7 << 4) +#define DDAR_Ser3UARTTr (0x8 << 4) +#define DDAR_Ser3UARTRc (0x9 << 4) +#define DDAR_Ser4MCP0Tr (0xa << 4) +#define DDAR_Ser4MCP0Rc (0xb << 4) +#define DDAR_Ser4MCP1Tr (0xc << 4) +#define DDAR_Ser4MCP1Rc (0xd << 4) +#define DDAR_Ser4SSPTr (0xe << 4) +#define DDAR_Ser4SSPRc (0xf << 4) + +struct sa11x0_dma_sg { + u32 addr; + u32 len; +}; + +struct sa11x0_dma_desc { + struct virt_dma_desc vd; + + u32 ddar; + size_t size; + unsigned period; + bool cyclic; + + unsigned sglen; + struct sa11x0_dma_sg sg[]; +}; + +struct sa11x0_dma_phy; + +struct sa11x0_dma_chan { + struct virt_dma_chan vc; + + /* protected by c->vc.lock */ + struct sa11x0_dma_phy *phy; + enum dma_status status; + + /* protected by d->lock */ + struct list_head node; + + u32 ddar; + const char *name; +}; + +struct sa11x0_dma_phy { + void __iomem *base; + struct sa11x0_dma_dev *dev; + unsigned num; + + struct sa11x0_dma_chan *vchan; + + /* Protected by c->vc.lock */ + unsigned sg_load; + struct sa11x0_dma_desc *txd_load; + unsigned sg_done; + struct sa11x0_dma_desc *txd_done; + u32 dbs[2]; + u32 dbt[2]; + u32 dcsr; +}; + +struct sa11x0_dma_dev { + struct dma_device slave; + void __iomem *base; + spinlock_t lock; + struct tasklet_struct task; + struct list_head chan_pending; + struct sa11x0_dma_phy phy[NR_PHY_CHAN]; +}; + +static struct sa11x0_dma_chan *to_sa11x0_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct sa11x0_dma_chan, vc.chan); +} + +static struct sa11x0_dma_dev *to_sa11x0_dma(struct dma_device *dmadev) +{ + return container_of(dmadev, struct sa11x0_dma_dev, slave); +} + +static struct sa11x0_dma_desc *sa11x0_dma_next_desc(struct sa11x0_dma_chan *c) +{ + struct virt_dma_desc *vd = vchan_next_desc(&c->vc); + + return vd ? container_of(vd, struct sa11x0_dma_desc, vd) : NULL; +} + +static void sa11x0_dma_free_desc(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct sa11x0_dma_desc, vd)); +} + +static void sa11x0_dma_start_desc(struct sa11x0_dma_phy *p, struct sa11x0_dma_desc *txd) +{ + list_del(&txd->vd.node); + p->txd_load = txd; + p->sg_load = 0; + + dev_vdbg(p->dev->slave.dev, "pchan %u: txd %p[%x]: starting: DDAR:%x\n", + p->num, &txd->vd, txd->vd.tx.cookie, txd->ddar); +} + +static void noinline sa11x0_dma_start_sg(struct sa11x0_dma_phy *p, + struct sa11x0_dma_chan *c) +{ + struct sa11x0_dma_desc *txd = p->txd_load; + struct sa11x0_dma_sg *sg; + void __iomem *base = p->base; + unsigned dbsx, dbtx; + u32 dcsr; + + if (!txd) + return; + + dcsr = readl_relaxed(base + DMA_DCSR_R); + + /* Don't try to load the next transfer if both buffers are started */ + if ((dcsr & (DCSR_STRTA | DCSR_STRTB)) == (DCSR_STRTA | DCSR_STRTB)) + return; + + if (p->sg_load == txd->sglen) { + if (!txd->cyclic) { + struct sa11x0_dma_desc *txn = sa11x0_dma_next_desc(c); + + /* + * We have reached the end of the current descriptor. + * Peek at the next descriptor, and if compatible with + * the current, start processing it. + */ + if (txn && txn->ddar == txd->ddar) { + txd = txn; + sa11x0_dma_start_desc(p, txn); + } else { + p->txd_load = NULL; + return; + } + } else { + /* Cyclic: reset back to beginning */ + p->sg_load = 0; + } + } + + sg = &txd->sg[p->sg_load++]; + + /* Select buffer to load according to channel status */ + if (((dcsr & (DCSR_BIU | DCSR_STRTB)) == (DCSR_BIU | DCSR_STRTB)) || + ((dcsr & (DCSR_BIU | DCSR_STRTA)) == 0)) { + dbsx = DMA_DBSA; + dbtx = DMA_DBTA; + dcsr = DCSR_STRTA | DCSR_IE | DCSR_RUN; + } else { + dbsx = DMA_DBSB; + dbtx = DMA_DBTB; + dcsr = DCSR_STRTB | DCSR_IE | DCSR_RUN; + } + + writel_relaxed(sg->addr, base + dbsx); + writel_relaxed(sg->len, base + dbtx); + writel(dcsr, base + DMA_DCSR_S); + + dev_dbg(p->dev->slave.dev, "pchan %u: load: DCSR:%02x DBS%c:%08x DBT%c:%08x\n", + p->num, dcsr, + 'A' + (dbsx == DMA_DBSB), sg->addr, + 'A' + (dbtx == DMA_DBTB), sg->len); +} + +static void noinline sa11x0_dma_complete(struct sa11x0_dma_phy *p, + struct sa11x0_dma_chan *c) +{ + struct sa11x0_dma_desc *txd = p->txd_done; + + if (++p->sg_done == txd->sglen) { + if (!txd->cyclic) { + vchan_cookie_complete(&txd->vd); + + p->sg_done = 0; + p->txd_done = p->txd_load; + + if (!p->txd_done) + tasklet_schedule(&p->dev->task); + } else { + if ((p->sg_done % txd->period) == 0) + vchan_cyclic_callback(&txd->vd); + + /* Cyclic: reset back to beginning */ + p->sg_done = 0; + } + } + + sa11x0_dma_start_sg(p, c); +} + +static irqreturn_t sa11x0_dma_irq(int irq, void *dev_id) +{ + struct sa11x0_dma_phy *p = dev_id; + struct sa11x0_dma_dev *d = p->dev; + struct sa11x0_dma_chan *c; + u32 dcsr; + + dcsr = readl_relaxed(p->base + DMA_DCSR_R); + if (!(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB))) + return IRQ_NONE; + + /* Clear reported status bits */ + writel_relaxed(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB), + p->base + DMA_DCSR_C); + + dev_dbg(d->slave.dev, "pchan %u: irq: DCSR:%02x\n", p->num, dcsr); + + if (dcsr & DCSR_ERROR) { + dev_err(d->slave.dev, "pchan %u: error. DCSR:%02x DDAR:%08x DBSA:%08x DBTA:%08x DBSB:%08x DBTB:%08x\n", + p->num, dcsr, + readl_relaxed(p->base + DMA_DDAR), + readl_relaxed(p->base + DMA_DBSA), + readl_relaxed(p->base + DMA_DBTA), + readl_relaxed(p->base + DMA_DBSB), + readl_relaxed(p->base + DMA_DBTB)); + } + + c = p->vchan; + if (c) { + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + /* + * Now that we're holding the lock, check that the vchan + * really is associated with this pchan before touching the + * hardware. This should always succeed, because we won't + * change p->vchan or c->phy while the channel is actively + * transferring. + */ + if (c->phy == p) { + if (dcsr & DCSR_DONEA) + sa11x0_dma_complete(p, c); + if (dcsr & DCSR_DONEB) + sa11x0_dma_complete(p, c); + } + spin_unlock_irqrestore(&c->vc.lock, flags); + } + + return IRQ_HANDLED; +} + +static void sa11x0_dma_start_txd(struct sa11x0_dma_chan *c) +{ + struct sa11x0_dma_desc *txd = sa11x0_dma_next_desc(c); + + /* If the issued list is empty, we have no further txds to process */ + if (txd) { + struct sa11x0_dma_phy *p = c->phy; + + sa11x0_dma_start_desc(p, txd); + p->txd_done = txd; + p->sg_done = 0; + + /* The channel should not have any transfers started */ + WARN_ON(readl_relaxed(p->base + DMA_DCSR_R) & + (DCSR_STRTA | DCSR_STRTB)); + + /* Clear the run and start bits before changing DDAR */ + writel_relaxed(DCSR_RUN | DCSR_STRTA | DCSR_STRTB, + p->base + DMA_DCSR_C); + writel_relaxed(txd->ddar, p->base + DMA_DDAR); + + /* Try to start both buffers */ + sa11x0_dma_start_sg(p, c); + sa11x0_dma_start_sg(p, c); + } +} + +static void sa11x0_dma_tasklet(struct tasklet_struct *t) +{ + struct sa11x0_dma_dev *d = from_tasklet(d, t, task); + struct sa11x0_dma_phy *p; + struct sa11x0_dma_chan *c; + unsigned pch, pch_alloc = 0; + + dev_dbg(d->slave.dev, "tasklet enter\n"); + + list_for_each_entry(c, &d->slave.channels, vc.chan.device_node) { + spin_lock_irq(&c->vc.lock); + p = c->phy; + if (p && !p->txd_done) { + sa11x0_dma_start_txd(c); + if (!p->txd_done) { + /* No current txd associated with this channel */ + dev_dbg(d->slave.dev, "pchan %u: free\n", p->num); + + /* Mark this channel free */ + c->phy = NULL; + p->vchan = NULL; + } + } + spin_unlock_irq(&c->vc.lock); + } + + spin_lock_irq(&d->lock); + for (pch = 0; pch < NR_PHY_CHAN; pch++) { + p = &d->phy[pch]; + + if (p->vchan == NULL && !list_empty(&d->chan_pending)) { + c = list_first_entry(&d->chan_pending, + struct sa11x0_dma_chan, node); + list_del_init(&c->node); + + pch_alloc |= 1 << pch; + + /* Mark this channel allocated */ + p->vchan = c; + + dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc); + } + } + spin_unlock_irq(&d->lock); + + for (pch = 0; pch < NR_PHY_CHAN; pch++) { + if (pch_alloc & (1 << pch)) { + p = &d->phy[pch]; + c = p->vchan; + + spin_lock_irq(&c->vc.lock); + c->phy = p; + + sa11x0_dma_start_txd(c); + spin_unlock_irq(&c->vc.lock); + } + } + + dev_dbg(d->slave.dev, "tasklet exit\n"); +} + + +static void sa11x0_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + unsigned long flags; + + spin_lock_irqsave(&d->lock, flags); + list_del_init(&c->node); + spin_unlock_irqrestore(&d->lock, flags); + + vchan_free_chan_resources(&c->vc); +} + +static dma_addr_t sa11x0_dma_pos(struct sa11x0_dma_phy *p) +{ + unsigned reg; + u32 dcsr; + + dcsr = readl_relaxed(p->base + DMA_DCSR_R); + + if ((dcsr & (DCSR_BIU | DCSR_STRTA)) == DCSR_STRTA || + (dcsr & (DCSR_BIU | DCSR_STRTB)) == DCSR_BIU) + reg = DMA_DBSA; + else + reg = DMA_DBSB; + + return readl_relaxed(p->base + reg); +} + +static enum dma_status sa11x0_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + struct sa11x0_dma_phy *p; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + + ret = dma_cookie_status(&c->vc.chan, cookie, state); + if (ret == DMA_COMPLETE) + return ret; + + if (!state) + return c->status; + + spin_lock_irqsave(&c->vc.lock, flags); + p = c->phy; + + /* + * If the cookie is on our issue queue, then the residue is + * its total size. + */ + vd = vchan_find_desc(&c->vc, cookie); + if (vd) { + state->residue = container_of(vd, struct sa11x0_dma_desc, vd)->size; + } else if (!p) { + state->residue = 0; + } else { + struct sa11x0_dma_desc *txd; + size_t bytes = 0; + + if (p->txd_done && p->txd_done->vd.tx.cookie == cookie) + txd = p->txd_done; + else if (p->txd_load && p->txd_load->vd.tx.cookie == cookie) + txd = p->txd_load; + else + txd = NULL; + + ret = c->status; + if (txd) { + dma_addr_t addr = sa11x0_dma_pos(p); + unsigned i; + + dev_vdbg(d->slave.dev, "tx_status: addr:%pad\n", &addr); + + for (i = 0; i < txd->sglen; i++) { + dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x\n", + i, txd->sg[i].addr, txd->sg[i].len); + if (addr >= txd->sg[i].addr && + addr < txd->sg[i].addr + txd->sg[i].len) { + unsigned len; + + len = txd->sg[i].len - + (addr - txd->sg[i].addr); + dev_vdbg(d->slave.dev, "tx_status: [%u] +%x\n", + i, len); + bytes += len; + i++; + break; + } + } + for (; i < txd->sglen; i++) { + dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x ++\n", + i, txd->sg[i].addr, txd->sg[i].len); + bytes += txd->sg[i].len; + } + } + state->residue = bytes; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + + dev_vdbg(d->slave.dev, "tx_status: bytes 0x%x\n", state->residue); + + return ret; +} + +/* + * Move pending txds to the issued list, and re-init pending list. + * If not already pending, add this channel to the list of pending + * channels and trigger the tasklet to run. + */ +static void sa11x0_dma_issue_pending(struct dma_chan *chan) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + if (vchan_issue_pending(&c->vc)) { + if (!c->phy) { + spin_lock(&d->lock); + if (list_empty(&c->node)) { + list_add_tail(&c->node, &d->chan_pending); + tasklet_schedule(&d->task); + dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc); + } + spin_unlock(&d->lock); + } + } else + dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc); + spin_unlock_irqrestore(&c->vc.lock, flags); +} + +static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sg, unsigned int sglen, + enum dma_transfer_direction dir, unsigned long flags, void *context) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_desc *txd; + struct scatterlist *sgent; + unsigned i, j = sglen; + size_t size = 0; + + /* SA11x0 channels can only operate in their native direction */ + if (dir != (c->ddar & DDAR_RW ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV)) { + dev_err(chan->device->dev, "vchan %p: bad DMA direction: DDAR:%08x dir:%u\n", + &c->vc, c->ddar, dir); + return NULL; + } + + /* Do not allow zero-sized txds */ + if (sglen == 0) + return NULL; + + for_each_sg(sg, sgent, sglen, i) { + dma_addr_t addr = sg_dma_address(sgent); + unsigned int len = sg_dma_len(sgent); + + if (len > DMA_MAX_SIZE) + j += DIV_ROUND_UP(len, DMA_MAX_SIZE & ~DMA_ALIGN) - 1; + if (addr & DMA_ALIGN) { + dev_dbg(chan->device->dev, "vchan %p: bad buffer alignment: %pad\n", + &c->vc, &addr); + return NULL; + } + } + + txd = kzalloc(struct_size(txd, sg, j), GFP_ATOMIC); + if (!txd) { + dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", &c->vc); + return NULL; + } + + j = 0; + for_each_sg(sg, sgent, sglen, i) { + dma_addr_t addr = sg_dma_address(sgent); + unsigned len = sg_dma_len(sgent); + + size += len; + + do { + unsigned tlen = len; + + /* + * Check whether the transfer will fit. If not, try + * to split the transfer up such that we end up with + * equal chunks - but make sure that we preserve the + * alignment. This avoids small segments. + */ + if (tlen > DMA_MAX_SIZE) { + unsigned mult = DIV_ROUND_UP(tlen, + DMA_MAX_SIZE & ~DMA_ALIGN); + + tlen = (tlen / mult) & ~DMA_ALIGN; + } + + txd->sg[j].addr = addr; + txd->sg[j].len = tlen; + + addr += tlen; + len -= tlen; + j++; + } while (len); + } + + txd->ddar = c->ddar; + txd->size = size; + txd->sglen = j; + + dev_dbg(chan->device->dev, "vchan %p: txd %p: size %zu nr %u\n", + &c->vc, &txd->vd, txd->size, txd->sglen); + + return vchan_tx_prep(&c->vc, &txd->vd, flags); +} + +static struct dma_async_tx_descriptor *sa11x0_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_desc *txd; + unsigned i, j, k, sglen, sgperiod; + + /* SA11x0 channels can only operate in their native direction */ + if (dir != (c->ddar & DDAR_RW ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV)) { + dev_err(chan->device->dev, "vchan %p: bad DMA direction: DDAR:%08x dir:%u\n", + &c->vc, c->ddar, dir); + return NULL; + } + + sgperiod = DIV_ROUND_UP(period, DMA_MAX_SIZE & ~DMA_ALIGN); + sglen = size * sgperiod / period; + + /* Do not allow zero-sized txds */ + if (sglen == 0) + return NULL; + + txd = kzalloc(struct_size(txd, sg, sglen), GFP_ATOMIC); + if (!txd) { + dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", &c->vc); + return NULL; + } + + for (i = k = 0; i < size / period; i++) { + size_t tlen, len = period; + + for (j = 0; j < sgperiod; j++, k++) { + tlen = len; + + if (tlen > DMA_MAX_SIZE) { + unsigned mult = DIV_ROUND_UP(tlen, DMA_MAX_SIZE & ~DMA_ALIGN); + tlen = (tlen / mult) & ~DMA_ALIGN; + } + + txd->sg[k].addr = addr; + txd->sg[k].len = tlen; + addr += tlen; + len -= tlen; + } + + WARN_ON(len != 0); + } + + WARN_ON(k != sglen); + + txd->ddar = c->ddar; + txd->size = size; + txd->sglen = sglen; + txd->cyclic = 1; + txd->period = sgperiod; + + return vchan_tx_prep(&c->vc, &txd->vd, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); +} + +static int sa11x0_dma_device_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + u32 ddar = c->ddar & ((0xf << 4) | DDAR_RW); + dma_addr_t addr; + enum dma_slave_buswidth width; + u32 maxburst; + + if (ddar & DDAR_RW) { + addr = cfg->src_addr; + width = cfg->src_addr_width; + maxburst = cfg->src_maxburst; + } else { + addr = cfg->dst_addr; + width = cfg->dst_addr_width; + maxburst = cfg->dst_maxburst; + } + + if ((width != DMA_SLAVE_BUSWIDTH_1_BYTE && + width != DMA_SLAVE_BUSWIDTH_2_BYTES) || + (maxburst != 4 && maxburst != 8)) + return -EINVAL; + + if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + ddar |= DDAR_DW; + if (maxburst == 8) + ddar |= DDAR_BS; + + dev_dbg(c->vc.chan.device->dev, "vchan %p: dma_slave_config addr %pad width %u burst %u\n", + &c->vc, &addr, width, maxburst); + + c->ddar = ddar | (addr & 0xf0000000) | (addr & 0x003ffffc) << 6; + + return 0; +} + +static int sa11x0_dma_device_pause(struct dma_chan *chan) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + struct sa11x0_dma_phy *p; + unsigned long flags; + + dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc); + spin_lock_irqsave(&c->vc.lock, flags); + if (c->status == DMA_IN_PROGRESS) { + c->status = DMA_PAUSED; + + p = c->phy; + if (p) { + writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C); + } else { + spin_lock(&d->lock); + list_del_init(&c->node); + spin_unlock(&d->lock); + } + } + spin_unlock_irqrestore(&c->vc.lock, flags); + + return 0; +} + +static int sa11x0_dma_device_resume(struct dma_chan *chan) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + struct sa11x0_dma_phy *p; + unsigned long flags; + + dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc); + spin_lock_irqsave(&c->vc.lock, flags); + if (c->status == DMA_PAUSED) { + c->status = DMA_IN_PROGRESS; + + p = c->phy; + if (p) { + writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_S); + } else if (!list_empty(&c->vc.desc_issued)) { + spin_lock(&d->lock); + list_add_tail(&c->node, &d->chan_pending); + spin_unlock(&d->lock); + } + } + spin_unlock_irqrestore(&c->vc.lock, flags); + + return 0; +} + +static int sa11x0_dma_device_terminate_all(struct dma_chan *chan) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + struct sa11x0_dma_phy *p; + LIST_HEAD(head); + unsigned long flags; + + dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc); + /* Clear the tx descriptor lists */ + spin_lock_irqsave(&c->vc.lock, flags); + vchan_get_all_descriptors(&c->vc, &head); + + p = c->phy; + if (p) { + dev_dbg(d->slave.dev, "pchan %u: terminating\n", p->num); + /* vchan is assigned to a pchan - stop the channel */ + writel(DCSR_RUN | DCSR_IE | + DCSR_STRTA | DCSR_DONEA | + DCSR_STRTB | DCSR_DONEB, + p->base + DMA_DCSR_C); + + if (p->txd_load) { + if (p->txd_load != p->txd_done) + list_add_tail(&p->txd_load->vd.node, &head); + p->txd_load = NULL; + } + if (p->txd_done) { + list_add_tail(&p->txd_done->vd.node, &head); + p->txd_done = NULL; + } + c->phy = NULL; + spin_lock(&d->lock); + p->vchan = NULL; + spin_unlock(&d->lock); + tasklet_schedule(&d->task); + } + spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_dma_desc_free_list(&c->vc, &head); + + return 0; +} + +struct sa11x0_dma_channel_desc { + u32 ddar; + const char *name; +}; + +#define CD(d1, d2) { .ddar = DDAR_##d1 | d2, .name = #d1 } +static const struct sa11x0_dma_channel_desc chan_desc[] = { + CD(Ser0UDCTr, 0), + CD(Ser0UDCRc, DDAR_RW), + CD(Ser1SDLCTr, 0), + CD(Ser1SDLCRc, DDAR_RW), + CD(Ser1UARTTr, 0), + CD(Ser1UARTRc, DDAR_RW), + CD(Ser2ICPTr, 0), + CD(Ser2ICPRc, DDAR_RW), + CD(Ser3UARTTr, 0), + CD(Ser3UARTRc, DDAR_RW), + CD(Ser4MCP0Tr, 0), + CD(Ser4MCP0Rc, DDAR_RW), + CD(Ser4MCP1Tr, 0), + CD(Ser4MCP1Rc, DDAR_RW), + CD(Ser4SSPTr, 0), + CD(Ser4SSPRc, DDAR_RW), +}; + +static const struct dma_slave_map sa11x0_dma_map[] = { + { "sa11x0-ir", "tx", "Ser2ICPTr" }, + { "sa11x0-ir", "rx", "Ser2ICPRc" }, + { "sa11x0-ssp", "tx", "Ser4SSPTr" }, + { "sa11x0-ssp", "rx", "Ser4SSPRc" }, +}; + +static bool sa11x0_dma_filter_fn(struct dma_chan *chan, void *param) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + const char *p = param; + + return !strcmp(c->name, p); +} + +static int sa11x0_dma_init_dmadev(struct dma_device *dmadev, + struct device *dev) +{ + unsigned i; + + INIT_LIST_HEAD(&dmadev->channels); + dmadev->dev = dev; + dmadev->device_free_chan_resources = sa11x0_dma_free_chan_resources; + dmadev->device_config = sa11x0_dma_device_config; + dmadev->device_pause = sa11x0_dma_device_pause; + dmadev->device_resume = sa11x0_dma_device_resume; + dmadev->device_terminate_all = sa11x0_dma_device_terminate_all; + dmadev->device_tx_status = sa11x0_dma_tx_status; + dmadev->device_issue_pending = sa11x0_dma_issue_pending; + + for (i = 0; i < ARRAY_SIZE(chan_desc); i++) { + struct sa11x0_dma_chan *c; + + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) { + dev_err(dev, "no memory for channel %u\n", i); + return -ENOMEM; + } + + c->status = DMA_IN_PROGRESS; + c->ddar = chan_desc[i].ddar; + c->name = chan_desc[i].name; + INIT_LIST_HEAD(&c->node); + + c->vc.desc_free = sa11x0_dma_free_desc; + vchan_init(&c->vc, dmadev); + } + + return dma_async_device_register(dmadev); +} + +static int sa11x0_dma_request_irq(struct platform_device *pdev, int nr, + void *data) +{ + int irq = platform_get_irq(pdev, nr); + + if (irq <= 0) + return -ENXIO; + + return request_irq(irq, sa11x0_dma_irq, 0, dev_name(&pdev->dev), data); +} + +static void sa11x0_dma_free_irq(struct platform_device *pdev, int nr, + void *data) +{ + int irq = platform_get_irq(pdev, nr); + if (irq > 0) + free_irq(irq, data); +} + +static void sa11x0_dma_free_channels(struct dma_device *dmadev) +{ + struct sa11x0_dma_chan *c, *cn; + + list_for_each_entry_safe(c, cn, &dmadev->channels, vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + kfree(c); + } +} + +static int sa11x0_dma_probe(struct platform_device *pdev) +{ + struct sa11x0_dma_dev *d; + struct resource *res; + unsigned i; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENXIO; + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + ret = -ENOMEM; + goto err_alloc; + } + + spin_lock_init(&d->lock); + INIT_LIST_HEAD(&d->chan_pending); + + d->slave.filter.fn = sa11x0_dma_filter_fn; + d->slave.filter.mapcnt = ARRAY_SIZE(sa11x0_dma_map); + d->slave.filter.map = sa11x0_dma_map; + + d->base = ioremap(res->start, resource_size(res)); + if (!d->base) { + ret = -ENOMEM; + goto err_ioremap; + } + + tasklet_setup(&d->task, sa11x0_dma_tasklet); + + for (i = 0; i < NR_PHY_CHAN; i++) { + struct sa11x0_dma_phy *p = &d->phy[i]; + + p->dev = d; + p->num = i; + p->base = d->base + i * DMA_SIZE; + writel_relaxed(DCSR_RUN | DCSR_IE | DCSR_ERROR | + DCSR_DONEA | DCSR_STRTA | DCSR_DONEB | DCSR_STRTB, + p->base + DMA_DCSR_C); + writel_relaxed(0, p->base + DMA_DDAR); + + ret = sa11x0_dma_request_irq(pdev, i, p); + if (ret) { + while (i) { + i--; + sa11x0_dma_free_irq(pdev, i, &d->phy[i]); + } + goto err_irq; + } + } + + dma_cap_set(DMA_SLAVE, d->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, d->slave.cap_mask); + d->slave.device_prep_slave_sg = sa11x0_dma_prep_slave_sg; + d->slave.device_prep_dma_cyclic = sa11x0_dma_prep_dma_cyclic; + d->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + d->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES); + d->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES); + ret = sa11x0_dma_init_dmadev(&d->slave, &pdev->dev); + if (ret) { + dev_warn(d->slave.dev, "failed to register slave async device: %d\n", + ret); + goto err_slave_reg; + } + + platform_set_drvdata(pdev, d); + return 0; + + err_slave_reg: + sa11x0_dma_free_channels(&d->slave); + for (i = 0; i < NR_PHY_CHAN; i++) + sa11x0_dma_free_irq(pdev, i, &d->phy[i]); + err_irq: + tasklet_kill(&d->task); + iounmap(d->base); + err_ioremap: + kfree(d); + err_alloc: + return ret; +} + +static int sa11x0_dma_remove(struct platform_device *pdev) +{ + struct sa11x0_dma_dev *d = platform_get_drvdata(pdev); + unsigned pch; + + dma_async_device_unregister(&d->slave); + + sa11x0_dma_free_channels(&d->slave); + for (pch = 0; pch < NR_PHY_CHAN; pch++) + sa11x0_dma_free_irq(pdev, pch, &d->phy[pch]); + tasklet_kill(&d->task); + iounmap(d->base); + kfree(d); + + return 0; +} + +static __maybe_unused int sa11x0_dma_suspend(struct device *dev) +{ + struct sa11x0_dma_dev *d = dev_get_drvdata(dev); + unsigned pch; + + for (pch = 0; pch < NR_PHY_CHAN; pch++) { + struct sa11x0_dma_phy *p = &d->phy[pch]; + u32 dcsr, saved_dcsr; + + dcsr = saved_dcsr = readl_relaxed(p->base + DMA_DCSR_R); + if (dcsr & DCSR_RUN) { + writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C); + dcsr = readl_relaxed(p->base + DMA_DCSR_R); + } + + saved_dcsr &= DCSR_RUN | DCSR_IE; + if (dcsr & DCSR_BIU) { + p->dbs[0] = readl_relaxed(p->base + DMA_DBSB); + p->dbt[0] = readl_relaxed(p->base + DMA_DBTB); + p->dbs[1] = readl_relaxed(p->base + DMA_DBSA); + p->dbt[1] = readl_relaxed(p->base + DMA_DBTA); + saved_dcsr |= (dcsr & DCSR_STRTA ? DCSR_STRTB : 0) | + (dcsr & DCSR_STRTB ? DCSR_STRTA : 0); + } else { + p->dbs[0] = readl_relaxed(p->base + DMA_DBSA); + p->dbt[0] = readl_relaxed(p->base + DMA_DBTA); + p->dbs[1] = readl_relaxed(p->base + DMA_DBSB); + p->dbt[1] = readl_relaxed(p->base + DMA_DBTB); + saved_dcsr |= dcsr & (DCSR_STRTA | DCSR_STRTB); + } + p->dcsr = saved_dcsr; + + writel(DCSR_STRTA | DCSR_STRTB, p->base + DMA_DCSR_C); + } + + return 0; +} + +static __maybe_unused int sa11x0_dma_resume(struct device *dev) +{ + struct sa11x0_dma_dev *d = dev_get_drvdata(dev); + unsigned pch; + + for (pch = 0; pch < NR_PHY_CHAN; pch++) { + struct sa11x0_dma_phy *p = &d->phy[pch]; + struct sa11x0_dma_desc *txd = NULL; + u32 dcsr = readl_relaxed(p->base + DMA_DCSR_R); + + WARN_ON(dcsr & (DCSR_BIU | DCSR_STRTA | DCSR_STRTB | DCSR_RUN)); + + if (p->txd_done) + txd = p->txd_done; + else if (p->txd_load) + txd = p->txd_load; + + if (!txd) + continue; + + writel_relaxed(txd->ddar, p->base + DMA_DDAR); + + writel_relaxed(p->dbs[0], p->base + DMA_DBSA); + writel_relaxed(p->dbt[0], p->base + DMA_DBTA); + writel_relaxed(p->dbs[1], p->base + DMA_DBSB); + writel_relaxed(p->dbt[1], p->base + DMA_DBTB); + writel_relaxed(p->dcsr, p->base + DMA_DCSR_S); + } + + return 0; +} + +static const struct dev_pm_ops sa11x0_dma_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(sa11x0_dma_suspend, sa11x0_dma_resume) +}; + +static struct platform_driver sa11x0_dma_driver = { + .driver = { + .name = "sa11x0-dma", + .pm = &sa11x0_dma_pm_ops, + }, + .probe = sa11x0_dma_probe, + .remove = sa11x0_dma_remove, +}; + +static int __init sa11x0_dma_init(void) +{ + return platform_driver_register(&sa11x0_dma_driver); +} +subsys_initcall(sa11x0_dma_init); + +static void __exit sa11x0_dma_exit(void) +{ + platform_driver_unregister(&sa11x0_dma_driver); +} +module_exit(sa11x0_dma_exit); + +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("SA-11x0 DMA driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:sa11x0-dma"); diff --git a/drivers/dma/sf-pdma/Kconfig b/drivers/dma/sf-pdma/Kconfig new file mode 100644 index 0000000000..ba46a0a15a --- /dev/null +++ b/drivers/dma/sf-pdma/Kconfig @@ -0,0 +1,7 @@ +config SF_PDMA + tristate "Sifive PDMA controller driver" + depends on HAS_IOMEM + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the SiFive PDMA controller. diff --git a/drivers/dma/sf-pdma/Makefile b/drivers/dma/sf-pdma/Makefile new file mode 100644 index 0000000000..764552ab8d --- /dev/null +++ b/drivers/dma/sf-pdma/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SF_PDMA) += sf-pdma.o diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c new file mode 100644 index 0000000000..d1c6956af4 --- /dev/null +++ b/drivers/dma/sf-pdma/sf-pdma.c @@ -0,0 +1,623 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SiFive FU540 Platform DMA driver + * Copyright (C) 2019 SiFive + * + * Based partially on: + * - drivers/dma/fsl-edma.c + * - drivers/dma/dw-edma/ + * - drivers/dma/pxa-dma.c + * + * See the following sources for further documentation: + * - Chapter 12 "Platform DMA Engine (PDMA)" of + * SiFive FU540-C000 v1.0 + * https://static.dev.sifive.com/FU540-C000-v1.0.pdf + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sf-pdma.h" + +#ifndef readq +static inline unsigned long long readq(void __iomem *addr) +{ + return readl(addr) | (((unsigned long long)readl(addr + 4)) << 32LL); +} +#endif + +#ifndef writeq +static inline void writeq(unsigned long long v, void __iomem *addr) +{ + writel(lower_32_bits(v), addr); + writel(upper_32_bits(v), addr + 4); +} +#endif + +static inline struct sf_pdma_chan *to_sf_pdma_chan(struct dma_chan *dchan) +{ + return container_of(dchan, struct sf_pdma_chan, vchan.chan); +} + +static inline struct sf_pdma_desc *to_sf_pdma_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct sf_pdma_desc, vdesc); +} + +static struct sf_pdma_desc *sf_pdma_alloc_desc(struct sf_pdma_chan *chan) +{ + struct sf_pdma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->chan = chan; + + return desc; +} + +static void sf_pdma_fill_desc(struct sf_pdma_desc *desc, + u64 dst, u64 src, u64 size) +{ + desc->xfer_type = PDMA_FULL_SPEED; + desc->xfer_size = size; + desc->dst_addr = dst; + desc->src_addr = src; +} + +static void sf_pdma_disclaim_chan(struct sf_pdma_chan *chan) +{ + struct pdma_regs *regs = &chan->regs; + + writel(PDMA_CLEAR_CTRL, regs->ctrl); +} + +static struct dma_async_tx_descriptor * +sf_pdma_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan); + struct sf_pdma_desc *desc; + unsigned long iflags; + + if (chan && (!len || !dest || !src)) { + dev_err(chan->pdma->dma_dev.dev, + "Please check dma len, dest, src!\n"); + return NULL; + } + + desc = sf_pdma_alloc_desc(chan); + if (!desc) + return NULL; + + desc->dirn = DMA_MEM_TO_MEM; + desc->async_tx = vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); + + spin_lock_irqsave(&chan->vchan.lock, iflags); + sf_pdma_fill_desc(desc, dest, src, len); + spin_unlock_irqrestore(&chan->vchan.lock, iflags); + + return desc->async_tx; +} + +static int sf_pdma_slave_config(struct dma_chan *dchan, + struct dma_slave_config *cfg) +{ + struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan); + + memcpy(&chan->cfg, cfg, sizeof(*cfg)); + + return 0; +} + +static int sf_pdma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan); + struct pdma_regs *regs = &chan->regs; + + dma_cookie_init(dchan); + writel(PDMA_CLAIM_MASK, regs->ctrl); + + return 0; +} + +static void sf_pdma_disable_request(struct sf_pdma_chan *chan) +{ + struct pdma_regs *regs = &chan->regs; + + writel(readl(regs->ctrl) & ~PDMA_RUN_MASK, regs->ctrl); +} + +static void sf_pdma_free_chan_resources(struct dma_chan *dchan) +{ + struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + sf_pdma_disable_request(chan); + kfree(chan->desc); + chan->desc = NULL; + vchan_get_all_descriptors(&chan->vchan, &head); + sf_pdma_disclaim_chan(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + vchan_dma_desc_free_list(&chan->vchan, &head); +} + +static size_t sf_pdma_desc_residue(struct sf_pdma_chan *chan, + dma_cookie_t cookie) +{ + struct virt_dma_desc *vd = NULL; + struct pdma_regs *regs = &chan->regs; + unsigned long flags; + u64 residue = 0; + struct sf_pdma_desc *desc; + struct dma_async_tx_descriptor *tx = NULL; + + spin_lock_irqsave(&chan->vchan.lock, flags); + + list_for_each_entry(vd, &chan->vchan.desc_submitted, node) + if (vd->tx.cookie == cookie) + tx = &vd->tx; + + if (!tx) + goto out; + + if (cookie == tx->chan->completed_cookie) + goto out; + + if (cookie == tx->cookie) { + residue = readq(regs->residue); + } else { + vd = vchan_find_desc(&chan->vchan, cookie); + if (!vd) + goto out; + + desc = to_sf_pdma_desc(vd); + residue = desc->xfer_size; + } + +out: + spin_unlock_irqrestore(&chan->vchan.lock, flags); + return residue; +} + +static enum dma_status +sf_pdma_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan); + enum dma_status status; + + status = dma_cookie_status(dchan, cookie, txstate); + + if (txstate && status != DMA_ERROR) + dma_set_residue(txstate, sf_pdma_desc_residue(chan, cookie)); + + return status; +} + +static int sf_pdma_terminate_all(struct dma_chan *dchan) +{ + struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + sf_pdma_disable_request(chan); + kfree(chan->desc); + chan->desc = NULL; + chan->xfer_err = false; + vchan_get_all_descriptors(&chan->vchan, &head); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + vchan_dma_desc_free_list(&chan->vchan, &head); + + return 0; +} + +static void sf_pdma_enable_request(struct sf_pdma_chan *chan) +{ + struct pdma_regs *regs = &chan->regs; + u32 v; + + v = PDMA_CLAIM_MASK | + PDMA_ENABLE_DONE_INT_MASK | + PDMA_ENABLE_ERR_INT_MASK | + PDMA_RUN_MASK; + + writel(v, regs->ctrl); +} + +static struct sf_pdma_desc *sf_pdma_get_first_pending_desc(struct sf_pdma_chan *chan) +{ + struct virt_dma_chan *vchan = &chan->vchan; + struct virt_dma_desc *vdesc; + + if (list_empty(&vchan->desc_issued)) + return NULL; + + vdesc = list_first_entry(&vchan->desc_issued, struct virt_dma_desc, node); + + return container_of(vdesc, struct sf_pdma_desc, vdesc); +} + +static void sf_pdma_xfer_desc(struct sf_pdma_chan *chan) +{ + struct sf_pdma_desc *desc = chan->desc; + struct pdma_regs *regs = &chan->regs; + + if (!desc) { + dev_err(chan->pdma->dma_dev.dev, "NULL desc.\n"); + return; + } + + writel(desc->xfer_type, regs->xfer_type); + writeq(desc->xfer_size, regs->xfer_size); + writeq(desc->dst_addr, regs->dst_addr); + writeq(desc->src_addr, regs->src_addr); + + chan->desc = desc; + chan->status = DMA_IN_PROGRESS; + sf_pdma_enable_request(chan); +} + +static void sf_pdma_issue_pending(struct dma_chan *dchan) +{ + struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->vchan.lock, flags); + + if (!chan->desc && vchan_issue_pending(&chan->vchan)) { + /* vchan_issue_pending has made a check that desc in not NULL */ + chan->desc = sf_pdma_get_first_pending_desc(chan); + sf_pdma_xfer_desc(chan); + } + + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static void sf_pdma_free_desc(struct virt_dma_desc *vdesc) +{ + struct sf_pdma_desc *desc; + + desc = to_sf_pdma_desc(vdesc); + kfree(desc); +} + +static void sf_pdma_donebh_tasklet(struct tasklet_struct *t) +{ + struct sf_pdma_chan *chan = from_tasklet(chan, t, done_tasklet); + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->xfer_err) { + chan->retries = MAX_RETRY; + chan->status = DMA_COMPLETE; + chan->xfer_err = false; + } + spin_unlock_irqrestore(&chan->lock, flags); + + spin_lock_irqsave(&chan->vchan.lock, flags); + list_del(&chan->desc->vdesc.node); + vchan_cookie_complete(&chan->desc->vdesc); + + chan->desc = sf_pdma_get_first_pending_desc(chan); + if (chan->desc) + sf_pdma_xfer_desc(chan); + + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static void sf_pdma_errbh_tasklet(struct tasklet_struct *t) +{ + struct sf_pdma_chan *chan = from_tasklet(chan, t, err_tasklet); + struct sf_pdma_desc *desc = chan->desc; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->retries <= 0) { + /* fail to recover */ + spin_unlock_irqrestore(&chan->lock, flags); + dmaengine_desc_get_callback_invoke(desc->async_tx, NULL); + } else { + /* retry */ + chan->retries--; + chan->xfer_err = true; + chan->status = DMA_ERROR; + + sf_pdma_enable_request(chan); + spin_unlock_irqrestore(&chan->lock, flags); + } +} + +static irqreturn_t sf_pdma_done_isr(int irq, void *dev_id) +{ + struct sf_pdma_chan *chan = dev_id; + struct pdma_regs *regs = &chan->regs; + u64 residue; + + spin_lock(&chan->vchan.lock); + writel((readl(regs->ctrl)) & ~PDMA_DONE_STATUS_MASK, regs->ctrl); + residue = readq(regs->residue); + + if (!residue) { + tasklet_hi_schedule(&chan->done_tasklet); + } else { + /* submit next trascatioin if possible */ + struct sf_pdma_desc *desc = chan->desc; + + desc->src_addr += desc->xfer_size - residue; + desc->dst_addr += desc->xfer_size - residue; + desc->xfer_size = residue; + + sf_pdma_xfer_desc(chan); + } + + spin_unlock(&chan->vchan.lock); + + return IRQ_HANDLED; +} + +static irqreturn_t sf_pdma_err_isr(int irq, void *dev_id) +{ + struct sf_pdma_chan *chan = dev_id; + struct pdma_regs *regs = &chan->regs; + + spin_lock(&chan->lock); + writel((readl(regs->ctrl)) & ~PDMA_ERR_STATUS_MASK, regs->ctrl); + spin_unlock(&chan->lock); + + tasklet_schedule(&chan->err_tasklet); + + return IRQ_HANDLED; +} + +/** + * sf_pdma_irq_init() - Init PDMA IRQ Handlers + * @pdev: pointer of platform_device + * @pdma: pointer of PDMA engine. Caller should check NULL + * + * Initialize DONE and ERROR interrupt handler for 4 channels. Caller should + * make sure the pointer passed in are non-NULL. This function should be called + * only one time during the device probe. + * + * Context: Any context. + * + * Return: + * * 0 - OK to init all IRQ handlers + * * -EINVAL - Fail to request IRQ + */ +static int sf_pdma_irq_init(struct platform_device *pdev, struct sf_pdma *pdma) +{ + int irq, r, i; + struct sf_pdma_chan *chan; + + for (i = 0; i < pdma->n_chans; i++) { + chan = &pdma->chans[i]; + + irq = platform_get_irq(pdev, i * 2); + if (irq < 0) + return -EINVAL; + + r = devm_request_irq(&pdev->dev, irq, sf_pdma_done_isr, 0, + dev_name(&pdev->dev), (void *)chan); + if (r) { + dev_err(&pdev->dev, "Fail to attach done ISR: %d\n", r); + return -EINVAL; + } + + chan->txirq = irq; + + irq = platform_get_irq(pdev, (i * 2) + 1); + if (irq < 0) + return -EINVAL; + + r = devm_request_irq(&pdev->dev, irq, sf_pdma_err_isr, 0, + dev_name(&pdev->dev), (void *)chan); + if (r) { + dev_err(&pdev->dev, "Fail to attach err ISR: %d\n", r); + return -EINVAL; + } + + chan->errirq = irq; + } + + return 0; +} + +/** + * sf_pdma_setup_chans() - Init settings of each channel + * @pdma: pointer of PDMA engine. Caller should check NULL + * + * Initialize all data structure and register base. Caller should make sure + * the pointer passed in are non-NULL. This function should be called only + * one time during the device probe. + * + * Context: Any context. + * + * Return: none + */ +static void sf_pdma_setup_chans(struct sf_pdma *pdma) +{ + int i; + struct sf_pdma_chan *chan; + + INIT_LIST_HEAD(&pdma->dma_dev.channels); + + for (i = 0; i < pdma->n_chans; i++) { + chan = &pdma->chans[i]; + + chan->regs.ctrl = + SF_PDMA_REG_BASE(i) + PDMA_CTRL; + chan->regs.xfer_type = + SF_PDMA_REG_BASE(i) + PDMA_XFER_TYPE; + chan->regs.xfer_size = + SF_PDMA_REG_BASE(i) + PDMA_XFER_SIZE; + chan->regs.dst_addr = + SF_PDMA_REG_BASE(i) + PDMA_DST_ADDR; + chan->regs.src_addr = + SF_PDMA_REG_BASE(i) + PDMA_SRC_ADDR; + chan->regs.act_type = + SF_PDMA_REG_BASE(i) + PDMA_ACT_TYPE; + chan->regs.residue = + SF_PDMA_REG_BASE(i) + PDMA_REMAINING_BYTE; + chan->regs.cur_dst_addr = + SF_PDMA_REG_BASE(i) + PDMA_CUR_DST_ADDR; + chan->regs.cur_src_addr = + SF_PDMA_REG_BASE(i) + PDMA_CUR_SRC_ADDR; + + chan->pdma = pdma; + chan->pm_state = RUNNING; + chan->slave_id = i; + chan->xfer_err = false; + spin_lock_init(&chan->lock); + + chan->vchan.desc_free = sf_pdma_free_desc; + vchan_init(&chan->vchan, &pdma->dma_dev); + + writel(PDMA_CLEAR_CTRL, chan->regs.ctrl); + + tasklet_setup(&chan->done_tasklet, sf_pdma_donebh_tasklet); + tasklet_setup(&chan->err_tasklet, sf_pdma_errbh_tasklet); + } +} + +static int sf_pdma_probe(struct platform_device *pdev) +{ + struct sf_pdma *pdma; + int ret, n_chans; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES | DMA_SLAVE_BUSWIDTH_8_BYTES | + DMA_SLAVE_BUSWIDTH_16_BYTES | DMA_SLAVE_BUSWIDTH_32_BYTES | + DMA_SLAVE_BUSWIDTH_64_BYTES; + + ret = of_property_read_u32(pdev->dev.of_node, "dma-channels", &n_chans); + if (ret) { + /* backwards-compatibility for no dma-channels property */ + dev_dbg(&pdev->dev, "set number of channels to default value: 4\n"); + n_chans = PDMA_MAX_NR_CH; + } else if (n_chans > PDMA_MAX_NR_CH) { + dev_err(&pdev->dev, "the number of channels exceeds the maximum\n"); + return -EINVAL; + } + + pdma = devm_kzalloc(&pdev->dev, struct_size(pdma, chans, n_chans), + GFP_KERNEL); + if (!pdma) + return -ENOMEM; + + pdma->n_chans = n_chans; + + pdma->membase = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pdma->membase)) + return PTR_ERR(pdma->membase); + + ret = sf_pdma_irq_init(pdev, pdma); + if (ret) + return ret; + + sf_pdma_setup_chans(pdma); + + pdma->dma_dev.dev = &pdev->dev; + + /* Setup capability */ + dma_cap_set(DMA_MEMCPY, pdma->dma_dev.cap_mask); + pdma->dma_dev.copy_align = 2; + pdma->dma_dev.src_addr_widths = widths; + pdma->dma_dev.dst_addr_widths = widths; + pdma->dma_dev.directions = BIT(DMA_MEM_TO_MEM); + pdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + pdma->dma_dev.descriptor_reuse = true; + + /* Setup DMA APIs */ + pdma->dma_dev.device_alloc_chan_resources = + sf_pdma_alloc_chan_resources; + pdma->dma_dev.device_free_chan_resources = + sf_pdma_free_chan_resources; + pdma->dma_dev.device_tx_status = sf_pdma_tx_status; + pdma->dma_dev.device_prep_dma_memcpy = sf_pdma_prep_dma_memcpy; + pdma->dma_dev.device_config = sf_pdma_slave_config; + pdma->dma_dev.device_terminate_all = sf_pdma_terminate_all; + pdma->dma_dev.device_issue_pending = sf_pdma_issue_pending; + + platform_set_drvdata(pdev, pdma); + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + dev_warn(&pdev->dev, + "Failed to set DMA mask. Fall back to default.\n"); + + ret = dma_async_device_register(&pdma->dma_dev); + if (ret) { + dev_err(&pdev->dev, + "Can't register SiFive Platform DMA. (%d)\n", ret); + return ret; + } + + return 0; +} + +static int sf_pdma_remove(struct platform_device *pdev) +{ + struct sf_pdma *pdma = platform_get_drvdata(pdev); + struct sf_pdma_chan *ch; + int i; + + for (i = 0; i < pdma->n_chans; i++) { + ch = &pdma->chans[i]; + + devm_free_irq(&pdev->dev, ch->txirq, ch); + devm_free_irq(&pdev->dev, ch->errirq, ch); + list_del(&ch->vchan.chan.device_node); + tasklet_kill(&ch->vchan.task); + tasklet_kill(&ch->done_tasklet); + tasklet_kill(&ch->err_tasklet); + } + + dma_async_device_unregister(&pdma->dma_dev); + + return 0; +} + +static const struct of_device_id sf_pdma_dt_ids[] = { + { .compatible = "sifive,fu540-c000-pdma" }, + { .compatible = "sifive,pdma0" }, + {}, +}; +MODULE_DEVICE_TABLE(of, sf_pdma_dt_ids); + +static struct platform_driver sf_pdma_driver = { + .probe = sf_pdma_probe, + .remove = sf_pdma_remove, + .driver = { + .name = "sf-pdma", + .of_match_table = sf_pdma_dt_ids, + }, +}; + +static int __init sf_pdma_init(void) +{ + return platform_driver_register(&sf_pdma_driver); +} + +static void __exit sf_pdma_exit(void) +{ + platform_driver_unregister(&sf_pdma_driver); +} + +/* do early init */ +subsys_initcall(sf_pdma_init); +module_exit(sf_pdma_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("SiFive Platform DMA driver"); +MODULE_AUTHOR("Green Wan "); diff --git a/drivers/dma/sf-pdma/sf-pdma.h b/drivers/dma/sf-pdma/sf-pdma.h new file mode 100644 index 0000000000..5c398a83b4 --- /dev/null +++ b/drivers/dma/sf-pdma/sf-pdma.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SiFive FU540 Platform DMA driver + * Copyright (C) 2019 SiFive + * + * Based partially on: + * - drivers/dma/fsl-edma.c + * - drivers/dma/dw-edma/ + * - drivers/dma/pxa-dma.c + * + * See the following sources for further documentation: + * - Chapter 12 "Platform DMA Engine (PDMA)" of + * SiFive FU540-C000 v1.0 + * https://static.dev.sifive.com/FU540-C000-v1.0.pdf + */ +#ifndef _SF_PDMA_H +#define _SF_PDMA_H + +#include +#include + +#include "../dmaengine.h" +#include "../virt-dma.h" + +#define PDMA_MAX_NR_CH 4 + +#define PDMA_BASE_ADDR 0x3000000 +#define PDMA_CHAN_OFFSET 0x1000 + +/* Register Offset */ +#define PDMA_CTRL 0x000 +#define PDMA_XFER_TYPE 0x004 +#define PDMA_XFER_SIZE 0x008 +#define PDMA_DST_ADDR 0x010 +#define PDMA_SRC_ADDR 0x018 +#define PDMA_ACT_TYPE 0x104 /* Read-only */ +#define PDMA_REMAINING_BYTE 0x108 /* Read-only */ +#define PDMA_CUR_DST_ADDR 0x110 /* Read-only*/ +#define PDMA_CUR_SRC_ADDR 0x118 /* Read-only*/ + +/* CTRL */ +#define PDMA_CLEAR_CTRL 0x0 +#define PDMA_CLAIM_MASK GENMASK(0, 0) +#define PDMA_RUN_MASK GENMASK(1, 1) +#define PDMA_ENABLE_DONE_INT_MASK GENMASK(14, 14) +#define PDMA_ENABLE_ERR_INT_MASK GENMASK(15, 15) +#define PDMA_DONE_STATUS_MASK GENMASK(30, 30) +#define PDMA_ERR_STATUS_MASK GENMASK(31, 31) + +/* Transfer Type */ +#define PDMA_FULL_SPEED 0xFF000008 + +/* Error Recovery */ +#define MAX_RETRY 1 + +#define SF_PDMA_REG_BASE(ch) (pdma->membase + (PDMA_CHAN_OFFSET * (ch))) + +struct pdma_regs { + /* read-write regs */ + void __iomem *ctrl; /* 4 bytes */ + + void __iomem *xfer_type; /* 4 bytes */ + void __iomem *xfer_size; /* 8 bytes */ + void __iomem *dst_addr; /* 8 bytes */ + void __iomem *src_addr; /* 8 bytes */ + + /* read-only */ + void __iomem *act_type; /* 4 bytes */ + void __iomem *residue; /* 8 bytes */ + void __iomem *cur_dst_addr; /* 8 bytes */ + void __iomem *cur_src_addr; /* 8 bytes */ +}; + +struct sf_pdma_desc { + u32 xfer_type; + u64 xfer_size; + u64 dst_addr; + u64 src_addr; + struct virt_dma_desc vdesc; + struct sf_pdma_chan *chan; + enum dma_transfer_direction dirn; + struct dma_async_tx_descriptor *async_tx; +}; + +enum sf_pdma_pm_state { + RUNNING = 0, + SUSPENDED, +}; + +struct sf_pdma_chan { + struct virt_dma_chan vchan; + enum dma_status status; + enum sf_pdma_pm_state pm_state; + u32 slave_id; + struct sf_pdma *pdma; + struct sf_pdma_desc *desc; + struct dma_slave_config cfg; + u32 attr; + dma_addr_t dma_dev_addr; + u32 dma_dev_size; + struct tasklet_struct done_tasklet; + struct tasklet_struct err_tasklet; + struct pdma_regs regs; + spinlock_t lock; /* protect chan data */ + bool xfer_err; + int txirq; + int errirq; + int retries; +}; + +struct sf_pdma { + struct dma_device dma_dev; + void __iomem *membase; + void __iomem *mappedbase; + u32 n_chans; + struct sf_pdma_chan chans[]; +}; + +#endif /* _SF_PDMA_H */ diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig new file mode 100644 index 0000000000..c0b2997ab7 --- /dev/null +++ b/drivers/dma/sh/Kconfig @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# DMA engine configuration for sh +# + +config RENESAS_DMA + bool + select DMA_ENGINE + +# +# DMA Engine Helpers +# + +config SH_DMAE_BASE + bool "Renesas SuperH DMA Engine support" + depends on SUPERH || COMPILE_TEST + depends on !SUPERH || SH_DMA + depends on !SH_DMA_API + default y + select RENESAS_DMA + help + Enable support for the Renesas SuperH DMA controllers. + +# +# DMA Controllers +# + +config SH_DMAE + tristate "Renesas SuperH DMAC support" + depends on SH_DMAE_BASE + help + Enable support for the Renesas SuperH DMA controllers. + +config RCAR_DMAC + tristate "Renesas R-Car Gen{2,3} and RZ/G{1,2} DMA Controller" + depends on ARCH_RENESAS || COMPILE_TEST + select RENESAS_DMA + help + This driver supports the general purpose DMA controller found in the + Renesas R-Car Gen{2,3} and RZ/G{1,2} SoCs. + +config RENESAS_USB_DMAC + tristate "Renesas USB-DMA Controller" + depends on ARCH_RENESAS || COMPILE_TEST + select RENESAS_DMA + select DMA_VIRTUAL_CHANNELS + help + This driver supports the USB-DMA controller found in the Renesas + SoCs. + +config RZ_DMAC + tristate "Renesas RZ/{G2L,V2L} DMA Controller" + depends on ARCH_RZG2L || COMPILE_TEST + select RENESAS_DMA + select DMA_VIRTUAL_CHANNELS + help + This driver supports the general purpose DMA controller found in the + Renesas RZ/{G2L,V2L} SoC variants. diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile new file mode 100644 index 0000000000..360ab6d25e --- /dev/null +++ b/drivers/dma/sh/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# DMA Engine Helpers +# + +obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o + +# +# DMA Controllers +# + +shdma-y := shdmac.o +shdma-objs := $(shdma-y) +obj-$(CONFIG_SH_DMAE) += shdma.o + +obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o +obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o +obj-$(CONFIG_RZ_DMAC) += rz-dmac.o diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c new file mode 100644 index 0000000000..641d689d17 --- /dev/null +++ b/drivers/dma/sh/rcar-dmac.c @@ -0,0 +1,2052 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Renesas R-Car Gen2/Gen3 DMA Controller Driver + * + * Copyright (C) 2014-2019 Renesas Electronics Inc. + * + * Author: Laurent Pinchart + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" + +/* + * struct rcar_dmac_xfer_chunk - Descriptor for a hardware transfer + * @node: entry in the parent's chunks list + * @src_addr: device source address + * @dst_addr: device destination address + * @size: transfer size in bytes + */ +struct rcar_dmac_xfer_chunk { + struct list_head node; + + dma_addr_t src_addr; + dma_addr_t dst_addr; + u32 size; +}; + +/* + * struct rcar_dmac_hw_desc - Hardware descriptor for a transfer chunk + * @sar: value of the SAR register (source address) + * @dar: value of the DAR register (destination address) + * @tcr: value of the TCR register (transfer count) + */ +struct rcar_dmac_hw_desc { + u32 sar; + u32 dar; + u32 tcr; + u32 reserved; +} __attribute__((__packed__)); + +/* + * struct rcar_dmac_desc - R-Car Gen2 DMA Transfer Descriptor + * @async_tx: base DMA asynchronous transaction descriptor + * @direction: direction of the DMA transfer + * @xfer_shift: log2 of the transfer size + * @chcr: value of the channel configuration register for this transfer + * @node: entry in the channel's descriptors lists + * @chunks: list of transfer chunks for this transfer + * @running: the transfer chunk being currently processed + * @nchunks: number of transfer chunks for this transfer + * @hwdescs.use: whether the transfer descriptor uses hardware descriptors + * @hwdescs.mem: hardware descriptors memory for the transfer + * @hwdescs.dma: device address of the hardware descriptors memory + * @hwdescs.size: size of the hardware descriptors in bytes + * @size: transfer size in bytes + * @cyclic: when set indicates that the DMA transfer is cyclic + */ +struct rcar_dmac_desc { + struct dma_async_tx_descriptor async_tx; + enum dma_transfer_direction direction; + unsigned int xfer_shift; + u32 chcr; + + struct list_head node; + struct list_head chunks; + struct rcar_dmac_xfer_chunk *running; + unsigned int nchunks; + + struct { + bool use; + struct rcar_dmac_hw_desc *mem; + dma_addr_t dma; + size_t size; + } hwdescs; + + unsigned int size; + bool cyclic; +}; + +#define to_rcar_dmac_desc(d) container_of(d, struct rcar_dmac_desc, async_tx) + +/* + * struct rcar_dmac_desc_page - One page worth of descriptors + * @node: entry in the channel's pages list + * @descs: array of DMA descriptors + * @chunks: array of transfer chunk descriptors + */ +struct rcar_dmac_desc_page { + struct list_head node; + + union { + DECLARE_FLEX_ARRAY(struct rcar_dmac_desc, descs); + DECLARE_FLEX_ARRAY(struct rcar_dmac_xfer_chunk, chunks); + }; +}; + +#define RCAR_DMAC_DESCS_PER_PAGE \ + ((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, descs)) / \ + sizeof(struct rcar_dmac_desc)) +#define RCAR_DMAC_XFER_CHUNKS_PER_PAGE \ + ((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, chunks)) / \ + sizeof(struct rcar_dmac_xfer_chunk)) + +/* + * struct rcar_dmac_chan_slave - Slave configuration + * @slave_addr: slave memory address + * @xfer_size: size (in bytes) of hardware transfers + */ +struct rcar_dmac_chan_slave { + phys_addr_t slave_addr; + unsigned int xfer_size; +}; + +/* + * struct rcar_dmac_chan_map - Map of slave device phys to dma address + * @addr: slave dma address + * @dir: direction of mapping + * @slave: slave configuration that is mapped + */ +struct rcar_dmac_chan_map { + dma_addr_t addr; + enum dma_data_direction dir; + struct rcar_dmac_chan_slave slave; +}; + +/* + * struct rcar_dmac_chan - R-Car Gen2 DMA Controller Channel + * @chan: base DMA channel object + * @iomem: channel I/O memory base + * @index: index of this channel in the controller + * @irq: channel IRQ + * @src: slave memory address and size on the source side + * @dst: slave memory address and size on the destination side + * @mid_rid: hardware MID/RID for the DMA client using this channel + * @lock: protects the channel CHCR register and the desc members + * @desc.free: list of free descriptors + * @desc.pending: list of pending descriptors (submitted with tx_submit) + * @desc.active: list of active descriptors (activated with issue_pending) + * @desc.done: list of completed descriptors + * @desc.wait: list of descriptors waiting for an ack + * @desc.running: the descriptor being processed (a member of the active list) + * @desc.chunks_free: list of free transfer chunk descriptors + * @desc.pages: list of pages used by allocated descriptors + */ +struct rcar_dmac_chan { + struct dma_chan chan; + void __iomem *iomem; + unsigned int index; + int irq; + + struct rcar_dmac_chan_slave src; + struct rcar_dmac_chan_slave dst; + struct rcar_dmac_chan_map map; + int mid_rid; + + spinlock_t lock; + + struct { + struct list_head free; + struct list_head pending; + struct list_head active; + struct list_head done; + struct list_head wait; + struct rcar_dmac_desc *running; + + struct list_head chunks_free; + + struct list_head pages; + } desc; +}; + +#define to_rcar_dmac_chan(c) container_of(c, struct rcar_dmac_chan, chan) + +/* + * struct rcar_dmac - R-Car Gen2 DMA Controller + * @engine: base DMA engine object + * @dev: the hardware device + * @dmac_base: remapped base register block + * @chan_base: remapped channel register block (optional) + * @n_channels: number of available channels + * @channels: array of DMAC channels + * @channels_mask: bitfield of which DMA channels are managed by this driver + * @modules: bitmask of client modules in use + */ +struct rcar_dmac { + struct dma_device engine; + struct device *dev; + void __iomem *dmac_base; + void __iomem *chan_base; + + unsigned int n_channels; + struct rcar_dmac_chan *channels; + u32 channels_mask; + + DECLARE_BITMAP(modules, 256); +}; + +#define to_rcar_dmac(d) container_of(d, struct rcar_dmac, engine) + +#define for_each_rcar_dmac_chan(i, dmac, chan) \ + for (i = 0, chan = &(dmac)->channels[0]; i < (dmac)->n_channels; i++, chan++) \ + if (!((dmac)->channels_mask & BIT(i))) continue; else + +/* + * struct rcar_dmac_of_data - This driver's OF data + * @chan_offset_base: DMAC channels base offset + * @chan_offset_stride: DMAC channels offset stride + */ +struct rcar_dmac_of_data { + u32 chan_offset_base; + u32 chan_offset_stride; +}; + +/* ----------------------------------------------------------------------------- + * Registers + */ + +#define RCAR_DMAISTA 0x0020 +#define RCAR_DMASEC 0x0030 +#define RCAR_DMAOR 0x0060 +#define RCAR_DMAOR_PRI_FIXED (0 << 8) +#define RCAR_DMAOR_PRI_ROUND_ROBIN (3 << 8) +#define RCAR_DMAOR_AE (1 << 2) +#define RCAR_DMAOR_DME (1 << 0) +#define RCAR_DMACHCLR 0x0080 /* Not on R-Car Gen4 */ +#define RCAR_DMADPSEC 0x00a0 + +#define RCAR_DMASAR 0x0000 +#define RCAR_DMADAR 0x0004 +#define RCAR_DMATCR 0x0008 +#define RCAR_DMATCR_MASK 0x00ffffff +#define RCAR_DMATSR 0x0028 +#define RCAR_DMACHCR 0x000c +#define RCAR_DMACHCR_CAE (1 << 31) +#define RCAR_DMACHCR_CAIE (1 << 30) +#define RCAR_DMACHCR_DPM_DISABLED (0 << 28) +#define RCAR_DMACHCR_DPM_ENABLED (1 << 28) +#define RCAR_DMACHCR_DPM_REPEAT (2 << 28) +#define RCAR_DMACHCR_DPM_INFINITE (3 << 28) +#define RCAR_DMACHCR_RPT_SAR (1 << 27) +#define RCAR_DMACHCR_RPT_DAR (1 << 26) +#define RCAR_DMACHCR_RPT_TCR (1 << 25) +#define RCAR_DMACHCR_DPB (1 << 22) +#define RCAR_DMACHCR_DSE (1 << 19) +#define RCAR_DMACHCR_DSIE (1 << 18) +#define RCAR_DMACHCR_TS_1B ((0 << 20) | (0 << 3)) +#define RCAR_DMACHCR_TS_2B ((0 << 20) | (1 << 3)) +#define RCAR_DMACHCR_TS_4B ((0 << 20) | (2 << 3)) +#define RCAR_DMACHCR_TS_16B ((0 << 20) | (3 << 3)) +#define RCAR_DMACHCR_TS_32B ((1 << 20) | (0 << 3)) +#define RCAR_DMACHCR_TS_64B ((1 << 20) | (1 << 3)) +#define RCAR_DMACHCR_TS_8B ((1 << 20) | (3 << 3)) +#define RCAR_DMACHCR_DM_FIXED (0 << 14) +#define RCAR_DMACHCR_DM_INC (1 << 14) +#define RCAR_DMACHCR_DM_DEC (2 << 14) +#define RCAR_DMACHCR_SM_FIXED (0 << 12) +#define RCAR_DMACHCR_SM_INC (1 << 12) +#define RCAR_DMACHCR_SM_DEC (2 << 12) +#define RCAR_DMACHCR_RS_AUTO (4 << 8) +#define RCAR_DMACHCR_RS_DMARS (8 << 8) +#define RCAR_DMACHCR_IE (1 << 2) +#define RCAR_DMACHCR_TE (1 << 1) +#define RCAR_DMACHCR_DE (1 << 0) +#define RCAR_DMATCRB 0x0018 +#define RCAR_DMATSRB 0x0038 +#define RCAR_DMACHCRB 0x001c +#define RCAR_DMACHCRB_DCNT(n) ((n) << 24) +#define RCAR_DMACHCRB_DPTR_MASK (0xff << 16) +#define RCAR_DMACHCRB_DPTR_SHIFT 16 +#define RCAR_DMACHCRB_DRST (1 << 15) +#define RCAR_DMACHCRB_DTS (1 << 8) +#define RCAR_DMACHCRB_SLM_NORMAL (0 << 4) +#define RCAR_DMACHCRB_SLM_CLK(n) ((8 | (n)) << 4) +#define RCAR_DMACHCRB_PRI(n) ((n) << 0) +#define RCAR_DMARS 0x0040 +#define RCAR_DMABUFCR 0x0048 +#define RCAR_DMABUFCR_MBU(n) ((n) << 16) +#define RCAR_DMABUFCR_ULB(n) ((n) << 0) +#define RCAR_DMADPBASE 0x0050 +#define RCAR_DMADPBASE_MASK 0xfffffff0 +#define RCAR_DMADPBASE_SEL (1 << 0) +#define RCAR_DMADPCR 0x0054 +#define RCAR_DMADPCR_DIPT(n) ((n) << 24) +#define RCAR_DMAFIXSAR 0x0010 +#define RCAR_DMAFIXDAR 0x0014 +#define RCAR_DMAFIXDPBASE 0x0060 + +/* For R-Car Gen4 */ +#define RCAR_GEN4_DMACHCLR 0x0100 + +/* Hardcode the MEMCPY transfer size to 4 bytes. */ +#define RCAR_DMAC_MEMCPY_XFER_SIZE 4 + +/* ----------------------------------------------------------------------------- + * Device access + */ + +static void rcar_dmac_write(struct rcar_dmac *dmac, u32 reg, u32 data) +{ + if (reg == RCAR_DMAOR) + writew(data, dmac->dmac_base + reg); + else + writel(data, dmac->dmac_base + reg); +} + +static u32 rcar_dmac_read(struct rcar_dmac *dmac, u32 reg) +{ + if (reg == RCAR_DMAOR) + return readw(dmac->dmac_base + reg); + else + return readl(dmac->dmac_base + reg); +} + +static u32 rcar_dmac_chan_read(struct rcar_dmac_chan *chan, u32 reg) +{ + if (reg == RCAR_DMARS) + return readw(chan->iomem + reg); + else + return readl(chan->iomem + reg); +} + +static void rcar_dmac_chan_write(struct rcar_dmac_chan *chan, u32 reg, u32 data) +{ + if (reg == RCAR_DMARS) + writew(data, chan->iomem + reg); + else + writel(data, chan->iomem + reg); +} + +static void rcar_dmac_chan_clear(struct rcar_dmac *dmac, + struct rcar_dmac_chan *chan) +{ + if (dmac->chan_base) + rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1); + else + rcar_dmac_write(dmac, RCAR_DMACHCLR, BIT(chan->index)); +} + +static void rcar_dmac_chan_clear_all(struct rcar_dmac *dmac) +{ + struct rcar_dmac_chan *chan; + unsigned int i; + + if (dmac->chan_base) { + for_each_rcar_dmac_chan(i, dmac, chan) + rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1); + } else { + rcar_dmac_write(dmac, RCAR_DMACHCLR, dmac->channels_mask); + } +} + +/* ----------------------------------------------------------------------------- + * Initialization and configuration + */ + +static bool rcar_dmac_chan_is_busy(struct rcar_dmac_chan *chan) +{ + u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR); + + return !!(chcr & (RCAR_DMACHCR_DE | RCAR_DMACHCR_TE)); +} + +static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan) +{ + struct rcar_dmac_desc *desc = chan->desc.running; + u32 chcr = desc->chcr; + + WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan)); + + if (chan->mid_rid >= 0) + rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid); + + if (desc->hwdescs.use) { + struct rcar_dmac_xfer_chunk *chunk = + list_first_entry(&desc->chunks, + struct rcar_dmac_xfer_chunk, node); + + dev_dbg(chan->chan.device->dev, + "chan%u: queue desc %p: %u@%pad\n", + chan->index, desc, desc->nchunks, &desc->hwdescs.dma); + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR, + chunk->src_addr >> 32); + rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR, + chunk->dst_addr >> 32); + rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE, + desc->hwdescs.dma >> 32); +#endif + rcar_dmac_chan_write(chan, RCAR_DMADPBASE, + (desc->hwdescs.dma & 0xfffffff0) | + RCAR_DMADPBASE_SEL); + rcar_dmac_chan_write(chan, RCAR_DMACHCRB, + RCAR_DMACHCRB_DCNT(desc->nchunks - 1) | + RCAR_DMACHCRB_DRST); + + /* + * Errata: When descriptor memory is accessed through an IOMMU + * the DMADAR register isn't initialized automatically from the + * first descriptor at beginning of transfer by the DMAC like it + * should. Initialize it manually with the destination address + * of the first chunk. + */ + rcar_dmac_chan_write(chan, RCAR_DMADAR, + chunk->dst_addr & 0xffffffff); + + /* + * Program the descriptor stage interrupt to occur after the end + * of the first stage. + */ + rcar_dmac_chan_write(chan, RCAR_DMADPCR, RCAR_DMADPCR_DIPT(1)); + + chcr |= RCAR_DMACHCR_RPT_SAR | RCAR_DMACHCR_RPT_DAR + | RCAR_DMACHCR_RPT_TCR | RCAR_DMACHCR_DPB; + + /* + * If the descriptor isn't cyclic enable normal descriptor mode + * and the transfer completion interrupt. + */ + if (!desc->cyclic) + chcr |= RCAR_DMACHCR_DPM_ENABLED | RCAR_DMACHCR_IE; + /* + * If the descriptor is cyclic and has a callback enable the + * descriptor stage interrupt in infinite repeat mode. + */ + else if (desc->async_tx.callback) + chcr |= RCAR_DMACHCR_DPM_INFINITE | RCAR_DMACHCR_DSIE; + /* + * Otherwise just select infinite repeat mode without any + * interrupt. + */ + else + chcr |= RCAR_DMACHCR_DPM_INFINITE; + } else { + struct rcar_dmac_xfer_chunk *chunk = desc->running; + + dev_dbg(chan->chan.device->dev, + "chan%u: queue chunk %p: %u@%pad -> %pad\n", + chan->index, chunk, chunk->size, &chunk->src_addr, + &chunk->dst_addr); + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR, + chunk->src_addr >> 32); + rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR, + chunk->dst_addr >> 32); +#endif + rcar_dmac_chan_write(chan, RCAR_DMASAR, + chunk->src_addr & 0xffffffff); + rcar_dmac_chan_write(chan, RCAR_DMADAR, + chunk->dst_addr & 0xffffffff); + rcar_dmac_chan_write(chan, RCAR_DMATCR, + chunk->size >> desc->xfer_shift); + + chcr |= RCAR_DMACHCR_DPM_DISABLED | RCAR_DMACHCR_IE; + } + + rcar_dmac_chan_write(chan, RCAR_DMACHCR, + chcr | RCAR_DMACHCR_DE | RCAR_DMACHCR_CAIE); +} + +static int rcar_dmac_init(struct rcar_dmac *dmac) +{ + u16 dmaor; + + /* Clear all channels and enable the DMAC globally. */ + rcar_dmac_chan_clear_all(dmac); + rcar_dmac_write(dmac, RCAR_DMAOR, + RCAR_DMAOR_PRI_FIXED | RCAR_DMAOR_DME); + + dmaor = rcar_dmac_read(dmac, RCAR_DMAOR); + if ((dmaor & (RCAR_DMAOR_AE | RCAR_DMAOR_DME)) != RCAR_DMAOR_DME) { + dev_warn(dmac->dev, "DMAOR initialization failed.\n"); + return -EIO; + } + + return 0; +} + +/* ----------------------------------------------------------------------------- + * Descriptors submission + */ + +static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct rcar_dmac_chan *chan = to_rcar_dmac_chan(tx->chan); + struct rcar_dmac_desc *desc = to_rcar_dmac_desc(tx); + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&chan->lock, flags); + + cookie = dma_cookie_assign(tx); + + dev_dbg(chan->chan.device->dev, "chan%u: submit #%d@%p\n", + chan->index, tx->cookie, desc); + + list_add_tail(&desc->node, &chan->desc.pending); + desc->running = list_first_entry(&desc->chunks, + struct rcar_dmac_xfer_chunk, node); + + spin_unlock_irqrestore(&chan->lock, flags); + + return cookie; +} + +/* ----------------------------------------------------------------------------- + * Descriptors allocation and free + */ + +/* + * rcar_dmac_desc_alloc - Allocate a page worth of DMA descriptors + * @chan: the DMA channel + * @gfp: allocation flags + */ +static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) +{ + struct rcar_dmac_desc_page *page; + unsigned long flags; + LIST_HEAD(list); + unsigned int i; + + page = (void *)get_zeroed_page(gfp); + if (!page) + return -ENOMEM; + + for (i = 0; i < RCAR_DMAC_DESCS_PER_PAGE; ++i) { + struct rcar_dmac_desc *desc = &page->descs[i]; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan); + desc->async_tx.tx_submit = rcar_dmac_tx_submit; + INIT_LIST_HEAD(&desc->chunks); + + list_add_tail(&desc->node, &list); + } + + spin_lock_irqsave(&chan->lock, flags); + list_splice_tail(&list, &chan->desc.free); + list_add_tail(&page->node, &chan->desc.pages); + spin_unlock_irqrestore(&chan->lock, flags); + + return 0; +} + +/* + * rcar_dmac_desc_put - Release a DMA transfer descriptor + * @chan: the DMA channel + * @desc: the descriptor + * + * Put the descriptor and its transfer chunk descriptors back in the channel's + * free descriptors lists. The descriptor's chunks list will be reinitialized to + * an empty list as a result. + * + * The descriptor must have been removed from the channel's lists before calling + * this function. + */ +static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan, + struct rcar_dmac_desc *desc) +{ + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + list_splice_tail_init(&desc->chunks, &chan->desc.chunks_free); + list_add(&desc->node, &chan->desc.free); + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) +{ + struct rcar_dmac_desc *desc, *_desc; + unsigned long flags; + LIST_HEAD(list); + + /* + * We have to temporarily move all descriptors from the wait list to a + * local list as iterating over the wait list, even with + * list_for_each_entry_safe, isn't safe if we release the channel lock + * around the rcar_dmac_desc_put() call. + */ + spin_lock_irqsave(&chan->lock, flags); + list_splice_init(&chan->desc.wait, &list); + spin_unlock_irqrestore(&chan->lock, flags); + + list_for_each_entry_safe(desc, _desc, &list, node) { + if (async_tx_test_ack(&desc->async_tx)) { + list_del(&desc->node); + rcar_dmac_desc_put(chan, desc); + } + } + + if (list_empty(&list)) + return; + + /* Put the remaining descriptors back in the wait list. */ + spin_lock_irqsave(&chan->lock, flags); + list_splice(&list, &chan->desc.wait); + spin_unlock_irqrestore(&chan->lock, flags); +} + +/* + * rcar_dmac_desc_get - Allocate a descriptor for a DMA transfer + * @chan: the DMA channel + * + * Locking: This function must be called in a non-atomic context. + * + * Return: A pointer to the allocated descriptor or NULL if no descriptor can + * be allocated. + */ +static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan) +{ + struct rcar_dmac_desc *desc; + unsigned long flags; + int ret; + + /* Recycle acked descriptors before attempting allocation. */ + rcar_dmac_desc_recycle_acked(chan); + + spin_lock_irqsave(&chan->lock, flags); + + while (list_empty(&chan->desc.free)) { + /* + * No free descriptors, allocate a page worth of them and try + * again, as someone else could race us to get the newly + * allocated descriptors. If the allocation fails return an + * error. + */ + spin_unlock_irqrestore(&chan->lock, flags); + ret = rcar_dmac_desc_alloc(chan, GFP_NOWAIT); + if (ret < 0) + return NULL; + spin_lock_irqsave(&chan->lock, flags); + } + + desc = list_first_entry(&chan->desc.free, struct rcar_dmac_desc, node); + list_del(&desc->node); + + spin_unlock_irqrestore(&chan->lock, flags); + + return desc; +} + +/* + * rcar_dmac_xfer_chunk_alloc - Allocate a page worth of transfer chunks + * @chan: the DMA channel + * @gfp: allocation flags + */ +static int rcar_dmac_xfer_chunk_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) +{ + struct rcar_dmac_desc_page *page; + unsigned long flags; + LIST_HEAD(list); + unsigned int i; + + page = (void *)get_zeroed_page(gfp); + if (!page) + return -ENOMEM; + + for (i = 0; i < RCAR_DMAC_XFER_CHUNKS_PER_PAGE; ++i) { + struct rcar_dmac_xfer_chunk *chunk = &page->chunks[i]; + + list_add_tail(&chunk->node, &list); + } + + spin_lock_irqsave(&chan->lock, flags); + list_splice_tail(&list, &chan->desc.chunks_free); + list_add_tail(&page->node, &chan->desc.pages); + spin_unlock_irqrestore(&chan->lock, flags); + + return 0; +} + +/* + * rcar_dmac_xfer_chunk_get - Allocate a transfer chunk for a DMA transfer + * @chan: the DMA channel + * + * Locking: This function must be called in a non-atomic context. + * + * Return: A pointer to the allocated transfer chunk descriptor or NULL if no + * descriptor can be allocated. + */ +static struct rcar_dmac_xfer_chunk * +rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan) +{ + struct rcar_dmac_xfer_chunk *chunk; + unsigned long flags; + int ret; + + spin_lock_irqsave(&chan->lock, flags); + + while (list_empty(&chan->desc.chunks_free)) { + /* + * No free descriptors, allocate a page worth of them and try + * again, as someone else could race us to get the newly + * allocated descriptors. If the allocation fails return an + * error. + */ + spin_unlock_irqrestore(&chan->lock, flags); + ret = rcar_dmac_xfer_chunk_alloc(chan, GFP_NOWAIT); + if (ret < 0) + return NULL; + spin_lock_irqsave(&chan->lock, flags); + } + + chunk = list_first_entry(&chan->desc.chunks_free, + struct rcar_dmac_xfer_chunk, node); + list_del(&chunk->node); + + spin_unlock_irqrestore(&chan->lock, flags); + + return chunk; +} + +static void rcar_dmac_realloc_hwdesc(struct rcar_dmac_chan *chan, + struct rcar_dmac_desc *desc, size_t size) +{ + /* + * dma_alloc_coherent() allocates memory in page size increments. To + * avoid reallocating the hardware descriptors when the allocated size + * wouldn't change align the requested size to a multiple of the page + * size. + */ + size = PAGE_ALIGN(size); + + if (desc->hwdescs.size == size) + return; + + if (desc->hwdescs.mem) { + dma_free_coherent(chan->chan.device->dev, desc->hwdescs.size, + desc->hwdescs.mem, desc->hwdescs.dma); + desc->hwdescs.mem = NULL; + desc->hwdescs.size = 0; + } + + if (!size) + return; + + desc->hwdescs.mem = dma_alloc_coherent(chan->chan.device->dev, size, + &desc->hwdescs.dma, GFP_NOWAIT); + if (!desc->hwdescs.mem) + return; + + desc->hwdescs.size = size; +} + +static int rcar_dmac_fill_hwdesc(struct rcar_dmac_chan *chan, + struct rcar_dmac_desc *desc) +{ + struct rcar_dmac_xfer_chunk *chunk; + struct rcar_dmac_hw_desc *hwdesc; + + rcar_dmac_realloc_hwdesc(chan, desc, desc->nchunks * sizeof(*hwdesc)); + + hwdesc = desc->hwdescs.mem; + if (!hwdesc) + return -ENOMEM; + + list_for_each_entry(chunk, &desc->chunks, node) { + hwdesc->sar = chunk->src_addr; + hwdesc->dar = chunk->dst_addr; + hwdesc->tcr = chunk->size >> desc->xfer_shift; + hwdesc++; + } + + return 0; +} + +/* ----------------------------------------------------------------------------- + * Stop and reset + */ +static void rcar_dmac_chcr_de_barrier(struct rcar_dmac_chan *chan) +{ + u32 chcr; + unsigned int i; + + /* + * Ensure that the setting of the DE bit is actually 0 after + * clearing it. + */ + for (i = 0; i < 1024; i++) { + chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR); + if (!(chcr & RCAR_DMACHCR_DE)) + return; + udelay(1); + } + + dev_err(chan->chan.device->dev, "CHCR DE check error\n"); +} + +static void rcar_dmac_clear_chcr_de(struct rcar_dmac_chan *chan) +{ + u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR); + + /* set DE=0 and flush remaining data */ + rcar_dmac_chan_write(chan, RCAR_DMACHCR, (chcr & ~RCAR_DMACHCR_DE)); + + /* make sure all remaining data was flushed */ + rcar_dmac_chcr_de_barrier(chan); +} + +static void rcar_dmac_chan_halt(struct rcar_dmac_chan *chan) +{ + u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR); + + chcr &= ~(RCAR_DMACHCR_DSE | RCAR_DMACHCR_DSIE | RCAR_DMACHCR_IE | + RCAR_DMACHCR_TE | RCAR_DMACHCR_DE | + RCAR_DMACHCR_CAE | RCAR_DMACHCR_CAIE); + rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr); + rcar_dmac_chcr_de_barrier(chan); +} + +static void rcar_dmac_chan_reinit(struct rcar_dmac_chan *chan) +{ + struct rcar_dmac_desc *desc, *_desc; + unsigned long flags; + LIST_HEAD(descs); + + spin_lock_irqsave(&chan->lock, flags); + + /* Move all non-free descriptors to the local lists. */ + list_splice_init(&chan->desc.pending, &descs); + list_splice_init(&chan->desc.active, &descs); + list_splice_init(&chan->desc.done, &descs); + list_splice_init(&chan->desc.wait, &descs); + + chan->desc.running = NULL; + + spin_unlock_irqrestore(&chan->lock, flags); + + list_for_each_entry_safe(desc, _desc, &descs, node) { + list_del(&desc->node); + rcar_dmac_desc_put(chan, desc); + } +} + +static void rcar_dmac_stop_all_chan(struct rcar_dmac *dmac) +{ + struct rcar_dmac_chan *chan; + unsigned int i; + + /* Stop all channels. */ + for_each_rcar_dmac_chan(i, dmac, chan) { + /* Stop and reinitialize the channel. */ + spin_lock_irq(&chan->lock); + rcar_dmac_chan_halt(chan); + spin_unlock_irq(&chan->lock); + } +} + +static int rcar_dmac_chan_pause(struct dma_chan *chan) +{ + unsigned long flags; + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + + spin_lock_irqsave(&rchan->lock, flags); + rcar_dmac_clear_chcr_de(rchan); + spin_unlock_irqrestore(&rchan->lock, flags); + + return 0; +} + +/* ----------------------------------------------------------------------------- + * Descriptors preparation + */ + +static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan, + struct rcar_dmac_desc *desc) +{ + static const u32 chcr_ts[] = { + RCAR_DMACHCR_TS_1B, RCAR_DMACHCR_TS_2B, + RCAR_DMACHCR_TS_4B, RCAR_DMACHCR_TS_8B, + RCAR_DMACHCR_TS_16B, RCAR_DMACHCR_TS_32B, + RCAR_DMACHCR_TS_64B, + }; + + unsigned int xfer_size; + u32 chcr; + + switch (desc->direction) { + case DMA_DEV_TO_MEM: + chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_FIXED + | RCAR_DMACHCR_RS_DMARS; + xfer_size = chan->src.xfer_size; + break; + + case DMA_MEM_TO_DEV: + chcr = RCAR_DMACHCR_DM_FIXED | RCAR_DMACHCR_SM_INC + | RCAR_DMACHCR_RS_DMARS; + xfer_size = chan->dst.xfer_size; + break; + + case DMA_MEM_TO_MEM: + default: + chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_INC + | RCAR_DMACHCR_RS_AUTO; + xfer_size = RCAR_DMAC_MEMCPY_XFER_SIZE; + break; + } + + desc->xfer_shift = ilog2(xfer_size); + desc->chcr = chcr | chcr_ts[desc->xfer_shift]; +} + +/* + * rcar_dmac_chan_prep_sg - prepare transfer descriptors from an SG list + * + * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also + * converted to scatter-gather to guarantee consistent locking and a correct + * list manipulation. For slave DMA direction carries the usual meaning, and, + * logically, the SG list is RAM and the addr variable contains slave address, + * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM + * and the SG list contains only one element and points at the source buffer. + */ +static struct dma_async_tx_descriptor * +rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, dma_addr_t dev_addr, + enum dma_transfer_direction dir, unsigned long dma_flags, + bool cyclic) +{ + struct rcar_dmac_xfer_chunk *chunk; + struct rcar_dmac_desc *desc; + struct scatterlist *sg; + unsigned int nchunks = 0; + unsigned int max_chunk_size; + unsigned int full_size = 0; + bool cross_boundary = false; + unsigned int i; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + u32 high_dev_addr; + u32 high_mem_addr; +#endif + + desc = rcar_dmac_desc_get(chan); + if (!desc) + return NULL; + + desc->async_tx.flags = dma_flags; + desc->async_tx.cookie = -EBUSY; + + desc->cyclic = cyclic; + desc->direction = dir; + + rcar_dmac_chan_configure_desc(chan, desc); + + max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift; + + /* + * Allocate and fill the transfer chunk descriptors. We own the only + * reference to the DMA descriptor, there's no need for locking. + */ + for_each_sg(sgl, sg, sg_len, i) { + dma_addr_t mem_addr = sg_dma_address(sg); + unsigned int len = sg_dma_len(sg); + + full_size += len; + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + if (i == 0) { + high_dev_addr = dev_addr >> 32; + high_mem_addr = mem_addr >> 32; + } + + if ((dev_addr >> 32 != high_dev_addr) || + (mem_addr >> 32 != high_mem_addr)) + cross_boundary = true; +#endif + while (len) { + unsigned int size = min(len, max_chunk_size); + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + /* + * Prevent individual transfers from crossing 4GB + * boundaries. + */ + if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) { + size = ALIGN(dev_addr, 1ULL << 32) - dev_addr; + cross_boundary = true; + } + if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) { + size = ALIGN(mem_addr, 1ULL << 32) - mem_addr; + cross_boundary = true; + } +#endif + + chunk = rcar_dmac_xfer_chunk_get(chan); + if (!chunk) { + rcar_dmac_desc_put(chan, desc); + return NULL; + } + + if (dir == DMA_DEV_TO_MEM) { + chunk->src_addr = dev_addr; + chunk->dst_addr = mem_addr; + } else { + chunk->src_addr = mem_addr; + chunk->dst_addr = dev_addr; + } + + chunk->size = size; + + dev_dbg(chan->chan.device->dev, + "chan%u: chunk %p/%p sgl %u@%p, %u/%u %pad -> %pad\n", + chan->index, chunk, desc, i, sg, size, len, + &chunk->src_addr, &chunk->dst_addr); + + mem_addr += size; + if (dir == DMA_MEM_TO_MEM) + dev_addr += size; + + len -= size; + + list_add_tail(&chunk->node, &desc->chunks); + nchunks++; + } + } + + desc->nchunks = nchunks; + desc->size = full_size; + + /* + * Use hardware descriptor lists if possible when more than one chunk + * needs to be transferred (otherwise they don't make much sense). + * + * Source/Destination address should be located in same 4GiB region + * in the 40bit address space when it uses Hardware descriptor, + * and cross_boundary is checking it. + */ + desc->hwdescs.use = !cross_boundary && nchunks > 1; + if (desc->hwdescs.use) { + if (rcar_dmac_fill_hwdesc(chan, desc) < 0) + desc->hwdescs.use = false; + } + + return &desc->async_tx; +} + +/* ----------------------------------------------------------------------------- + * DMA engine operations + */ + +static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + int ret; + + INIT_LIST_HEAD(&rchan->desc.chunks_free); + INIT_LIST_HEAD(&rchan->desc.pages); + + /* Preallocate descriptors. */ + ret = rcar_dmac_xfer_chunk_alloc(rchan, GFP_KERNEL); + if (ret < 0) + return -ENOMEM; + + ret = rcar_dmac_desc_alloc(rchan, GFP_KERNEL); + if (ret < 0) + return -ENOMEM; + + return pm_runtime_get_sync(chan->device->dev); +} + +static void rcar_dmac_free_chan_resources(struct dma_chan *chan) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + struct rcar_dmac *dmac = to_rcar_dmac(chan->device); + struct rcar_dmac_chan_map *map = &rchan->map; + struct rcar_dmac_desc_page *page, *_page; + struct rcar_dmac_desc *desc; + LIST_HEAD(list); + + /* Protect against ISR */ + spin_lock_irq(&rchan->lock); + rcar_dmac_chan_halt(rchan); + spin_unlock_irq(&rchan->lock); + + /* + * Now no new interrupts will occur, but one might already be + * running. Wait for it to finish before freeing resources. + */ + synchronize_irq(rchan->irq); + + if (rchan->mid_rid >= 0) { + /* The caller is holding dma_list_mutex */ + clear_bit(rchan->mid_rid, dmac->modules); + rchan->mid_rid = -EINVAL; + } + + list_splice_init(&rchan->desc.free, &list); + list_splice_init(&rchan->desc.pending, &list); + list_splice_init(&rchan->desc.active, &list); + list_splice_init(&rchan->desc.done, &list); + list_splice_init(&rchan->desc.wait, &list); + + rchan->desc.running = NULL; + + list_for_each_entry(desc, &list, node) + rcar_dmac_realloc_hwdesc(rchan, desc, 0); + + list_for_each_entry_safe(page, _page, &rchan->desc.pages, node) { + list_del(&page->node); + free_page((unsigned long)page); + } + + /* Remove slave mapping if present. */ + if (map->slave.xfer_size) { + dma_unmap_resource(chan->device->dev, map->addr, + map->slave.xfer_size, map->dir, 0); + map->slave.xfer_size = 0; + } + + pm_runtime_put(chan->device->dev); +} + +static struct dma_async_tx_descriptor * +rcar_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + struct scatterlist sgl; + + if (!len) + return NULL; + + sg_init_table(&sgl, 1); + sg_set_page(&sgl, pfn_to_page(PFN_DOWN(dma_src)), len, + offset_in_page(dma_src)); + sg_dma_address(&sgl) = dma_src; + sg_dma_len(&sgl) = len; + + return rcar_dmac_chan_prep_sg(rchan, &sgl, 1, dma_dest, + DMA_MEM_TO_MEM, flags, false); +} + +static int rcar_dmac_map_slave_addr(struct dma_chan *chan, + enum dma_transfer_direction dir) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + struct rcar_dmac_chan_map *map = &rchan->map; + phys_addr_t dev_addr; + size_t dev_size; + enum dma_data_direction dev_dir; + + if (dir == DMA_DEV_TO_MEM) { + dev_addr = rchan->src.slave_addr; + dev_size = rchan->src.xfer_size; + dev_dir = DMA_TO_DEVICE; + } else { + dev_addr = rchan->dst.slave_addr; + dev_size = rchan->dst.xfer_size; + dev_dir = DMA_FROM_DEVICE; + } + + /* Reuse current map if possible. */ + if (dev_addr == map->slave.slave_addr && + dev_size == map->slave.xfer_size && + dev_dir == map->dir) + return 0; + + /* Remove old mapping if present. */ + if (map->slave.xfer_size) + dma_unmap_resource(chan->device->dev, map->addr, + map->slave.xfer_size, map->dir, 0); + map->slave.xfer_size = 0; + + /* Create new slave address map. */ + map->addr = dma_map_resource(chan->device->dev, dev_addr, dev_size, + dev_dir, 0); + + if (dma_mapping_error(chan->device->dev, map->addr)) { + dev_err(chan->device->dev, + "chan%u: failed to map %zx@%pap", rchan->index, + dev_size, &dev_addr); + return -EIO; + } + + dev_dbg(chan->device->dev, "chan%u: map %zx@%pap to %pad dir: %s\n", + rchan->index, dev_size, &dev_addr, &map->addr, + dev_dir == DMA_TO_DEVICE ? "DMA_TO_DEVICE" : "DMA_FROM_DEVICE"); + + map->slave.slave_addr = dev_addr; + map->slave.xfer_size = dev_size; + map->dir = dev_dir; + + return 0; +} + +static struct dma_async_tx_descriptor * +rcar_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + + /* Someone calling slave DMA on a generic channel? */ + if (rchan->mid_rid < 0 || !sg_len || !sg_dma_len(sgl)) { + dev_warn(chan->device->dev, + "%s: bad parameter: len=%d, id=%d\n", + __func__, sg_len, rchan->mid_rid); + return NULL; + } + + if (rcar_dmac_map_slave_addr(chan, dir)) + return NULL; + + return rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->map.addr, + dir, flags, false); +} + +#define RCAR_DMAC_MAX_SG_LEN 32 + +static struct dma_async_tx_descriptor * +rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + struct dma_async_tx_descriptor *desc; + struct scatterlist *sgl; + unsigned int sg_len; + unsigned int i; + + /* Someone calling slave DMA on a generic channel? */ + if (rchan->mid_rid < 0 || buf_len < period_len) { + dev_warn(chan->device->dev, + "%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n", + __func__, buf_len, period_len, rchan->mid_rid); + return NULL; + } + + if (rcar_dmac_map_slave_addr(chan, dir)) + return NULL; + + sg_len = buf_len / period_len; + if (sg_len > RCAR_DMAC_MAX_SG_LEN) { + dev_err(chan->device->dev, + "chan%u: sg length %d exceeds limit %d", + rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN); + return NULL; + } + + /* + * Allocate the sg list dynamically as it would consume too much stack + * space. + */ + sgl = kmalloc_array(sg_len, sizeof(*sgl), GFP_NOWAIT); + if (!sgl) + return NULL; + + sg_init_table(sgl, sg_len); + + for (i = 0; i < sg_len; ++i) { + dma_addr_t src = buf_addr + (period_len * i); + + sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len, + offset_in_page(src)); + sg_dma_address(&sgl[i]) = src; + sg_dma_len(&sgl[i]) = period_len; + } + + desc = rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->map.addr, + dir, flags, true); + + kfree(sgl); + return desc; +} + +static int rcar_dmac_device_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + + /* + * We could lock this, but you shouldn't be configuring the + * channel, while using it... + */ + rchan->src.slave_addr = cfg->src_addr; + rchan->dst.slave_addr = cfg->dst_addr; + rchan->src.xfer_size = cfg->src_addr_width; + rchan->dst.xfer_size = cfg->dst_addr_width; + + return 0; +} + +static int rcar_dmac_chan_terminate_all(struct dma_chan *chan) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&rchan->lock, flags); + rcar_dmac_chan_halt(rchan); + spin_unlock_irqrestore(&rchan->lock, flags); + + /* + * FIXME: No new interrupt can occur now, but the IRQ thread might still + * be running. + */ + + rcar_dmac_chan_reinit(rchan); + + return 0; +} + +static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, + dma_cookie_t cookie) +{ + struct rcar_dmac_desc *desc = chan->desc.running; + struct rcar_dmac_xfer_chunk *running = NULL; + struct rcar_dmac_xfer_chunk *chunk; + enum dma_status status; + unsigned int residue = 0; + unsigned int dptr = 0; + unsigned int chcrb; + unsigned int tcrb; + unsigned int i; + + if (!desc) + return 0; + + /* + * If the cookie corresponds to a descriptor that has been completed + * there is no residue. The same check has already been performed by the + * caller but without holding the channel lock, so the descriptor could + * now be complete. + */ + status = dma_cookie_status(&chan->chan, cookie, NULL); + if (status == DMA_COMPLETE) + return 0; + + /* + * If the cookie doesn't correspond to the currently running transfer + * then the descriptor hasn't been processed yet, and the residue is + * equal to the full descriptor size. + * Also, a client driver is possible to call this function before + * rcar_dmac_isr_channel_thread() runs. In this case, the "desc.running" + * will be the next descriptor, and the done list will appear. So, if + * the argument cookie matches the done list's cookie, we can assume + * the residue is zero. + */ + if (cookie != desc->async_tx.cookie) { + list_for_each_entry(desc, &chan->desc.done, node) { + if (cookie == desc->async_tx.cookie) + return 0; + } + list_for_each_entry(desc, &chan->desc.pending, node) { + if (cookie == desc->async_tx.cookie) + return desc->size; + } + list_for_each_entry(desc, &chan->desc.active, node) { + if (cookie == desc->async_tx.cookie) + return desc->size; + } + + /* + * No descriptor found for the cookie, there's thus no residue. + * This shouldn't happen if the calling driver passes a correct + * cookie value. + */ + WARN(1, "No descriptor for cookie!"); + return 0; + } + + /* + * We need to read two registers. + * Make sure the control register does not skip to next chunk + * while reading the counter. + * Trying it 3 times should be enough: Initial read, retry, retry + * for the paranoid. + */ + for (i = 0; i < 3; i++) { + chcrb = rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK; + tcrb = rcar_dmac_chan_read(chan, RCAR_DMATCRB); + /* Still the same? */ + if (chcrb == (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK)) + break; + } + WARN_ONCE(i >= 3, "residue might be not continuous!"); + + /* + * In descriptor mode the descriptor running pointer is not maintained + * by the interrupt handler, find the running descriptor from the + * descriptor pointer field in the CHCRB register. In non-descriptor + * mode just use the running descriptor pointer. + */ + if (desc->hwdescs.use) { + dptr = chcrb >> RCAR_DMACHCRB_DPTR_SHIFT; + if (dptr == 0) + dptr = desc->nchunks; + dptr--; + WARN_ON(dptr >= desc->nchunks); + } else { + running = desc->running; + } + + /* Compute the size of all chunks still to be transferred. */ + list_for_each_entry_reverse(chunk, &desc->chunks, node) { + if (chunk == running || ++dptr == desc->nchunks) + break; + + residue += chunk->size; + } + + /* Add the residue for the current chunk. */ + residue += tcrb << desc->xfer_shift; + + return residue; +} + +static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + enum dma_status status; + unsigned long flags; + unsigned int residue; + bool cyclic; + + status = dma_cookie_status(chan, cookie, txstate); + if (status == DMA_COMPLETE || !txstate) + return status; + + spin_lock_irqsave(&rchan->lock, flags); + residue = rcar_dmac_chan_get_residue(rchan, cookie); + cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false; + spin_unlock_irqrestore(&rchan->lock, flags); + + /* if there's no residue, the cookie is complete */ + if (!residue && !cyclic) + return DMA_COMPLETE; + + dma_set_residue(txstate, residue); + + return status; +} + +static void rcar_dmac_issue_pending(struct dma_chan *chan) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&rchan->lock, flags); + + if (list_empty(&rchan->desc.pending)) + goto done; + + /* Append the pending list to the active list. */ + list_splice_tail_init(&rchan->desc.pending, &rchan->desc.active); + + /* + * If no transfer is running pick the first descriptor from the active + * list and start the transfer. + */ + if (!rchan->desc.running) { + struct rcar_dmac_desc *desc; + + desc = list_first_entry(&rchan->desc.active, + struct rcar_dmac_desc, node); + rchan->desc.running = desc; + + rcar_dmac_chan_start_xfer(rchan); + } + +done: + spin_unlock_irqrestore(&rchan->lock, flags); +} + +static void rcar_dmac_device_synchronize(struct dma_chan *chan) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + + synchronize_irq(rchan->irq); +} + +/* ----------------------------------------------------------------------------- + * IRQ handling + */ + +static irqreturn_t rcar_dmac_isr_desc_stage_end(struct rcar_dmac_chan *chan) +{ + struct rcar_dmac_desc *desc = chan->desc.running; + unsigned int stage; + + if (WARN_ON(!desc || !desc->cyclic)) { + /* + * This should never happen, there should always be a running + * cyclic descriptor when a descriptor stage end interrupt is + * triggered. Warn and return. + */ + return IRQ_NONE; + } + + /* Program the interrupt pointer to the next stage. */ + stage = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT; + rcar_dmac_chan_write(chan, RCAR_DMADPCR, RCAR_DMADPCR_DIPT(stage)); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan *chan) +{ + struct rcar_dmac_desc *desc = chan->desc.running; + irqreturn_t ret = IRQ_WAKE_THREAD; + + if (WARN_ON_ONCE(!desc)) { + /* + * This should never happen, there should always be a running + * descriptor when a transfer end interrupt is triggered. Warn + * and return. + */ + return IRQ_NONE; + } + + /* + * The transfer end interrupt isn't generated for each chunk when using + * descriptor mode. Only update the running chunk pointer in + * non-descriptor mode. + */ + if (!desc->hwdescs.use) { + /* + * If we haven't completed the last transfer chunk simply move + * to the next one. Only wake the IRQ thread if the transfer is + * cyclic. + */ + if (!list_is_last(&desc->running->node, &desc->chunks)) { + desc->running = list_next_entry(desc->running, node); + if (!desc->cyclic) + ret = IRQ_HANDLED; + goto done; + } + + /* + * We've completed the last transfer chunk. If the transfer is + * cyclic, move back to the first one. + */ + if (desc->cyclic) { + desc->running = + list_first_entry(&desc->chunks, + struct rcar_dmac_xfer_chunk, + node); + goto done; + } + } + + /* The descriptor is complete, move it to the done list. */ + list_move_tail(&desc->node, &chan->desc.done); + + /* Queue the next descriptor, if any. */ + if (!list_empty(&chan->desc.active)) + chan->desc.running = list_first_entry(&chan->desc.active, + struct rcar_dmac_desc, + node); + else + chan->desc.running = NULL; + +done: + if (chan->desc.running) + rcar_dmac_chan_start_xfer(chan); + + return ret; +} + +static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev) +{ + u32 mask = RCAR_DMACHCR_DSE | RCAR_DMACHCR_TE; + struct rcar_dmac_chan *chan = dev; + irqreturn_t ret = IRQ_NONE; + bool reinit = false; + u32 chcr; + + spin_lock(&chan->lock); + + chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR); + if (chcr & RCAR_DMACHCR_CAE) { + struct rcar_dmac *dmac = to_rcar_dmac(chan->chan.device); + + /* + * We don't need to call rcar_dmac_chan_halt() + * because channel is already stopped in error case. + * We need to clear register and check DE bit as recovery. + */ + rcar_dmac_chan_clear(dmac, chan); + rcar_dmac_chcr_de_barrier(chan); + reinit = true; + goto spin_lock_end; + } + + if (chcr & RCAR_DMACHCR_TE) + mask |= RCAR_DMACHCR_DE; + rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr & ~mask); + if (mask & RCAR_DMACHCR_DE) + rcar_dmac_chcr_de_barrier(chan); + + if (chcr & RCAR_DMACHCR_DSE) + ret |= rcar_dmac_isr_desc_stage_end(chan); + + if (chcr & RCAR_DMACHCR_TE) + ret |= rcar_dmac_isr_transfer_end(chan); + +spin_lock_end: + spin_unlock(&chan->lock); + + if (reinit) { + dev_err(chan->chan.device->dev, "Channel Address Error\n"); + + rcar_dmac_chan_reinit(chan); + ret = IRQ_HANDLED; + } + + return ret; +} + +static irqreturn_t rcar_dmac_isr_channel_thread(int irq, void *dev) +{ + struct rcar_dmac_chan *chan = dev; + struct rcar_dmac_desc *desc; + struct dmaengine_desc_callback cb; + + spin_lock_irq(&chan->lock); + + /* For cyclic transfers notify the user after every chunk. */ + if (chan->desc.running && chan->desc.running->cyclic) { + desc = chan->desc.running; + dmaengine_desc_get_callback(&desc->async_tx, &cb); + + if (dmaengine_desc_callback_valid(&cb)) { + spin_unlock_irq(&chan->lock); + dmaengine_desc_callback_invoke(&cb, NULL); + spin_lock_irq(&chan->lock); + } + } + + /* + * Call the callback function for all descriptors on the done list and + * move them to the ack wait list. + */ + while (!list_empty(&chan->desc.done)) { + desc = list_first_entry(&chan->desc.done, struct rcar_dmac_desc, + node); + dma_cookie_complete(&desc->async_tx); + list_del(&desc->node); + + dmaengine_desc_get_callback(&desc->async_tx, &cb); + if (dmaengine_desc_callback_valid(&cb)) { + spin_unlock_irq(&chan->lock); + /* + * We own the only reference to this descriptor, we can + * safely dereference it without holding the channel + * lock. + */ + dmaengine_desc_callback_invoke(&cb, NULL); + spin_lock_irq(&chan->lock); + } + + list_add_tail(&desc->node, &chan->desc.wait); + } + + spin_unlock_irq(&chan->lock); + + /* Recycle all acked descriptors. */ + rcar_dmac_desc_recycle_acked(chan); + + return IRQ_HANDLED; +} + +/* ----------------------------------------------------------------------------- + * OF xlate and channel filter + */ + +static bool rcar_dmac_chan_filter(struct dma_chan *chan, void *arg) +{ + struct rcar_dmac *dmac = to_rcar_dmac(chan->device); + struct of_phandle_args *dma_spec = arg; + + /* + * FIXME: Using a filter on OF platforms is a nonsense. The OF xlate + * function knows from which device it wants to allocate a channel from, + * and would be perfectly capable of selecting the channel it wants. + * Forcing it to call dma_request_channel() and iterate through all + * channels from all controllers is just pointless. + */ + if (chan->device->device_config != rcar_dmac_device_config) + return false; + + return !test_and_set_bit(dma_spec->args[0], dmac->modules); +} + +static struct dma_chan *rcar_dmac_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct rcar_dmac_chan *rchan; + struct dma_chan *chan; + dma_cap_mask_t mask; + + if (dma_spec->args_count != 1) + return NULL; + + /* Only slave DMA channels can be allocated via DT */ + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + chan = __dma_request_channel(&mask, rcar_dmac_chan_filter, dma_spec, + ofdma->of_node); + if (!chan) + return NULL; + + rchan = to_rcar_dmac_chan(chan); + rchan->mid_rid = dma_spec->args[0]; + + return chan; +} + +/* ----------------------------------------------------------------------------- + * Power management + */ + +#ifdef CONFIG_PM +static int rcar_dmac_runtime_suspend(struct device *dev) +{ + return 0; +} + +static int rcar_dmac_runtime_resume(struct device *dev) +{ + struct rcar_dmac *dmac = dev_get_drvdata(dev); + + return rcar_dmac_init(dmac); +} +#endif + +static const struct dev_pm_ops rcar_dmac_pm = { + /* + * TODO for system sleep/resume: + * - Wait for the current transfer to complete and stop the device, + * - Resume transfers, if any. + */ + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume, + NULL) +}; + +/* ----------------------------------------------------------------------------- + * Probe and remove + */ + +static int rcar_dmac_chan_probe(struct rcar_dmac *dmac, + struct rcar_dmac_chan *rchan) +{ + struct platform_device *pdev = to_platform_device(dmac->dev); + struct dma_chan *chan = &rchan->chan; + char pdev_irqname[5]; + char *irqname; + int ret; + + rchan->mid_rid = -EINVAL; + + spin_lock_init(&rchan->lock); + + INIT_LIST_HEAD(&rchan->desc.free); + INIT_LIST_HEAD(&rchan->desc.pending); + INIT_LIST_HEAD(&rchan->desc.active); + INIT_LIST_HEAD(&rchan->desc.done); + INIT_LIST_HEAD(&rchan->desc.wait); + + /* Request the channel interrupt. */ + sprintf(pdev_irqname, "ch%u", rchan->index); + rchan->irq = platform_get_irq_byname(pdev, pdev_irqname); + if (rchan->irq < 0) + return -ENODEV; + + irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u", + dev_name(dmac->dev), rchan->index); + if (!irqname) + return -ENOMEM; + + /* + * Initialize the DMA engine channel and add it to the DMA engine + * channels list. + */ + chan->device = &dmac->engine; + dma_cookie_init(chan); + + list_add_tail(&chan->device_node, &dmac->engine.channels); + + ret = devm_request_threaded_irq(dmac->dev, rchan->irq, + rcar_dmac_isr_channel, + rcar_dmac_isr_channel_thread, 0, + irqname, rchan); + if (ret) { + dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", + rchan->irq, ret); + return ret; + } + + return 0; +} + +#define RCAR_DMAC_MAX_CHANNELS 32 + +static int rcar_dmac_parse_of(struct device *dev, struct rcar_dmac *dmac) +{ + struct device_node *np = dev->of_node; + int ret; + + ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels); + if (ret < 0) { + dev_err(dev, "unable to read dma-channels property\n"); + return ret; + } + + /* The hardware and driver don't support more than 32 bits in CHCLR */ + if (dmac->n_channels <= 0 || + dmac->n_channels >= RCAR_DMAC_MAX_CHANNELS) { + dev_err(dev, "invalid number of channels %u\n", + dmac->n_channels); + return -EINVAL; + } + + /* + * If the driver is unable to read dma-channel-mask property, + * the driver assumes that it can use all channels. + */ + dmac->channels_mask = GENMASK(dmac->n_channels - 1, 0); + of_property_read_u32(np, "dma-channel-mask", &dmac->channels_mask); + + /* If the property has out-of-channel mask, this driver clears it */ + dmac->channels_mask &= GENMASK(dmac->n_channels - 1, 0); + + return 0; +} + +static int rcar_dmac_probe(struct platform_device *pdev) +{ + const enum dma_slave_buswidth widths = DMA_SLAVE_BUSWIDTH_1_BYTE | + DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES | + DMA_SLAVE_BUSWIDTH_8_BYTES | DMA_SLAVE_BUSWIDTH_16_BYTES | + DMA_SLAVE_BUSWIDTH_32_BYTES | DMA_SLAVE_BUSWIDTH_64_BYTES; + const struct rcar_dmac_of_data *data; + struct rcar_dmac_chan *chan; + struct dma_device *engine; + void __iomem *chan_base; + struct rcar_dmac *dmac; + unsigned int i; + int ret; + + data = of_device_get_match_data(&pdev->dev); + if (!data) + return -EINVAL; + + dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); + if (!dmac) + return -ENOMEM; + + dmac->dev = &pdev->dev; + platform_set_drvdata(pdev, dmac); + ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); + if (ret) + return ret; + + ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; + + ret = rcar_dmac_parse_of(&pdev->dev, dmac); + if (ret < 0) + return ret; + + /* + * A still unconfirmed hardware bug prevents the IPMMU microTLB 0 to be + * flushed correctly, resulting in memory corruption. DMAC 0 channel 0 + * is connected to microTLB 0 on currently supported platforms, so we + * can't use it with the IPMMU. As the IOMMU API operates at the device + * level we can't disable it selectively, so ignore channel 0 for now if + * the device is part of an IOMMU group. + */ + if (device_iommu_mapped(&pdev->dev)) + dmac->channels_mask &= ~BIT(0); + + dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels, + sizeof(*dmac->channels), GFP_KERNEL); + if (!dmac->channels) + return -ENOMEM; + + /* Request resources. */ + dmac->dmac_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dmac->dmac_base)) + return PTR_ERR(dmac->dmac_base); + + if (!data->chan_offset_base) { + dmac->chan_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(dmac->chan_base)) + return PTR_ERR(dmac->chan_base); + + chan_base = dmac->chan_base; + } else { + chan_base = dmac->dmac_base + data->chan_offset_base; + } + + for_each_rcar_dmac_chan(i, dmac, chan) { + chan->index = i; + chan->iomem = chan_base + i * data->chan_offset_stride; + } + + /* Enable runtime PM and initialize the device. */ + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); + goto err_pm_disable; + } + + ret = rcar_dmac_init(dmac); + pm_runtime_put(&pdev->dev); + + if (ret) { + dev_err(&pdev->dev, "failed to reset device\n"); + goto err_pm_disable; + } + + /* Initialize engine */ + engine = &dmac->engine; + + dma_cap_set(DMA_MEMCPY, engine->cap_mask); + dma_cap_set(DMA_SLAVE, engine->cap_mask); + + engine->dev = &pdev->dev; + engine->copy_align = ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE); + + engine->src_addr_widths = widths; + engine->dst_addr_widths = widths; + engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + engine->device_alloc_chan_resources = rcar_dmac_alloc_chan_resources; + engine->device_free_chan_resources = rcar_dmac_free_chan_resources; + engine->device_prep_dma_memcpy = rcar_dmac_prep_dma_memcpy; + engine->device_prep_slave_sg = rcar_dmac_prep_slave_sg; + engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic; + engine->device_config = rcar_dmac_device_config; + engine->device_pause = rcar_dmac_chan_pause; + engine->device_terminate_all = rcar_dmac_chan_terminate_all; + engine->device_tx_status = rcar_dmac_tx_status; + engine->device_issue_pending = rcar_dmac_issue_pending; + engine->device_synchronize = rcar_dmac_device_synchronize; + + INIT_LIST_HEAD(&engine->channels); + + for_each_rcar_dmac_chan(i, dmac, chan) { + ret = rcar_dmac_chan_probe(dmac, chan); + if (ret < 0) + goto err_pm_disable; + } + + /* Register the DMAC as a DMA provider for DT. */ + ret = of_dma_controller_register(pdev->dev.of_node, rcar_dmac_of_xlate, + NULL); + if (ret < 0) + goto err_pm_disable; + + /* + * Register the DMA engine device. + * + * Default transfer size of 32 bytes requires 32-byte alignment. + */ + ret = dma_async_device_register(engine); + if (ret < 0) + goto err_dma_free; + + return 0; + +err_dma_free: + of_dma_controller_free(pdev->dev.of_node); +err_pm_disable: + pm_runtime_disable(&pdev->dev); + return ret; +} + +static int rcar_dmac_remove(struct platform_device *pdev) +{ + struct rcar_dmac *dmac = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&dmac->engine); + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static void rcar_dmac_shutdown(struct platform_device *pdev) +{ + struct rcar_dmac *dmac = platform_get_drvdata(pdev); + + rcar_dmac_stop_all_chan(dmac); +} + +static const struct rcar_dmac_of_data rcar_dmac_data = { + .chan_offset_base = 0x8000, + .chan_offset_stride = 0x80, +}; + +static const struct rcar_dmac_of_data rcar_gen4_dmac_data = { + .chan_offset_base = 0x0, + .chan_offset_stride = 0x1000, +}; + +static const struct of_device_id rcar_dmac_of_ids[] = { + { + .compatible = "renesas,rcar-dmac", + .data = &rcar_dmac_data, + }, { + .compatible = "renesas,rcar-gen4-dmac", + .data = &rcar_gen4_dmac_data, + }, { + .compatible = "renesas,dmac-r8a779a0", + .data = &rcar_gen4_dmac_data, + }, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, rcar_dmac_of_ids); + +static struct platform_driver rcar_dmac_driver = { + .driver = { + .pm = &rcar_dmac_pm, + .name = "rcar-dmac", + .of_match_table = rcar_dmac_of_ids, + }, + .probe = rcar_dmac_probe, + .remove = rcar_dmac_remove, + .shutdown = rcar_dmac_shutdown, +}; + +module_platform_driver(rcar_dmac_driver); + +MODULE_DESCRIPTION("R-Car Gen2 DMA Controller Driver"); +MODULE_AUTHOR("Laurent Pinchart "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c new file mode 100644 index 0000000000..f777addda8 --- /dev/null +++ b/drivers/dma/sh/rz-dmac.c @@ -0,0 +1,1013 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Renesas RZ/G2L DMA Controller Driver + * + * Based on imx-dma.c + * + * Copyright (C) 2021 Renesas Electronics Corp. + * Copyright 2010 Sascha Hauer, Pengutronix + * Copyright 2012 Javier Martin, Vista Silicon + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" +#include "../virt-dma.h" + +enum rz_dmac_prep_type { + RZ_DMAC_DESC_MEMCPY, + RZ_DMAC_DESC_SLAVE_SG, +}; + +struct rz_lmdesc { + u32 header; + u32 sa; + u32 da; + u32 tb; + u32 chcfg; + u32 chitvl; + u32 chext; + u32 nxla; +}; + +struct rz_dmac_desc { + struct virt_dma_desc vd; + dma_addr_t src; + dma_addr_t dest; + size_t len; + struct list_head node; + enum dma_transfer_direction direction; + enum rz_dmac_prep_type type; + /* For slave sg */ + struct scatterlist *sg; + unsigned int sgcount; +}; + +#define to_rz_dmac_desc(d) container_of(d, struct rz_dmac_desc, vd) + +struct rz_dmac_chan { + struct virt_dma_chan vc; + void __iomem *ch_base; + void __iomem *ch_cmn_base; + unsigned int index; + int irq; + struct rz_dmac_desc *desc; + int descs_allocated; + + dma_addr_t src_per_address; + dma_addr_t dst_per_address; + + u32 chcfg; + u32 chctrl; + int mid_rid; + + struct list_head ld_free; + struct list_head ld_queue; + struct list_head ld_active; + + struct { + struct rz_lmdesc *base; + struct rz_lmdesc *head; + struct rz_lmdesc *tail; + dma_addr_t base_dma; + } lmdesc; +}; + +#define to_rz_dmac_chan(c) container_of(c, struct rz_dmac_chan, vc.chan) + +struct rz_dmac { + struct dma_device engine; + struct device *dev; + struct reset_control *rstc; + void __iomem *base; + void __iomem *ext_base; + + unsigned int n_channels; + struct rz_dmac_chan *channels; + + DECLARE_BITMAP(modules, 1024); +}; + +#define to_rz_dmac(d) container_of(d, struct rz_dmac, engine) + +/* + * ----------------------------------------------------------------------------- + * Registers + */ + +#define CHSTAT 0x0024 +#define CHCTRL 0x0028 +#define CHCFG 0x002c +#define NXLA 0x0038 + +#define DCTRL 0x0000 + +#define EACH_CHANNEL_OFFSET 0x0040 +#define CHANNEL_0_7_OFFSET 0x0000 +#define CHANNEL_0_7_COMMON_BASE 0x0300 +#define CHANNEL_8_15_OFFSET 0x0400 +#define CHANNEL_8_15_COMMON_BASE 0x0700 + +#define CHSTAT_ER BIT(4) +#define CHSTAT_EN BIT(0) + +#define CHCTRL_CLRINTMSK BIT(17) +#define CHCTRL_CLRSUS BIT(9) +#define CHCTRL_CLRTC BIT(6) +#define CHCTRL_CLREND BIT(5) +#define CHCTRL_CLRRQ BIT(4) +#define CHCTRL_SWRST BIT(3) +#define CHCTRL_STG BIT(2) +#define CHCTRL_CLREN BIT(1) +#define CHCTRL_SETEN BIT(0) +#define CHCTRL_DEFAULT (CHCTRL_CLRINTMSK | CHCTRL_CLRSUS | \ + CHCTRL_CLRTC | CHCTRL_CLREND | \ + CHCTRL_CLRRQ | CHCTRL_SWRST | \ + CHCTRL_CLREN) + +#define CHCFG_DMS BIT(31) +#define CHCFG_DEM BIT(24) +#define CHCFG_DAD BIT(21) +#define CHCFG_SAD BIT(20) +#define CHCFG_REQD BIT(3) +#define CHCFG_SEL(bits) ((bits) & 0x07) +#define CHCFG_MEM_COPY (0x80400008) +#define CHCFG_FILL_DDS_MASK GENMASK(19, 16) +#define CHCFG_FILL_SDS_MASK GENMASK(15, 12) +#define CHCFG_FILL_TM(a) (((a) & BIT(5)) << 22) +#define CHCFG_FILL_AM(a) (((a) & GENMASK(4, 2)) << 6) +#define CHCFG_FILL_LVL(a) (((a) & BIT(1)) << 5) +#define CHCFG_FILL_HIEN(a) (((a) & BIT(0)) << 5) + +#define MID_RID_MASK GENMASK(9, 0) +#define CHCFG_MASK GENMASK(15, 10) +#define CHCFG_DS_INVALID 0xFF +#define DCTRL_LVINT BIT(1) +#define DCTRL_PR BIT(0) +#define DCTRL_DEFAULT (DCTRL_LVINT | DCTRL_PR) + +/* LINK MODE DESCRIPTOR */ +#define HEADER_LV BIT(0) + +#define RZ_DMAC_MAX_CHAN_DESCRIPTORS 16 +#define RZ_DMAC_MAX_CHANNELS 16 +#define DMAC_NR_LMDESC 64 + +/* + * ----------------------------------------------------------------------------- + * Device access + */ + +static void rz_dmac_writel(struct rz_dmac *dmac, unsigned int val, + unsigned int offset) +{ + writel(val, dmac->base + offset); +} + +static void rz_dmac_ext_writel(struct rz_dmac *dmac, unsigned int val, + unsigned int offset) +{ + writel(val, dmac->ext_base + offset); +} + +static u32 rz_dmac_ext_readl(struct rz_dmac *dmac, unsigned int offset) +{ + return readl(dmac->ext_base + offset); +} + +static void rz_dmac_ch_writel(struct rz_dmac_chan *channel, unsigned int val, + unsigned int offset, int which) +{ + if (which) + writel(val, channel->ch_base + offset); + else + writel(val, channel->ch_cmn_base + offset); +} + +static u32 rz_dmac_ch_readl(struct rz_dmac_chan *channel, + unsigned int offset, int which) +{ + if (which) + return readl(channel->ch_base + offset); + else + return readl(channel->ch_cmn_base + offset); +} + +/* + * ----------------------------------------------------------------------------- + * Initialization + */ + +static void rz_lmdesc_setup(struct rz_dmac_chan *channel, + struct rz_lmdesc *lmdesc) +{ + u32 nxla; + + channel->lmdesc.base = lmdesc; + channel->lmdesc.head = lmdesc; + channel->lmdesc.tail = lmdesc; + nxla = channel->lmdesc.base_dma; + while (lmdesc < (channel->lmdesc.base + (DMAC_NR_LMDESC - 1))) { + lmdesc->header = 0; + nxla += sizeof(*lmdesc); + lmdesc->nxla = nxla; + lmdesc++; + } + + lmdesc->header = 0; + lmdesc->nxla = channel->lmdesc.base_dma; +} + +/* + * ----------------------------------------------------------------------------- + * Descriptors preparation + */ + +static void rz_dmac_lmdesc_recycle(struct rz_dmac_chan *channel) +{ + struct rz_lmdesc *lmdesc = channel->lmdesc.head; + + while (!(lmdesc->header & HEADER_LV)) { + lmdesc->header = 0; + lmdesc++; + if (lmdesc >= (channel->lmdesc.base + DMAC_NR_LMDESC)) + lmdesc = channel->lmdesc.base; + } + channel->lmdesc.head = lmdesc; +} + +static void rz_dmac_enable_hw(struct rz_dmac_chan *channel) +{ + struct dma_chan *chan = &channel->vc.chan; + struct rz_dmac *dmac = to_rz_dmac(chan->device); + unsigned long flags; + u32 nxla; + u32 chctrl; + u32 chstat; + + dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index); + + local_irq_save(flags); + + rz_dmac_lmdesc_recycle(channel); + + nxla = channel->lmdesc.base_dma + + (sizeof(struct rz_lmdesc) * (channel->lmdesc.head - + channel->lmdesc.base)); + + chstat = rz_dmac_ch_readl(channel, CHSTAT, 1); + if (!(chstat & CHSTAT_EN)) { + chctrl = (channel->chctrl | CHCTRL_SETEN); + rz_dmac_ch_writel(channel, nxla, NXLA, 1); + rz_dmac_ch_writel(channel, channel->chcfg, CHCFG, 1); + rz_dmac_ch_writel(channel, CHCTRL_SWRST, CHCTRL, 1); + rz_dmac_ch_writel(channel, chctrl, CHCTRL, 1); + } + + local_irq_restore(flags); +} + +static void rz_dmac_disable_hw(struct rz_dmac_chan *channel) +{ + struct dma_chan *chan = &channel->vc.chan; + struct rz_dmac *dmac = to_rz_dmac(chan->device); + unsigned long flags; + + dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index); + + local_irq_save(flags); + rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); + local_irq_restore(flags); +} + +static void rz_dmac_set_dmars_register(struct rz_dmac *dmac, int nr, u32 dmars) +{ + u32 dmars_offset = (nr / 2) * 4; + u32 shift = (nr % 2) * 16; + u32 dmars32; + + dmars32 = rz_dmac_ext_readl(dmac, dmars_offset); + dmars32 &= ~(0xffff << shift); + dmars32 |= dmars << shift; + + rz_dmac_ext_writel(dmac, dmars32, dmars_offset); +} + +static void rz_dmac_prepare_desc_for_memcpy(struct rz_dmac_chan *channel) +{ + struct dma_chan *chan = &channel->vc.chan; + struct rz_dmac *dmac = to_rz_dmac(chan->device); + struct rz_lmdesc *lmdesc = channel->lmdesc.tail; + struct rz_dmac_desc *d = channel->desc; + u32 chcfg = CHCFG_MEM_COPY; + + /* prepare descriptor */ + lmdesc->sa = d->src; + lmdesc->da = d->dest; + lmdesc->tb = d->len; + lmdesc->chcfg = chcfg; + lmdesc->chitvl = 0; + lmdesc->chext = 0; + lmdesc->header = HEADER_LV; + + rz_dmac_set_dmars_register(dmac, channel->index, 0); + + channel->chcfg = chcfg; + channel->chctrl = CHCTRL_STG | CHCTRL_SETEN; +} + +static void rz_dmac_prepare_descs_for_slave_sg(struct rz_dmac_chan *channel) +{ + struct dma_chan *chan = &channel->vc.chan; + struct rz_dmac *dmac = to_rz_dmac(chan->device); + struct rz_dmac_desc *d = channel->desc; + struct scatterlist *sg, *sgl = d->sg; + struct rz_lmdesc *lmdesc; + unsigned int i, sg_len = d->sgcount; + + channel->chcfg |= CHCFG_SEL(channel->index) | CHCFG_DEM | CHCFG_DMS; + + if (d->direction == DMA_DEV_TO_MEM) { + channel->chcfg |= CHCFG_SAD; + channel->chcfg &= ~CHCFG_REQD; + } else { + channel->chcfg |= CHCFG_DAD | CHCFG_REQD; + } + + lmdesc = channel->lmdesc.tail; + + for (i = 0, sg = sgl; i < sg_len; i++, sg = sg_next(sg)) { + if (d->direction == DMA_DEV_TO_MEM) { + lmdesc->sa = channel->src_per_address; + lmdesc->da = sg_dma_address(sg); + } else { + lmdesc->sa = sg_dma_address(sg); + lmdesc->da = channel->dst_per_address; + } + + lmdesc->tb = sg_dma_len(sg); + lmdesc->chitvl = 0; + lmdesc->chext = 0; + if (i == (sg_len - 1)) { + lmdesc->chcfg = (channel->chcfg & ~CHCFG_DEM); + lmdesc->header = HEADER_LV; + } else { + lmdesc->chcfg = channel->chcfg; + lmdesc->header = HEADER_LV; + } + if (++lmdesc >= (channel->lmdesc.base + DMAC_NR_LMDESC)) + lmdesc = channel->lmdesc.base; + } + + channel->lmdesc.tail = lmdesc; + + rz_dmac_set_dmars_register(dmac, channel->index, channel->mid_rid); + channel->chctrl = CHCTRL_SETEN; +} + +static int rz_dmac_xfer_desc(struct rz_dmac_chan *chan) +{ + struct rz_dmac_desc *d = chan->desc; + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&chan->vc); + if (!vd) + return 0; + + list_del(&vd->node); + + switch (d->type) { + case RZ_DMAC_DESC_MEMCPY: + rz_dmac_prepare_desc_for_memcpy(chan); + break; + + case RZ_DMAC_DESC_SLAVE_SG: + rz_dmac_prepare_descs_for_slave_sg(chan); + break; + + default: + return -EINVAL; + } + + rz_dmac_enable_hw(chan); + + return 0; +} + +/* + * ----------------------------------------------------------------------------- + * DMA engine operations + */ + +static int rz_dmac_alloc_chan_resources(struct dma_chan *chan) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + + while (channel->descs_allocated < RZ_DMAC_MAX_CHAN_DESCRIPTORS) { + struct rz_dmac_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + break; + + list_add_tail(&desc->node, &channel->ld_free); + channel->descs_allocated++; + } + + if (!channel->descs_allocated) + return -ENOMEM; + + return channel->descs_allocated; +} + +static void rz_dmac_free_chan_resources(struct dma_chan *chan) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + struct rz_dmac *dmac = to_rz_dmac(chan->device); + struct rz_lmdesc *lmdesc = channel->lmdesc.base; + struct rz_dmac_desc *desc, *_desc; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&channel->vc.lock, flags); + + for (i = 0; i < DMAC_NR_LMDESC; i++) + lmdesc[i].header = 0; + + rz_dmac_disable_hw(channel); + list_splice_tail_init(&channel->ld_active, &channel->ld_free); + list_splice_tail_init(&channel->ld_queue, &channel->ld_free); + + if (channel->mid_rid >= 0) { + clear_bit(channel->mid_rid, dmac->modules); + channel->mid_rid = -EINVAL; + } + + spin_unlock_irqrestore(&channel->vc.lock, flags); + + list_for_each_entry_safe(desc, _desc, &channel->ld_free, node) { + kfree(desc); + channel->descs_allocated--; + } + + INIT_LIST_HEAD(&channel->ld_free); + vchan_free_chan_resources(&channel->vc); +} + +static struct dma_async_tx_descriptor * +rz_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + struct rz_dmac *dmac = to_rz_dmac(chan->device); + struct rz_dmac_desc *desc; + + dev_dbg(dmac->dev, "%s channel: %d src=0x%pad dst=0x%pad len=%zu\n", + __func__, channel->index, &src, &dest, len); + + if (list_empty(&channel->ld_free)) + return NULL; + + desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); + + desc->type = RZ_DMAC_DESC_MEMCPY; + desc->src = src; + desc->dest = dest; + desc->len = len; + desc->direction = DMA_MEM_TO_MEM; + + list_move_tail(channel->ld_free.next, &channel->ld_queue); + return vchan_tx_prep(&channel->vc, &desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +rz_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + struct rz_dmac_desc *desc; + struct scatterlist *sg; + int dma_length = 0; + int i = 0; + + if (list_empty(&channel->ld_free)) + return NULL; + + desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); + + for_each_sg(sgl, sg, sg_len, i) { + dma_length += sg_dma_len(sg); + } + + desc->type = RZ_DMAC_DESC_SLAVE_SG; + desc->sg = sgl; + desc->sgcount = sg_len; + desc->len = dma_length; + desc->direction = direction; + + if (direction == DMA_DEV_TO_MEM) + desc->src = channel->src_per_address; + else + desc->dest = channel->dst_per_address; + + list_move_tail(channel->ld_free.next, &channel->ld_queue); + return vchan_tx_prep(&channel->vc, &desc->vd, flags); +} + +static int rz_dmac_terminate_all(struct dma_chan *chan) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + rz_dmac_disable_hw(channel); + spin_lock_irqsave(&channel->vc.lock, flags); + list_splice_tail_init(&channel->ld_active, &channel->ld_free); + list_splice_tail_init(&channel->ld_queue, &channel->ld_free); + spin_unlock_irqrestore(&channel->vc.lock, flags); + vchan_get_all_descriptors(&channel->vc, &head); + vchan_dma_desc_free_list(&channel->vc, &head); + + return 0; +} + +static void rz_dmac_issue_pending(struct dma_chan *chan) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + struct rz_dmac *dmac = to_rz_dmac(chan->device); + struct rz_dmac_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&channel->vc.lock, flags); + + if (!list_empty(&channel->ld_queue)) { + desc = list_first_entry(&channel->ld_queue, + struct rz_dmac_desc, node); + channel->desc = desc; + if (vchan_issue_pending(&channel->vc)) { + if (rz_dmac_xfer_desc(channel) < 0) + dev_warn(dmac->dev, "ch: %d couldn't issue DMA xfer\n", + channel->index); + else + list_move_tail(channel->ld_queue.next, + &channel->ld_active); + } + } + + spin_unlock_irqrestore(&channel->vc.lock, flags); +} + +static u8 rz_dmac_ds_to_val_mapping(enum dma_slave_buswidth ds) +{ + u8 i; + static const enum dma_slave_buswidth ds_lut[] = { + DMA_SLAVE_BUSWIDTH_1_BYTE, + DMA_SLAVE_BUSWIDTH_2_BYTES, + DMA_SLAVE_BUSWIDTH_4_BYTES, + DMA_SLAVE_BUSWIDTH_8_BYTES, + DMA_SLAVE_BUSWIDTH_16_BYTES, + DMA_SLAVE_BUSWIDTH_32_BYTES, + DMA_SLAVE_BUSWIDTH_64_BYTES, + DMA_SLAVE_BUSWIDTH_128_BYTES, + }; + + for (i = 0; i < ARRAY_SIZE(ds_lut); i++) { + if (ds_lut[i] == ds) + return i; + } + + return CHCFG_DS_INVALID; +} + +static int rz_dmac_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + u32 val; + + channel->src_per_address = config->src_addr; + channel->dst_per_address = config->dst_addr; + + val = rz_dmac_ds_to_val_mapping(config->dst_addr_width); + if (val == CHCFG_DS_INVALID) + return -EINVAL; + + channel->chcfg &= ~CHCFG_FILL_DDS_MASK; + channel->chcfg |= FIELD_PREP(CHCFG_FILL_DDS_MASK, val); + + val = rz_dmac_ds_to_val_mapping(config->src_addr_width); + if (val == CHCFG_DS_INVALID) + return -EINVAL; + + channel->chcfg &= ~CHCFG_FILL_SDS_MASK; + channel->chcfg |= FIELD_PREP(CHCFG_FILL_SDS_MASK, val); + + return 0; +} + +static void rz_dmac_virt_desc_free(struct virt_dma_desc *vd) +{ + /* + * Place holder + * Descriptor allocation is done during alloc_chan_resources and + * get freed during free_chan_resources. + * list is used to manage the descriptors and avoid any memory + * allocation/free during DMA read/write. + */ +} + +static void rz_dmac_device_synchronize(struct dma_chan *chan) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + struct rz_dmac *dmac = to_rz_dmac(chan->device); + u32 chstat; + int ret; + + ret = read_poll_timeout(rz_dmac_ch_readl, chstat, !(chstat & CHSTAT_EN), + 100, 100000, false, channel, CHSTAT, 1); + if (ret < 0) + dev_warn(dmac->dev, "DMA Timeout"); + + rz_dmac_set_dmars_register(dmac, channel->index, 0); +} + +/* + * ----------------------------------------------------------------------------- + * IRQ handling + */ + +static void rz_dmac_irq_handle_channel(struct rz_dmac_chan *channel) +{ + struct dma_chan *chan = &channel->vc.chan; + struct rz_dmac *dmac = to_rz_dmac(chan->device); + u32 chstat, chctrl; + + chstat = rz_dmac_ch_readl(channel, CHSTAT, 1); + if (chstat & CHSTAT_ER) { + dev_err(dmac->dev, "DMAC err CHSTAT_%d = %08X\n", + channel->index, chstat); + rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); + goto done; + } + + chctrl = rz_dmac_ch_readl(channel, CHCTRL, 1); + rz_dmac_ch_writel(channel, chctrl | CHCTRL_CLREND, CHCTRL, 1); +done: + return; +} + +static irqreturn_t rz_dmac_irq_handler(int irq, void *dev_id) +{ + struct rz_dmac_chan *channel = dev_id; + + if (channel) { + rz_dmac_irq_handle_channel(channel); + return IRQ_WAKE_THREAD; + } + /* handle DMAERR irq */ + return IRQ_HANDLED; +} + +static irqreturn_t rz_dmac_irq_handler_thread(int irq, void *dev_id) +{ + struct rz_dmac_chan *channel = dev_id; + struct rz_dmac_desc *desc = NULL; + unsigned long flags; + + spin_lock_irqsave(&channel->vc.lock, flags); + + if (list_empty(&channel->ld_active)) { + /* Someone might have called terminate all */ + goto out; + } + + desc = list_first_entry(&channel->ld_active, struct rz_dmac_desc, node); + vchan_cookie_complete(&desc->vd); + list_move_tail(channel->ld_active.next, &channel->ld_free); + if (!list_empty(&channel->ld_queue)) { + desc = list_first_entry(&channel->ld_queue, struct rz_dmac_desc, + node); + channel->desc = desc; + if (rz_dmac_xfer_desc(channel) == 0) + list_move_tail(channel->ld_queue.next, &channel->ld_active); + } +out: + spin_unlock_irqrestore(&channel->vc.lock, flags); + + return IRQ_HANDLED; +} + +/* + * ----------------------------------------------------------------------------- + * OF xlate and channel filter + */ + +static bool rz_dmac_chan_filter(struct dma_chan *chan, void *arg) +{ + struct rz_dmac_chan *channel = to_rz_dmac_chan(chan); + struct rz_dmac *dmac = to_rz_dmac(chan->device); + struct of_phandle_args *dma_spec = arg; + u32 ch_cfg; + + channel->mid_rid = dma_spec->args[0] & MID_RID_MASK; + ch_cfg = (dma_spec->args[0] & CHCFG_MASK) >> 10; + channel->chcfg = CHCFG_FILL_TM(ch_cfg) | CHCFG_FILL_AM(ch_cfg) | + CHCFG_FILL_LVL(ch_cfg) | CHCFG_FILL_HIEN(ch_cfg); + + return !test_and_set_bit(channel->mid_rid, dmac->modules); +} + +static struct dma_chan *rz_dmac_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + dma_cap_mask_t mask; + + if (dma_spec->args_count != 1) + return NULL; + + /* Only slave DMA channels can be allocated via DT */ + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + return dma_request_channel(mask, rz_dmac_chan_filter, dma_spec); +} + +/* + * ----------------------------------------------------------------------------- + * Probe and remove + */ + +static int rz_dmac_chan_probe(struct rz_dmac *dmac, + struct rz_dmac_chan *channel, + unsigned int index) +{ + struct platform_device *pdev = to_platform_device(dmac->dev); + struct rz_lmdesc *lmdesc; + char pdev_irqname[5]; + char *irqname; + int ret; + + channel->index = index; + channel->mid_rid = -EINVAL; + + /* Request the channel interrupt. */ + sprintf(pdev_irqname, "ch%u", index); + channel->irq = platform_get_irq_byname(pdev, pdev_irqname); + if (channel->irq < 0) + return channel->irq; + + irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u", + dev_name(dmac->dev), index); + if (!irqname) + return -ENOMEM; + + ret = devm_request_threaded_irq(dmac->dev, channel->irq, + rz_dmac_irq_handler, + rz_dmac_irq_handler_thread, 0, + irqname, channel); + if (ret) { + dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", + channel->irq, ret); + return ret; + } + + /* Set io base address for each channel */ + if (index < 8) { + channel->ch_base = dmac->base + CHANNEL_0_7_OFFSET + + EACH_CHANNEL_OFFSET * index; + channel->ch_cmn_base = dmac->base + CHANNEL_0_7_COMMON_BASE; + } else { + channel->ch_base = dmac->base + CHANNEL_8_15_OFFSET + + EACH_CHANNEL_OFFSET * (index - 8); + channel->ch_cmn_base = dmac->base + CHANNEL_8_15_COMMON_BASE; + } + + /* Allocate descriptors */ + lmdesc = dma_alloc_coherent(&pdev->dev, + sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC, + &channel->lmdesc.base_dma, GFP_KERNEL); + if (!lmdesc) { + dev_err(&pdev->dev, "Can't allocate memory (lmdesc)\n"); + return -ENOMEM; + } + rz_lmdesc_setup(channel, lmdesc); + + /* Initialize register for each channel */ + rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); + + channel->vc.desc_free = rz_dmac_virt_desc_free; + vchan_init(&channel->vc, &dmac->engine); + INIT_LIST_HEAD(&channel->ld_queue); + INIT_LIST_HEAD(&channel->ld_free); + INIT_LIST_HEAD(&channel->ld_active); + + return 0; +} + +static int rz_dmac_parse_of(struct device *dev, struct rz_dmac *dmac) +{ + struct device_node *np = dev->of_node; + int ret; + + ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels); + if (ret < 0) { + dev_err(dev, "unable to read dma-channels property\n"); + return ret; + } + + if (!dmac->n_channels || dmac->n_channels > RZ_DMAC_MAX_CHANNELS) { + dev_err(dev, "invalid number of channels %u\n", dmac->n_channels); + return -EINVAL; + } + + return 0; +} + +static int rz_dmac_probe(struct platform_device *pdev) +{ + const char *irqname = "error"; + struct dma_device *engine; + struct rz_dmac *dmac; + int channel_num; + unsigned int i; + int ret; + int irq; + + dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); + if (!dmac) + return -ENOMEM; + + dmac->dev = &pdev->dev; + platform_set_drvdata(pdev, dmac); + + ret = rz_dmac_parse_of(&pdev->dev, dmac); + if (ret < 0) + return ret; + + dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels, + sizeof(*dmac->channels), GFP_KERNEL); + if (!dmac->channels) + return -ENOMEM; + + /* Request resources */ + dmac->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dmac->base)) + return PTR_ERR(dmac->base); + + dmac->ext_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(dmac->ext_base)) + return PTR_ERR(dmac->ext_base); + + /* Register interrupt handler for error */ + irq = platform_get_irq_byname(pdev, irqname); + if (irq < 0) + return irq; + + ret = devm_request_irq(&pdev->dev, irq, rz_dmac_irq_handler, 0, + irqname, NULL); + if (ret) { + dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n", + irq, ret); + return ret; + } + + /* Initialize the channels. */ + INIT_LIST_HEAD(&dmac->engine.channels); + + dmac->rstc = devm_reset_control_array_get_exclusive(&pdev->dev); + if (IS_ERR(dmac->rstc)) + return dev_err_probe(&pdev->dev, PTR_ERR(dmac->rstc), + "failed to get resets\n"); + + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "pm_runtime_resume_and_get failed\n"); + goto err_pm_disable; + } + + ret = reset_control_deassert(dmac->rstc); + if (ret) + goto err_pm_runtime_put; + + for (i = 0; i < dmac->n_channels; i++) { + ret = rz_dmac_chan_probe(dmac, &dmac->channels[i], i); + if (ret < 0) + goto err; + } + + /* Register the DMAC as a DMA provider for DT. */ + ret = of_dma_controller_register(pdev->dev.of_node, rz_dmac_of_xlate, + NULL); + if (ret < 0) + goto err; + + /* Register the DMA engine device. */ + engine = &dmac->engine; + dma_cap_set(DMA_SLAVE, engine->cap_mask); + dma_cap_set(DMA_MEMCPY, engine->cap_mask); + rz_dmac_writel(dmac, DCTRL_DEFAULT, CHANNEL_0_7_COMMON_BASE + DCTRL); + rz_dmac_writel(dmac, DCTRL_DEFAULT, CHANNEL_8_15_COMMON_BASE + DCTRL); + + engine->dev = &pdev->dev; + + engine->device_alloc_chan_resources = rz_dmac_alloc_chan_resources; + engine->device_free_chan_resources = rz_dmac_free_chan_resources; + engine->device_tx_status = dma_cookie_status; + engine->device_prep_slave_sg = rz_dmac_prep_slave_sg; + engine->device_prep_dma_memcpy = rz_dmac_prep_dma_memcpy; + engine->device_config = rz_dmac_config; + engine->device_terminate_all = rz_dmac_terminate_all; + engine->device_issue_pending = rz_dmac_issue_pending; + engine->device_synchronize = rz_dmac_device_synchronize; + + engine->copy_align = DMAENGINE_ALIGN_1_BYTE; + dma_set_max_seg_size(engine->dev, U32_MAX); + + ret = dma_async_device_register(engine); + if (ret < 0) { + dev_err(&pdev->dev, "unable to register\n"); + goto dma_register_err; + } + return 0; + +dma_register_err: + of_dma_controller_free(pdev->dev.of_node); +err: + channel_num = i ? i - 1 : 0; + for (i = 0; i < channel_num; i++) { + struct rz_dmac_chan *channel = &dmac->channels[i]; + + dma_free_coherent(&pdev->dev, + sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC, + channel->lmdesc.base, + channel->lmdesc.base_dma); + } + + reset_control_assert(dmac->rstc); +err_pm_runtime_put: + pm_runtime_put(&pdev->dev); +err_pm_disable: + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static int rz_dmac_remove(struct platform_device *pdev) +{ + struct rz_dmac *dmac = platform_get_drvdata(pdev); + unsigned int i; + + dma_async_device_unregister(&dmac->engine); + of_dma_controller_free(pdev->dev.of_node); + for (i = 0; i < dmac->n_channels; i++) { + struct rz_dmac_chan *channel = &dmac->channels[i]; + + dma_free_coherent(&pdev->dev, + sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC, + channel->lmdesc.base, + channel->lmdesc.base_dma); + } + reset_control_assert(dmac->rstc); + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static const struct of_device_id of_rz_dmac_match[] = { + { .compatible = "renesas,rz-dmac", }, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, of_rz_dmac_match); + +static struct platform_driver rz_dmac_driver = { + .driver = { + .name = "rz-dmac", + .of_match_table = of_rz_dmac_match, + }, + .probe = rz_dmac_probe, + .remove = rz_dmac_remove, +}; + +module_platform_driver(rz_dmac_driver); + +MODULE_DESCRIPTION("Renesas RZ/G2L DMA Controller Driver"); +MODULE_AUTHOR("Biju Das "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c new file mode 100644 index 0000000000..588c5f409a --- /dev/null +++ b/drivers/dma/sh/shdma-base.c @@ -0,0 +1,1051 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Dmaengine driver base library for DMA controllers, found on SH-based SoCs + * + * extracted from shdma.c + * + * Copyright (C) 2011-2012 Guennadi Liakhovetski + * Copyright (C) 2009 Nobuhiro Iwamatsu + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" + +/* DMA descriptor control */ +enum shdma_desc_status { + DESC_IDLE, + DESC_PREPARED, + DESC_SUBMITTED, + DESC_COMPLETED, /* completed, have to call callback */ + DESC_WAITING, /* callback called, waiting for ack / re-submit */ +}; + +#define NR_DESCS_PER_CHANNEL 32 + +#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan) +#define to_shdma_dev(d) container_of(d, struct shdma_dev, dma_dev) + +/* + * For slave DMA we assume, that there is a finite number of DMA slaves in the + * system, and that each such slave can only use a finite number of channels. + * We use slave channel IDs to make sure, that no such slave channel ID is + * allocated more than once. + */ +static unsigned int slave_num = 256; +module_param(slave_num, uint, 0444); + +/* A bitmask with slave_num bits */ +static unsigned long *shdma_slave_used; + +/* Called under spin_lock_irq(&schan->chan_lock") */ +static void shdma_chan_xfer_ld_queue(struct shdma_chan *schan) +{ + struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); + const struct shdma_ops *ops = sdev->ops; + struct shdma_desc *sdesc; + + /* DMA work check */ + if (ops->channel_busy(schan)) + return; + + /* Find the first not transferred descriptor */ + list_for_each_entry(sdesc, &schan->ld_queue, node) + if (sdesc->mark == DESC_SUBMITTED) { + ops->start_xfer(schan, sdesc); + break; + } +} + +static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct shdma_desc *chunk, *c, *desc = + container_of(tx, struct shdma_desc, async_tx); + struct shdma_chan *schan = to_shdma_chan(tx->chan); + dma_async_tx_callback callback = tx->callback; + dma_cookie_t cookie; + bool power_up; + + spin_lock_irq(&schan->chan_lock); + + power_up = list_empty(&schan->ld_queue); + + cookie = dma_cookie_assign(tx); + + /* Mark all chunks of this descriptor as submitted, move to the queue */ + list_for_each_entry_safe(chunk, c, desc->node.prev, node) { + /* + * All chunks are on the global ld_free, so, we have to find + * the end of the chain ourselves + */ + if (chunk != desc && (chunk->mark == DESC_IDLE || + chunk->async_tx.cookie > 0 || + chunk->async_tx.cookie == -EBUSY || + &chunk->node == &schan->ld_free)) + break; + chunk->mark = DESC_SUBMITTED; + if (chunk->chunks == 1) { + chunk->async_tx.callback = callback; + chunk->async_tx.callback_param = tx->callback_param; + } else { + /* Callback goes to the last chunk */ + chunk->async_tx.callback = NULL; + } + chunk->cookie = cookie; + list_move_tail(&chunk->node, &schan->ld_queue); + + dev_dbg(schan->dev, "submit #%d@%p on %d\n", + tx->cookie, &chunk->async_tx, schan->id); + } + + if (power_up) { + int ret; + schan->pm_state = SHDMA_PM_BUSY; + + ret = pm_runtime_get(schan->dev); + + spin_unlock_irq(&schan->chan_lock); + if (ret < 0) + dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret); + + pm_runtime_barrier(schan->dev); + + spin_lock_irq(&schan->chan_lock); + + /* Have we been reset, while waiting? */ + if (schan->pm_state != SHDMA_PM_ESTABLISHED) { + struct shdma_dev *sdev = + to_shdma_dev(schan->dma_chan.device); + const struct shdma_ops *ops = sdev->ops; + dev_dbg(schan->dev, "Bring up channel %d\n", + schan->id); + /* + * TODO: .xfer_setup() might fail on some platforms. + * Make it int then, on error remove chunks from the + * queue again + */ + ops->setup_xfer(schan, schan->slave_id); + + if (schan->pm_state == SHDMA_PM_PENDING) + shdma_chan_xfer_ld_queue(schan); + schan->pm_state = SHDMA_PM_ESTABLISHED; + } + } else { + /* + * Tell .device_issue_pending() not to run the queue, interrupts + * will do it anyway + */ + schan->pm_state = SHDMA_PM_PENDING; + } + + spin_unlock_irq(&schan->chan_lock); + + return cookie; +} + +/* Called with desc_lock held */ +static struct shdma_desc *shdma_get_desc(struct shdma_chan *schan) +{ + struct shdma_desc *sdesc; + + list_for_each_entry(sdesc, &schan->ld_free, node) + if (sdesc->mark != DESC_PREPARED) { + BUG_ON(sdesc->mark != DESC_IDLE); + list_del(&sdesc->node); + return sdesc; + } + + return NULL; +} + +static int shdma_setup_slave(struct shdma_chan *schan, dma_addr_t slave_addr) +{ + struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); + const struct shdma_ops *ops = sdev->ops; + int ret, match; + + if (schan->dev->of_node) { + match = schan->hw_req; + ret = ops->set_slave(schan, match, slave_addr, true); + if (ret < 0) + return ret; + } else { + match = schan->real_slave_id; + } + + if (schan->real_slave_id < 0 || schan->real_slave_id >= slave_num) + return -EINVAL; + + if (test_and_set_bit(schan->real_slave_id, shdma_slave_used)) + return -EBUSY; + + ret = ops->set_slave(schan, match, slave_addr, false); + if (ret < 0) { + clear_bit(schan->real_slave_id, shdma_slave_used); + return ret; + } + + schan->slave_id = schan->real_slave_id; + + return 0; +} + +static int shdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); + const struct shdma_ops *ops = sdev->ops; + struct shdma_desc *desc; + struct shdma_slave *slave = chan->private; + int ret, i; + + /* + * This relies on the guarantee from dmaengine that alloc_chan_resources + * never runs concurrently with itself or free_chan_resources. + */ + if (slave) { + /* Legacy mode: .private is set in filter */ + schan->real_slave_id = slave->slave_id; + ret = shdma_setup_slave(schan, 0); + if (ret < 0) + goto esetslave; + } else { + /* Normal mode: real_slave_id was set by filter */ + schan->slave_id = -EINVAL; + } + + schan->desc = kcalloc(NR_DESCS_PER_CHANNEL, + sdev->desc_size, GFP_KERNEL); + if (!schan->desc) { + ret = -ENOMEM; + goto edescalloc; + } + schan->desc_num = NR_DESCS_PER_CHANNEL; + + for (i = 0; i < NR_DESCS_PER_CHANNEL; i++) { + desc = ops->embedded_desc(schan->desc, i); + dma_async_tx_descriptor_init(&desc->async_tx, + &schan->dma_chan); + desc->async_tx.tx_submit = shdma_tx_submit; + desc->mark = DESC_IDLE; + + list_add(&desc->node, &schan->ld_free); + } + + return NR_DESCS_PER_CHANNEL; + +edescalloc: + if (slave) +esetslave: + clear_bit(slave->slave_id, shdma_slave_used); + chan->private = NULL; + return ret; +} + +/* + * This is the standard shdma filter function to be used as a replacement to the + * "old" method, using the .private pointer. + * You always have to pass a valid slave id as the argument, old drivers that + * pass ERR_PTR(-EINVAL) as a filter parameter and set it up in dma_slave_config + * need to be updated so we can remove the slave_id field from dma_slave_config. + * parameter. If this filter is used, the slave driver, after calling + * dma_request_channel(), will also have to call dmaengine_slave_config() with + * .direction, and either .src_addr or .dst_addr set. + * + * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE + * capability! If this becomes a requirement, hardware glue drivers, using this + * services would have to provide their own filters, which first would check + * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do + * this, and only then, in case of a match, call this common filter. + * NOTE 2: This filter function is also used in the DT case by shdma_of_xlate(). + * In that case the MID-RID value is used for slave channel filtering and is + * passed to this function in the "arg" parameter. + */ +bool shdma_chan_filter(struct dma_chan *chan, void *arg) +{ + struct shdma_chan *schan; + struct shdma_dev *sdev; + int slave_id = (long)arg; + int ret; + + /* Only support channels handled by this driver. */ + if (chan->device->device_alloc_chan_resources != + shdma_alloc_chan_resources) + return false; + + schan = to_shdma_chan(chan); + sdev = to_shdma_dev(chan->device); + + /* + * For DT, the schan->slave_id field is generated by the + * set_slave function from the slave ID that is passed in + * from xlate. For the non-DT case, the slave ID is + * directly passed into the filter function by the driver + */ + if (schan->dev->of_node) { + ret = sdev->ops->set_slave(schan, slave_id, 0, true); + if (ret < 0) + return false; + + schan->real_slave_id = schan->slave_id; + return true; + } + + if (slave_id < 0) { + /* No slave requested - arbitrary channel */ + dev_warn(sdev->dma_dev.dev, "invalid slave ID passed to dma_request_slave\n"); + return true; + } + + if (slave_id >= slave_num) + return false; + + ret = sdev->ops->set_slave(schan, slave_id, 0, true); + if (ret < 0) + return false; + + schan->real_slave_id = slave_id; + + return true; +} +EXPORT_SYMBOL(shdma_chan_filter); + +static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all) +{ + struct shdma_desc *desc, *_desc; + /* Is the "exposed" head of a chain acked? */ + bool head_acked = false; + dma_cookie_t cookie = 0; + dma_async_tx_callback callback = NULL; + struct dmaengine_desc_callback cb; + unsigned long flags; + LIST_HEAD(cyclic_list); + + memset(&cb, 0, sizeof(cb)); + spin_lock_irqsave(&schan->chan_lock, flags); + list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) { + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie); + BUG_ON(desc->mark != DESC_SUBMITTED && + desc->mark != DESC_COMPLETED && + desc->mark != DESC_WAITING); + + /* + * queue is ordered, and we use this loop to (1) clean up all + * completed descriptors, and to (2) update descriptor flags of + * any chunks in a (partially) completed chain + */ + if (!all && desc->mark == DESC_SUBMITTED && + desc->cookie != cookie) + break; + + if (tx->cookie > 0) + cookie = tx->cookie; + + if (desc->mark == DESC_COMPLETED && desc->chunks == 1) { + if (schan->dma_chan.completed_cookie != desc->cookie - 1) + dev_dbg(schan->dev, + "Completing cookie %d, expected %d\n", + desc->cookie, + schan->dma_chan.completed_cookie + 1); + schan->dma_chan.completed_cookie = desc->cookie; + } + + /* Call callback on the last chunk */ + if (desc->mark == DESC_COMPLETED && tx->callback) { + desc->mark = DESC_WAITING; + dmaengine_desc_get_callback(tx, &cb); + callback = tx->callback; + dev_dbg(schan->dev, "descriptor #%d@%p on %d callback\n", + tx->cookie, tx, schan->id); + BUG_ON(desc->chunks != 1); + break; + } + + if (tx->cookie > 0 || tx->cookie == -EBUSY) { + if (desc->mark == DESC_COMPLETED) { + BUG_ON(tx->cookie < 0); + desc->mark = DESC_WAITING; + } + head_acked = async_tx_test_ack(tx); + } else { + switch (desc->mark) { + case DESC_COMPLETED: + desc->mark = DESC_WAITING; + fallthrough; + case DESC_WAITING: + if (head_acked) + async_tx_ack(&desc->async_tx); + } + } + + dev_dbg(schan->dev, "descriptor %p #%d completed.\n", + tx, tx->cookie); + + if (((desc->mark == DESC_COMPLETED || + desc->mark == DESC_WAITING) && + async_tx_test_ack(&desc->async_tx)) || all) { + + if (all || !desc->cyclic) { + /* Remove from ld_queue list */ + desc->mark = DESC_IDLE; + list_move(&desc->node, &schan->ld_free); + } else { + /* reuse as cyclic */ + desc->mark = DESC_SUBMITTED; + list_move_tail(&desc->node, &cyclic_list); + } + + if (list_empty(&schan->ld_queue)) { + dev_dbg(schan->dev, "Bring down channel %d\n", schan->id); + pm_runtime_put(schan->dev); + schan->pm_state = SHDMA_PM_ESTABLISHED; + } else if (schan->pm_state == SHDMA_PM_PENDING) { + shdma_chan_xfer_ld_queue(schan); + } + } + } + + if (all && !callback) + /* + * Terminating and the loop completed normally: forgive + * uncompleted cookies + */ + schan->dma_chan.completed_cookie = schan->dma_chan.cookie; + + list_splice_tail(&cyclic_list, &schan->ld_queue); + + spin_unlock_irqrestore(&schan->chan_lock, flags); + + dmaengine_desc_callback_invoke(&cb, NULL); + + return callback; +} + +/* + * shdma_chan_ld_cleanup - Clean up link descriptors + * + * Clean up the ld_queue of DMA channel. + */ +static void shdma_chan_ld_cleanup(struct shdma_chan *schan, bool all) +{ + while (__ld_cleanup(schan, all)) + ; +} + +/* + * shdma_free_chan_resources - Free all resources of the channel. + */ +static void shdma_free_chan_resources(struct dma_chan *chan) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + struct shdma_dev *sdev = to_shdma_dev(chan->device); + const struct shdma_ops *ops = sdev->ops; + LIST_HEAD(list); + + /* Protect against ISR */ + spin_lock_irq(&schan->chan_lock); + ops->halt_channel(schan); + spin_unlock_irq(&schan->chan_lock); + + /* Now no new interrupts will occur */ + + /* Prepared and not submitted descriptors can still be on the queue */ + if (!list_empty(&schan->ld_queue)) + shdma_chan_ld_cleanup(schan, true); + + if (schan->slave_id >= 0) { + /* The caller is holding dma_list_mutex */ + clear_bit(schan->slave_id, shdma_slave_used); + chan->private = NULL; + } + + schan->real_slave_id = 0; + + spin_lock_irq(&schan->chan_lock); + + list_splice_init(&schan->ld_free, &list); + schan->desc_num = 0; + + spin_unlock_irq(&schan->chan_lock); + + kfree(schan->desc); +} + +/** + * shdma_add_desc - get, set up and return one transfer descriptor + * @schan: DMA channel + * @flags: DMA transfer flags + * @dst: destination DMA address, incremented when direction equals + * DMA_DEV_TO_MEM or DMA_MEM_TO_MEM + * @src: source DMA address, incremented when direction equals + * DMA_MEM_TO_DEV or DMA_MEM_TO_MEM + * @len: DMA transfer length + * @first: if NULL, set to the current descriptor and cookie set to -EBUSY + * @direction: needed for slave DMA to decide which address to keep constant, + * equals DMA_MEM_TO_MEM for MEMCPY + * Returns 0 or an error + * Locks: called with desc_lock held + */ +static struct shdma_desc *shdma_add_desc(struct shdma_chan *schan, + unsigned long flags, dma_addr_t *dst, dma_addr_t *src, size_t *len, + struct shdma_desc **first, enum dma_transfer_direction direction) +{ + struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); + const struct shdma_ops *ops = sdev->ops; + struct shdma_desc *new; + size_t copy_size = *len; + + if (!copy_size) + return NULL; + + /* Allocate the link descriptor from the free list */ + new = shdma_get_desc(schan); + if (!new) { + dev_err(schan->dev, "No free link descriptor available\n"); + return NULL; + } + + ops->desc_setup(schan, new, *src, *dst, ©_size); + + if (!*first) { + /* First desc */ + new->async_tx.cookie = -EBUSY; + *first = new; + } else { + /* Other desc - invisible to the user */ + new->async_tx.cookie = -EINVAL; + } + + dev_dbg(schan->dev, + "chaining (%zu/%zu)@%pad -> %pad with %p, cookie %d\n", + copy_size, *len, src, dst, &new->async_tx, + new->async_tx.cookie); + + new->mark = DESC_PREPARED; + new->async_tx.flags = flags; + new->direction = direction; + new->partial = 0; + + *len -= copy_size; + if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV) + *src += copy_size; + if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM) + *dst += copy_size; + + return new; +} + +/* + * shdma_prep_sg - prepare transfer descriptors from an SG list + * + * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also + * converted to scatter-gather to guarantee consistent locking and a correct + * list manipulation. For slave DMA direction carries the usual meaning, and, + * logically, the SG list is RAM and the addr variable contains slave address, + * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM + * and the SG list contains only one element and points at the source buffer. + */ +static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan, + struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr, + enum dma_transfer_direction direction, unsigned long flags, bool cyclic) +{ + struct scatterlist *sg; + struct shdma_desc *first = NULL, *new = NULL /* compiler... */; + LIST_HEAD(tx_list); + int chunks = 0; + unsigned long irq_flags; + int i; + + for_each_sg(sgl, sg, sg_len, i) + chunks += DIV_ROUND_UP(sg_dma_len(sg), schan->max_xfer_len); + + /* Have to lock the whole loop to protect against concurrent release */ + spin_lock_irqsave(&schan->chan_lock, irq_flags); + + /* + * Chaining: + * first descriptor is what user is dealing with in all API calls, its + * cookie is at first set to -EBUSY, at tx-submit to a positive + * number + * if more than one chunk is needed further chunks have cookie = -EINVAL + * the last chunk, if not equal to the first, has cookie = -ENOSPC + * all chunks are linked onto the tx_list head with their .node heads + * only during this function, then they are immediately spliced + * back onto the free list in form of a chain + */ + for_each_sg(sgl, sg, sg_len, i) { + dma_addr_t sg_addr = sg_dma_address(sg); + size_t len = sg_dma_len(sg); + + if (!len) + goto err_get_desc; + + do { + dev_dbg(schan->dev, "Add SG #%d@%p[%zu], dma %pad\n", + i, sg, len, &sg_addr); + + if (direction == DMA_DEV_TO_MEM) + new = shdma_add_desc(schan, flags, + &sg_addr, addr, &len, &first, + direction); + else + new = shdma_add_desc(schan, flags, + addr, &sg_addr, &len, &first, + direction); + if (!new) + goto err_get_desc; + + new->cyclic = cyclic; + if (cyclic) + new->chunks = 1; + else + new->chunks = chunks--; + list_add_tail(&new->node, &tx_list); + } while (len); + } + + if (new != first) + new->async_tx.cookie = -ENOSPC; + + /* Put them back on the free list, so, they don't get lost */ + list_splice_tail(&tx_list, &schan->ld_free); + + spin_unlock_irqrestore(&schan->chan_lock, irq_flags); + + return &first->async_tx; + +err_get_desc: + list_for_each_entry(new, &tx_list, node) + new->mark = DESC_IDLE; + list_splice(&tx_list, &schan->ld_free); + + spin_unlock_irqrestore(&schan->chan_lock, irq_flags); + + return NULL; +} + +static struct dma_async_tx_descriptor *shdma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + struct scatterlist sg; + + if (!chan || !len) + return NULL; + + BUG_ON(!schan->desc_num); + + sg_init_table(&sg, 1); + sg_set_page(&sg, pfn_to_page(PFN_DOWN(dma_src)), len, + offset_in_page(dma_src)); + sg_dma_address(&sg) = dma_src; + sg_dma_len(&sg) = len; + + return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, + flags, false); +} + +static struct dma_async_tx_descriptor *shdma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, void *context) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); + const struct shdma_ops *ops = sdev->ops; + int slave_id = schan->slave_id; + dma_addr_t slave_addr; + + if (!chan) + return NULL; + + BUG_ON(!schan->desc_num); + + /* Someone calling slave DMA on a generic channel? */ + if (slave_id < 0 || !sg_len) { + dev_warn(schan->dev, "%s: bad parameter: len=%d, id=%d\n", + __func__, sg_len, slave_id); + return NULL; + } + + slave_addr = ops->slave_addr(schan); + + return shdma_prep_sg(schan, sgl, sg_len, &slave_addr, + direction, flags, false); +} + +#define SHDMA_MAX_SG_LEN 32 + +static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); + struct dma_async_tx_descriptor *desc; + const struct shdma_ops *ops = sdev->ops; + unsigned int sg_len = buf_len / period_len; + int slave_id = schan->slave_id; + dma_addr_t slave_addr; + struct scatterlist *sgl; + int i; + + if (!chan) + return NULL; + + BUG_ON(!schan->desc_num); + + if (sg_len > SHDMA_MAX_SG_LEN) { + dev_err(schan->dev, "sg length %d exceeds limit %d", + sg_len, SHDMA_MAX_SG_LEN); + return NULL; + } + + /* Someone calling slave DMA on a generic channel? */ + if (slave_id < 0 || (buf_len < period_len)) { + dev_warn(schan->dev, + "%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n", + __func__, buf_len, period_len, slave_id); + return NULL; + } + + slave_addr = ops->slave_addr(schan); + + /* + * Allocate the sg list dynamically as it would consumer too much stack + * space. + */ + sgl = kmalloc_array(sg_len, sizeof(*sgl), GFP_KERNEL); + if (!sgl) + return NULL; + + sg_init_table(sgl, sg_len); + + for (i = 0; i < sg_len; i++) { + dma_addr_t src = buf_addr + (period_len * i); + + sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len, + offset_in_page(src)); + sg_dma_address(&sgl[i]) = src; + sg_dma_len(&sgl[i]) = period_len; + } + + desc = shdma_prep_sg(schan, sgl, sg_len, &slave_addr, + direction, flags, true); + + kfree(sgl); + return desc; +} + +static int shdma_terminate_all(struct dma_chan *chan) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + struct shdma_dev *sdev = to_shdma_dev(chan->device); + const struct shdma_ops *ops = sdev->ops; + unsigned long flags; + + spin_lock_irqsave(&schan->chan_lock, flags); + ops->halt_channel(schan); + + if (ops->get_partial && !list_empty(&schan->ld_queue)) { + /* Record partial transfer */ + struct shdma_desc *desc = list_first_entry(&schan->ld_queue, + struct shdma_desc, node); + desc->partial = ops->get_partial(schan, desc); + } + + spin_unlock_irqrestore(&schan->chan_lock, flags); + + shdma_chan_ld_cleanup(schan, true); + + return 0; +} + +static int shdma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + + /* + * So far only .slave_id is used, but the slave drivers are + * encouraged to also set a transfer direction and an address. + */ + if (!config) + return -EINVAL; + + /* + * We could lock this, but you shouldn't be configuring the + * channel, while using it... + */ + return shdma_setup_slave(schan, + config->direction == DMA_DEV_TO_MEM ? + config->src_addr : config->dst_addr); +} + +static void shdma_issue_pending(struct dma_chan *chan) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + + spin_lock_irq(&schan->chan_lock); + if (schan->pm_state == SHDMA_PM_ESTABLISHED) + shdma_chan_xfer_ld_queue(schan); + else + schan->pm_state = SHDMA_PM_PENDING; + spin_unlock_irq(&schan->chan_lock); +} + +static enum dma_status shdma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + enum dma_status status; + unsigned long flags; + + shdma_chan_ld_cleanup(schan, false); + + spin_lock_irqsave(&schan->chan_lock, flags); + + status = dma_cookie_status(chan, cookie, txstate); + + /* + * If we don't find cookie on the queue, it has been aborted and we have + * to report error + */ + if (status != DMA_COMPLETE) { + struct shdma_desc *sdesc; + status = DMA_ERROR; + list_for_each_entry(sdesc, &schan->ld_queue, node) + if (sdesc->cookie == cookie) { + status = DMA_IN_PROGRESS; + break; + } + } + + spin_unlock_irqrestore(&schan->chan_lock, flags); + + return status; +} + +/* Called from error IRQ or NMI */ +bool shdma_reset(struct shdma_dev *sdev) +{ + const struct shdma_ops *ops = sdev->ops; + struct shdma_chan *schan; + unsigned int handled = 0; + int i; + + /* Reset all channels */ + shdma_for_each_chan(schan, sdev, i) { + struct shdma_desc *sdesc; + LIST_HEAD(dl); + + if (!schan) + continue; + + spin_lock(&schan->chan_lock); + + /* Stop the channel */ + ops->halt_channel(schan); + + list_splice_init(&schan->ld_queue, &dl); + + if (!list_empty(&dl)) { + dev_dbg(schan->dev, "Bring down channel %d\n", schan->id); + pm_runtime_put(schan->dev); + } + schan->pm_state = SHDMA_PM_ESTABLISHED; + + spin_unlock(&schan->chan_lock); + + /* Complete all */ + list_for_each_entry(sdesc, &dl, node) { + struct dma_async_tx_descriptor *tx = &sdesc->async_tx; + + sdesc->mark = DESC_IDLE; + dmaengine_desc_get_callback_invoke(tx, NULL); + } + + spin_lock(&schan->chan_lock); + list_splice(&dl, &schan->ld_free); + spin_unlock(&schan->chan_lock); + + handled++; + } + + return !!handled; +} +EXPORT_SYMBOL(shdma_reset); + +static irqreturn_t chan_irq(int irq, void *dev) +{ + struct shdma_chan *schan = dev; + const struct shdma_ops *ops = + to_shdma_dev(schan->dma_chan.device)->ops; + irqreturn_t ret; + + spin_lock(&schan->chan_lock); + + ret = ops->chan_irq(schan, irq) ? IRQ_WAKE_THREAD : IRQ_NONE; + + spin_unlock(&schan->chan_lock); + + return ret; +} + +static irqreturn_t chan_irqt(int irq, void *dev) +{ + struct shdma_chan *schan = dev; + const struct shdma_ops *ops = + to_shdma_dev(schan->dma_chan.device)->ops; + struct shdma_desc *sdesc; + + spin_lock_irq(&schan->chan_lock); + list_for_each_entry(sdesc, &schan->ld_queue, node) { + if (sdesc->mark == DESC_SUBMITTED && + ops->desc_completed(schan, sdesc)) { + dev_dbg(schan->dev, "done #%d@%p\n", + sdesc->async_tx.cookie, &sdesc->async_tx); + sdesc->mark = DESC_COMPLETED; + break; + } + } + /* Next desc */ + shdma_chan_xfer_ld_queue(schan); + spin_unlock_irq(&schan->chan_lock); + + shdma_chan_ld_cleanup(schan, false); + + return IRQ_HANDLED; +} + +int shdma_request_irq(struct shdma_chan *schan, int irq, + unsigned long flags, const char *name) +{ + int ret = devm_request_threaded_irq(schan->dev, irq, chan_irq, + chan_irqt, flags, name, schan); + + schan->irq = ret < 0 ? ret : irq; + + return ret; +} +EXPORT_SYMBOL(shdma_request_irq); + +void shdma_chan_probe(struct shdma_dev *sdev, + struct shdma_chan *schan, int id) +{ + schan->pm_state = SHDMA_PM_ESTABLISHED; + + /* reference struct dma_device */ + schan->dma_chan.device = &sdev->dma_dev; + dma_cookie_init(&schan->dma_chan); + + schan->dev = sdev->dma_dev.dev; + schan->id = id; + + if (!schan->max_xfer_len) + schan->max_xfer_len = PAGE_SIZE; + + spin_lock_init(&schan->chan_lock); + + /* Init descripter manage list */ + INIT_LIST_HEAD(&schan->ld_queue); + INIT_LIST_HEAD(&schan->ld_free); + + /* Add the channel to DMA device channel list */ + list_add_tail(&schan->dma_chan.device_node, + &sdev->dma_dev.channels); + sdev->schan[id] = schan; +} +EXPORT_SYMBOL(shdma_chan_probe); + +void shdma_chan_remove(struct shdma_chan *schan) +{ + list_del(&schan->dma_chan.device_node); +} +EXPORT_SYMBOL(shdma_chan_remove); + +int shdma_init(struct device *dev, struct shdma_dev *sdev, + int chan_num) +{ + struct dma_device *dma_dev = &sdev->dma_dev; + + /* + * Require all call-backs for now, they can trivially be made optional + * later as required + */ + if (!sdev->ops || + !sdev->desc_size || + !sdev->ops->embedded_desc || + !sdev->ops->start_xfer || + !sdev->ops->setup_xfer || + !sdev->ops->set_slave || + !sdev->ops->desc_setup || + !sdev->ops->slave_addr || + !sdev->ops->channel_busy || + !sdev->ops->halt_channel || + !sdev->ops->desc_completed) + return -EINVAL; + + sdev->schan = kcalloc(chan_num, sizeof(*sdev->schan), GFP_KERNEL); + if (!sdev->schan) + return -ENOMEM; + + INIT_LIST_HEAD(&dma_dev->channels); + + /* Common and MEMCPY operations */ + dma_dev->device_alloc_chan_resources + = shdma_alloc_chan_resources; + dma_dev->device_free_chan_resources = shdma_free_chan_resources; + dma_dev->device_prep_dma_memcpy = shdma_prep_memcpy; + dma_dev->device_tx_status = shdma_tx_status; + dma_dev->device_issue_pending = shdma_issue_pending; + + /* Compulsory for DMA_SLAVE fields */ + dma_dev->device_prep_slave_sg = shdma_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = shdma_prep_dma_cyclic; + dma_dev->device_config = shdma_config; + dma_dev->device_terminate_all = shdma_terminate_all; + + dma_dev->dev = dev; + + return 0; +} +EXPORT_SYMBOL(shdma_init); + +void shdma_cleanup(struct shdma_dev *sdev) +{ + kfree(sdev->schan); +} +EXPORT_SYMBOL(shdma_cleanup); + +static int __init shdma_enter(void) +{ + shdma_slave_used = bitmap_zalloc(slave_num, GFP_KERNEL); + if (!shdma_slave_used) + return -ENOMEM; + return 0; +} +module_init(shdma_enter); + +static void __exit shdma_exit(void) +{ + bitmap_free(shdma_slave_used); +} +module_exit(shdma_exit); + +MODULE_DESCRIPTION("SH-DMA driver base library"); +MODULE_AUTHOR("Guennadi Liakhovetski "); diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h new file mode 100644 index 0000000000..9c121a4b33 --- /dev/null +++ b/drivers/dma/sh/shdma.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Renesas SuperH DMA Engine support + * + * Copyright (C) 2009 Nobuhiro Iwamatsu + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * + */ +#ifndef __DMA_SHDMA_H +#define __DMA_SHDMA_H + +#include +#include +#include +#include +#include + +#define SH_DMAE_MAX_CHANNELS 20 +#define SH_DMAE_TCR_MAX 0x00FFFFFF /* 16MB */ + +struct device; + +struct sh_dmae_chan { + struct shdma_chan shdma_chan; + const struct sh_dmae_slave_config *config; /* Slave DMA configuration */ + int xmit_shift; /* log_2(bytes_per_xfer) */ + void __iomem *base; + char dev_id[16]; /* unique name per DMAC of channel */ + int pm_error; + dma_addr_t slave_addr; +}; + +struct sh_dmae_device { + struct shdma_dev shdma_dev; + struct sh_dmae_chan *chan[SH_DMAE_MAX_CHANNELS]; + const struct sh_dmae_pdata *pdata; + struct list_head node; + void __iomem *chan_reg; + void __iomem *dmars; + unsigned int chcr_offset; + u32 chcr_ie_bit; +}; + +struct sh_dmae_regs { + u32 sar; /* SAR / source address */ + u32 dar; /* DAR / destination address */ + u32 tcr; /* TCR / transfer count */ +}; + +struct sh_dmae_desc { + struct sh_dmae_regs hw; + struct shdma_desc shdma_desc; +}; + +#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, shdma_chan) +#define to_sh_desc(lh) container_of(lh, struct sh_desc, node) +#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx) +#define to_sh_dev(chan) container_of(chan->shdma_chan.dma_chan.device,\ + struct sh_dmae_device, shdma_dev.dma_dev) + +#endif /* __DMA_SHDMA_H */ diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c new file mode 100644 index 0000000000..00067b29e2 --- /dev/null +++ b/drivers/dma/sh/shdmac.c @@ -0,0 +1,936 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Renesas SuperH DMA Engine support + * + * base is drivers/dma/flsdma.c + * + * Copyright (C) 2011-2012 Guennadi Liakhovetski + * Copyright (C) 2009 Nobuhiro Iwamatsu + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * - DMA of SuperH does not have Hardware DMA chain mode. + * - MAX DMA size is 16MB. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" +#include "shdma.h" + +/* DMA registers */ +#define SAR 0x00 /* Source Address Register */ +#define DAR 0x04 /* Destination Address Register */ +#define TCR 0x08 /* Transfer Count Register */ +#define CHCR 0x0C /* Channel Control Register */ +#define DMAOR 0x40 /* DMA Operation Register */ + +#define TEND 0x18 /* USB-DMAC */ + +#define SH_DMAE_DRV_NAME "sh-dma-engine" + +/* Default MEMCPY transfer size = 2^2 = 4 bytes */ +#define LOG2_DEFAULT_XFER_SIZE 2 +#define SH_DMA_SLAVE_NUMBER 256 +#define SH_DMA_TCR_MAX (16 * 1024 * 1024 - 1) + +/* + * Used for write-side mutual exclusion for the global device list, + * read-side synchronization by way of RCU, and per-controller data. + */ +static DEFINE_SPINLOCK(sh_dmae_lock); +static LIST_HEAD(sh_dmae_devices); + +/* + * Different DMAC implementations provide different ways to clear DMA channels: + * (1) none - no CHCLR registers are available + * (2) one CHCLR register per channel - 0 has to be written to it to clear + * channel buffers + * (3) one CHCLR per several channels - 1 has to be written to the bit, + * corresponding to the specific channel to reset it + */ +static void channel_clear(struct sh_dmae_chan *sh_dc) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_dc); + const struct sh_dmae_channel *chan_pdata = shdev->pdata->channel + + sh_dc->shdma_chan.id; + u32 val = shdev->pdata->chclr_bitwise ? 1 << chan_pdata->chclr_bit : 0; + + __raw_writel(val, shdev->chan_reg + chan_pdata->chclr_offset); +} + +static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) +{ + __raw_writel(data, sh_dc->base + reg); +} + +static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg) +{ + return __raw_readl(sh_dc->base + reg); +} + +static u16 dmaor_read(struct sh_dmae_device *shdev) +{ + void __iomem *addr = shdev->chan_reg + DMAOR; + + if (shdev->pdata->dmaor_is_32bit) + return __raw_readl(addr); + else + return __raw_readw(addr); +} + +static void dmaor_write(struct sh_dmae_device *shdev, u16 data) +{ + void __iomem *addr = shdev->chan_reg + DMAOR; + + if (shdev->pdata->dmaor_is_32bit) + __raw_writel(data, addr); + else + __raw_writew(data, addr); +} + +static void chcr_write(struct sh_dmae_chan *sh_dc, u32 data) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_dc); + + __raw_writel(data, sh_dc->base + shdev->chcr_offset); +} + +static u32 chcr_read(struct sh_dmae_chan *sh_dc) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_dc); + + return __raw_readl(sh_dc->base + shdev->chcr_offset); +} + +/* + * Reset DMA controller + * + * SH7780 has two DMAOR register + */ +static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev) +{ + unsigned short dmaor; + unsigned long flags; + + spin_lock_irqsave(&sh_dmae_lock, flags); + + dmaor = dmaor_read(shdev); + dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME)); + + spin_unlock_irqrestore(&sh_dmae_lock, flags); +} + +static int sh_dmae_rst(struct sh_dmae_device *shdev) +{ + unsigned short dmaor; + unsigned long flags; + + spin_lock_irqsave(&sh_dmae_lock, flags); + + dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME); + + if (shdev->pdata->chclr_present) { + int i; + for (i = 0; i < shdev->pdata->channel_num; i++) { + struct sh_dmae_chan *sh_chan = shdev->chan[i]; + if (sh_chan) + channel_clear(sh_chan); + } + } + + dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init); + + dmaor = dmaor_read(shdev); + + spin_unlock_irqrestore(&sh_dmae_lock, flags); + + if (dmaor & (DMAOR_AE | DMAOR_NMIF)) { + dev_warn(shdev->shdma_dev.dma_dev.dev, "Can't initialize DMAOR.\n"); + return -EIO; + } + if (shdev->pdata->dmaor_init & ~dmaor) + dev_warn(shdev->shdma_dev.dma_dev.dev, + "DMAOR=0x%x hasn't latched the initial value 0x%x.\n", + dmaor, shdev->pdata->dmaor_init); + return 0; +} + +static bool dmae_is_busy(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = chcr_read(sh_chan); + + if ((chcr & (CHCR_DE | CHCR_TE)) == CHCR_DE) + return true; /* working */ + + return false; /* waiting */ +} + +static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + const struct sh_dmae_pdata *pdata = shdev->pdata; + int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) | + ((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift); + + if (cnt >= pdata->ts_shift_num) + cnt = 0; + + return pdata->ts_shift[cnt]; +} + +static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + const struct sh_dmae_pdata *pdata = shdev->pdata; + int i; + + for (i = 0; i < pdata->ts_shift_num; i++) + if (pdata->ts_shift[i] == l2size) + break; + + if (i == pdata->ts_shift_num) + i = 0; + + return ((i << pdata->ts_low_shift) & pdata->ts_low_mask) | + ((i << pdata->ts_high_shift) & pdata->ts_high_mask); +} + +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw) +{ + sh_dmae_writel(sh_chan, hw->sar, SAR); + sh_dmae_writel(sh_chan, hw->dar, DAR); + sh_dmae_writel(sh_chan, hw->tcr >> sh_chan->xmit_shift, TCR); +} + +static void dmae_start(struct sh_dmae_chan *sh_chan) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + u32 chcr = chcr_read(sh_chan); + + if (shdev->pdata->needs_tend_set) + sh_dmae_writel(sh_chan, 0xFFFFFFFF, TEND); + + chcr |= CHCR_DE | shdev->chcr_ie_bit; + chcr_write(sh_chan, chcr & ~CHCR_TE); +} + +static void dmae_init(struct sh_dmae_chan *sh_chan) +{ + /* + * Default configuration for dual address memory-memory transfer. + */ + u32 chcr = DM_INC | SM_INC | RS_AUTO | log2size_to_chcr(sh_chan, + LOG2_DEFAULT_XFER_SIZE); + sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr); + chcr_write(sh_chan, chcr); +} + +static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val) +{ + /* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */ + if (dmae_is_busy(sh_chan)) + return -EBUSY; + + sh_chan->xmit_shift = calc_xmit_shift(sh_chan, val); + chcr_write(sh_chan, val); + + return 0; +} + +static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + const struct sh_dmae_pdata *pdata = shdev->pdata; + const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->shdma_chan.id]; + void __iomem *addr = shdev->dmars; + unsigned int shift = chan_pdata->dmars_bit; + + if (dmae_is_busy(sh_chan)) + return -EBUSY; + + if (pdata->no_dmars) + return 0; + + /* in the case of a missing DMARS resource use first memory window */ + if (!addr) + addr = shdev->chan_reg; + addr += chan_pdata->dmars; + + __raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift), + addr); + + return 0; +} + +static void sh_dmae_start_xfer(struct shdma_chan *schan, + struct shdma_desc *sdesc) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan, + shdma_chan); + struct sh_dmae_desc *sh_desc = container_of(sdesc, + struct sh_dmae_desc, shdma_desc); + dev_dbg(sh_chan->shdma_chan.dev, "Queue #%d to %d: %u@%x -> %x\n", + sdesc->async_tx.cookie, sh_chan->shdma_chan.id, + sh_desc->hw.tcr, sh_desc->hw.sar, sh_desc->hw.dar); + /* Get the ld start address from ld_queue */ + dmae_set_reg(sh_chan, &sh_desc->hw); + dmae_start(sh_chan); +} + +static bool sh_dmae_channel_busy(struct shdma_chan *schan) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan, + shdma_chan); + return dmae_is_busy(sh_chan); +} + +static void sh_dmae_setup_xfer(struct shdma_chan *schan, + int slave_id) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan, + shdma_chan); + + if (slave_id >= 0) { + const struct sh_dmae_slave_config *cfg = + sh_chan->config; + + dmae_set_dmars(sh_chan, cfg->mid_rid); + dmae_set_chcr(sh_chan, cfg->chcr); + } else { + dmae_init(sh_chan); + } +} + +/* + * Find a slave channel configuration from the contoller list by either a slave + * ID in the non-DT case, or by a MID/RID value in the DT case + */ +static const struct sh_dmae_slave_config *dmae_find_slave( + struct sh_dmae_chan *sh_chan, int match) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + const struct sh_dmae_pdata *pdata = shdev->pdata; + const struct sh_dmae_slave_config *cfg; + int i; + + if (!sh_chan->shdma_chan.dev->of_node) { + if (match >= SH_DMA_SLAVE_NUMBER) + return NULL; + + for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) + if (cfg->slave_id == match) + return cfg; + } else { + for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) + if (cfg->mid_rid == match) { + sh_chan->shdma_chan.slave_id = i; + return cfg; + } + } + + return NULL; +} + +static int sh_dmae_set_slave(struct shdma_chan *schan, + int slave_id, dma_addr_t slave_addr, bool try) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan, + shdma_chan); + const struct sh_dmae_slave_config *cfg = dmae_find_slave(sh_chan, slave_id); + if (!cfg) + return -ENXIO; + + if (!try) { + sh_chan->config = cfg; + sh_chan->slave_addr = slave_addr ? : cfg->addr; + } + + return 0; +} + +static void dmae_halt(struct sh_dmae_chan *sh_chan) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + u32 chcr = chcr_read(sh_chan); + + chcr &= ~(CHCR_DE | CHCR_TE | shdev->chcr_ie_bit); + chcr_write(sh_chan, chcr); +} + +static int sh_dmae_desc_setup(struct shdma_chan *schan, + struct shdma_desc *sdesc, + dma_addr_t src, dma_addr_t dst, size_t *len) +{ + struct sh_dmae_desc *sh_desc = container_of(sdesc, + struct sh_dmae_desc, shdma_desc); + + if (*len > schan->max_xfer_len) + *len = schan->max_xfer_len; + + sh_desc->hw.sar = src; + sh_desc->hw.dar = dst; + sh_desc->hw.tcr = *len; + + return 0; +} + +static void sh_dmae_halt(struct shdma_chan *schan) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan, + shdma_chan); + dmae_halt(sh_chan); +} + +static bool sh_dmae_chan_irq(struct shdma_chan *schan, int irq) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan, + shdma_chan); + + if (!(chcr_read(sh_chan) & CHCR_TE)) + return false; + + /* DMA stop */ + dmae_halt(sh_chan); + + return true; +} + +static size_t sh_dmae_get_partial(struct shdma_chan *schan, + struct shdma_desc *sdesc) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan, + shdma_chan); + struct sh_dmae_desc *sh_desc = container_of(sdesc, + struct sh_dmae_desc, shdma_desc); + return sh_desc->hw.tcr - + (sh_dmae_readl(sh_chan, TCR) << sh_chan->xmit_shift); +} + +/* Called from error IRQ or NMI */ +static bool sh_dmae_reset(struct sh_dmae_device *shdev) +{ + bool ret; + + /* halt the dma controller */ + sh_dmae_ctl_stop(shdev); + + /* We cannot detect, which channel caused the error, have to reset all */ + ret = shdma_reset(&shdev->shdma_dev); + + sh_dmae_rst(shdev); + + return ret; +} + +static irqreturn_t sh_dmae_err(int irq, void *data) +{ + struct sh_dmae_device *shdev = data; + + if (!(dmaor_read(shdev) & DMAOR_AE)) + return IRQ_NONE; + + sh_dmae_reset(shdev); + return IRQ_HANDLED; +} + +static bool sh_dmae_desc_completed(struct shdma_chan *schan, + struct shdma_desc *sdesc) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, + struct sh_dmae_chan, shdma_chan); + struct sh_dmae_desc *sh_desc = container_of(sdesc, + struct sh_dmae_desc, shdma_desc); + u32 sar_buf = sh_dmae_readl(sh_chan, SAR); + u32 dar_buf = sh_dmae_readl(sh_chan, DAR); + + return (sdesc->direction == DMA_DEV_TO_MEM && + (sh_desc->hw.dar + sh_desc->hw.tcr) == dar_buf) || + (sdesc->direction != DMA_DEV_TO_MEM && + (sh_desc->hw.sar + sh_desc->hw.tcr) == sar_buf); +} + +static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev) +{ + /* Fast path out if NMIF is not asserted for this controller */ + if ((dmaor_read(shdev) & DMAOR_NMIF) == 0) + return false; + + return sh_dmae_reset(shdev); +} + +static int sh_dmae_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *data) +{ + struct sh_dmae_device *shdev; + int ret = NOTIFY_DONE; + bool triggered; + + /* + * Only concern ourselves with NMI events. + * + * Normally we would check the die chain value, but as this needs + * to be architecture independent, check for NMI context instead. + */ + if (!in_nmi()) + return NOTIFY_DONE; + + rcu_read_lock(); + list_for_each_entry_rcu(shdev, &sh_dmae_devices, node) { + /* + * Only stop if one of the controllers has NMIF asserted, + * we do not want to interfere with regular address error + * handling or NMI events that don't concern the DMACs. + */ + triggered = sh_dmae_nmi_notify(shdev); + if (triggered == true) + ret = NOTIFY_OK; + } + rcu_read_unlock(); + + return ret; +} + +static struct notifier_block sh_dmae_nmi_notifier __read_mostly = { + .notifier_call = sh_dmae_nmi_handler, + + /* Run before NMI debug handler and KGDB */ + .priority = 1, +}; + +static int sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id, + int irq, unsigned long flags) +{ + const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id]; + struct shdma_dev *sdev = &shdev->shdma_dev; + struct platform_device *pdev = to_platform_device(sdev->dma_dev.dev); + struct sh_dmae_chan *sh_chan; + struct shdma_chan *schan; + int err; + + sh_chan = devm_kzalloc(sdev->dma_dev.dev, sizeof(struct sh_dmae_chan), + GFP_KERNEL); + if (!sh_chan) + return -ENOMEM; + + schan = &sh_chan->shdma_chan; + schan->max_xfer_len = SH_DMA_TCR_MAX + 1; + + shdma_chan_probe(sdev, schan, id); + + sh_chan->base = shdev->chan_reg + chan_pdata->offset; + + /* set up channel irq */ + if (pdev->id >= 0) + snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id), + "sh-dmae%d.%d", pdev->id, id); + else + snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id), + "sh-dma%d", id); + + err = shdma_request_irq(schan, irq, flags, sh_chan->dev_id); + if (err) { + dev_err(sdev->dma_dev.dev, + "DMA channel %d request_irq error %d\n", + id, err); + goto err_no_irq; + } + + shdev->chan[id] = sh_chan; + return 0; + +err_no_irq: + /* remove from dmaengine device node */ + shdma_chan_remove(schan); + return err; +} + +static void sh_dmae_chan_remove(struct sh_dmae_device *shdev) +{ + struct shdma_chan *schan; + int i; + + shdma_for_each_chan(schan, &shdev->shdma_dev, i) { + BUG_ON(!schan); + + shdma_chan_remove(schan); + } +} + +#ifdef CONFIG_PM +static int sh_dmae_runtime_suspend(struct device *dev) +{ + struct sh_dmae_device *shdev = dev_get_drvdata(dev); + + sh_dmae_ctl_stop(shdev); + return 0; +} + +static int sh_dmae_runtime_resume(struct device *dev) +{ + struct sh_dmae_device *shdev = dev_get_drvdata(dev); + + return sh_dmae_rst(shdev); +} +#endif + +#ifdef CONFIG_PM_SLEEP +static int sh_dmae_suspend(struct device *dev) +{ + struct sh_dmae_device *shdev = dev_get_drvdata(dev); + + sh_dmae_ctl_stop(shdev); + return 0; +} + +static int sh_dmae_resume(struct device *dev) +{ + struct sh_dmae_device *shdev = dev_get_drvdata(dev); + int i, ret; + + ret = sh_dmae_rst(shdev); + if (ret < 0) + dev_err(dev, "Failed to reset!\n"); + + for (i = 0; i < shdev->pdata->channel_num; i++) { + struct sh_dmae_chan *sh_chan = shdev->chan[i]; + + if (!sh_chan->shdma_chan.desc_num) + continue; + + if (sh_chan->shdma_chan.slave_id >= 0) { + const struct sh_dmae_slave_config *cfg = sh_chan->config; + dmae_set_dmars(sh_chan, cfg->mid_rid); + dmae_set_chcr(sh_chan, cfg->chcr); + } else { + dmae_init(sh_chan); + } + } + + return 0; +} +#endif + +static const struct dev_pm_ops sh_dmae_pm = { + SET_SYSTEM_SLEEP_PM_OPS(sh_dmae_suspend, sh_dmae_resume) + SET_RUNTIME_PM_OPS(sh_dmae_runtime_suspend, sh_dmae_runtime_resume, + NULL) +}; + +static dma_addr_t sh_dmae_slave_addr(struct shdma_chan *schan) +{ + struct sh_dmae_chan *sh_chan = container_of(schan, + struct sh_dmae_chan, shdma_chan); + + /* + * Implicit BUG_ON(!sh_chan->config) + * This is an exclusive slave DMA operation, may only be called after a + * successful slave configuration. + */ + return sh_chan->slave_addr; +} + +static struct shdma_desc *sh_dmae_embedded_desc(void *buf, int i) +{ + return &((struct sh_dmae_desc *)buf)[i].shdma_desc; +} + +static const struct shdma_ops sh_dmae_shdma_ops = { + .desc_completed = sh_dmae_desc_completed, + .halt_channel = sh_dmae_halt, + .channel_busy = sh_dmae_channel_busy, + .slave_addr = sh_dmae_slave_addr, + .desc_setup = sh_dmae_desc_setup, + .set_slave = sh_dmae_set_slave, + .setup_xfer = sh_dmae_setup_xfer, + .start_xfer = sh_dmae_start_xfer, + .embedded_desc = sh_dmae_embedded_desc, + .chan_irq = sh_dmae_chan_irq, + .get_partial = sh_dmae_get_partial, +}; + +static int sh_dmae_probe(struct platform_device *pdev) +{ + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES | DMA_SLAVE_BUSWIDTH_8_BYTES | + DMA_SLAVE_BUSWIDTH_16_BYTES | DMA_SLAVE_BUSWIDTH_32_BYTES; + const struct sh_dmae_pdata *pdata; + unsigned long chan_flag[SH_DMAE_MAX_CHANNELS] = {}; + int chan_irq[SH_DMAE_MAX_CHANNELS]; + unsigned long irqflags = 0; + int err, errirq, i, irq_cnt = 0, irqres = 0, irq_cap = 0; + struct sh_dmae_device *shdev; + struct dma_device *dma_dev; + struct resource *dmars, *errirq_res, *chanirq_res; + + if (pdev->dev.of_node) + pdata = of_device_get_match_data(&pdev->dev); + else + pdata = dev_get_platdata(&pdev->dev); + + /* get platform data */ + if (!pdata || !pdata->channel_num) + return -ENODEV; + + /* DMARS area is optional */ + dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1); + /* + * IRQ resources: + * 1. there always must be at least one IRQ IO-resource. On SH4 it is + * the error IRQ, in which case it is the only IRQ in this resource: + * start == end. If it is the only IRQ resource, all channels also + * use the same IRQ. + * 2. DMA channel IRQ resources can be specified one per resource or in + * ranges (start != end) + * 3. iff all events (channels and, optionally, error) on this + * controller use the same IRQ, only one IRQ resource can be + * specified, otherwise there must be one IRQ per channel, even if + * some of them are equal + * 4. if all IRQs on this controller are equal or if some specific IRQs + * specify IORESOURCE_IRQ_SHAREABLE in their resources, they will be + * requested with the IRQF_SHARED flag + */ + errirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!errirq_res) + return -ENODEV; + + shdev = devm_kzalloc(&pdev->dev, sizeof(struct sh_dmae_device), + GFP_KERNEL); + if (!shdev) + return -ENOMEM; + + dma_dev = &shdev->shdma_dev.dma_dev; + + shdev->chan_reg = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(shdev->chan_reg)) + return PTR_ERR(shdev->chan_reg); + if (dmars) { + shdev->dmars = devm_ioremap_resource(&pdev->dev, dmars); + if (IS_ERR(shdev->dmars)) + return PTR_ERR(shdev->dmars); + } + + dma_dev->src_addr_widths = widths; + dma_dev->dst_addr_widths = widths; + dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + if (!pdata->slave_only) + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + if (pdata->slave && pdata->slave_num) + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + + /* Default transfer size of 32 bytes requires 32-byte alignment */ + dma_dev->copy_align = LOG2_DEFAULT_XFER_SIZE; + + shdev->shdma_dev.ops = &sh_dmae_shdma_ops; + shdev->shdma_dev.desc_size = sizeof(struct sh_dmae_desc); + err = shdma_init(&pdev->dev, &shdev->shdma_dev, + pdata->channel_num); + if (err < 0) + goto eshdma; + + /* platform data */ + shdev->pdata = pdata; + + if (pdata->chcr_offset) + shdev->chcr_offset = pdata->chcr_offset; + else + shdev->chcr_offset = CHCR; + + if (pdata->chcr_ie_bit) + shdev->chcr_ie_bit = pdata->chcr_ie_bit; + else + shdev->chcr_ie_bit = CHCR_IE; + + platform_set_drvdata(pdev, shdev); + + pm_runtime_enable(&pdev->dev); + err = pm_runtime_get_sync(&pdev->dev); + if (err < 0) + dev_err(&pdev->dev, "%s(): GET = %d\n", __func__, err); + + spin_lock_irq(&sh_dmae_lock); + list_add_tail_rcu(&shdev->node, &sh_dmae_devices); + spin_unlock_irq(&sh_dmae_lock); + + /* reset dma controller - only needed as a test */ + err = sh_dmae_rst(shdev); + if (err) + goto rst_err; + + if (IS_ENABLED(CONFIG_CPU_SH4) || IS_ENABLED(CONFIG_ARCH_RENESAS)) { + chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1); + + if (!chanirq_res) + chanirq_res = errirq_res; + else + irqres++; + + if (chanirq_res == errirq_res || + (errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE) + irqflags = IRQF_SHARED; + + errirq = errirq_res->start; + + err = devm_request_irq(&pdev->dev, errirq, sh_dmae_err, + irqflags, "DMAC Address Error", shdev); + if (err) { + dev_err(&pdev->dev, + "DMA failed requesting irq #%d, error %d\n", + errirq, err); + goto eirq_err; + } + } else { + chanirq_res = errirq_res; + } + + if (chanirq_res->start == chanirq_res->end && + !platform_get_resource(pdev, IORESOURCE_IRQ, 1)) { + /* Special case - all multiplexed */ + for (; irq_cnt < pdata->channel_num; irq_cnt++) { + if (irq_cnt < SH_DMAE_MAX_CHANNELS) { + chan_irq[irq_cnt] = chanirq_res->start; + chan_flag[irq_cnt] = IRQF_SHARED; + } else { + irq_cap = 1; + break; + } + } + } else { + do { + for (i = chanirq_res->start; i <= chanirq_res->end; i++) { + if (irq_cnt >= SH_DMAE_MAX_CHANNELS) { + irq_cap = 1; + break; + } + + if ((errirq_res->flags & IORESOURCE_BITS) == + IORESOURCE_IRQ_SHAREABLE) + chan_flag[irq_cnt] = IRQF_SHARED; + else + chan_flag[irq_cnt] = 0; + dev_dbg(&pdev->dev, + "Found IRQ %d for channel %d\n", + i, irq_cnt); + chan_irq[irq_cnt++] = i; + } + + if (irq_cnt >= SH_DMAE_MAX_CHANNELS) + break; + + chanirq_res = platform_get_resource(pdev, + IORESOURCE_IRQ, ++irqres); + } while (irq_cnt < pdata->channel_num && chanirq_res); + } + + /* Create DMA Channel */ + for (i = 0; i < irq_cnt; i++) { + err = sh_dmae_chan_probe(shdev, i, chan_irq[i], chan_flag[i]); + if (err) + goto chan_probe_err; + } + + if (irq_cap) + dev_notice(&pdev->dev, "Attempting to register %d DMA " + "channels when a maximum of %d are supported.\n", + pdata->channel_num, SH_DMAE_MAX_CHANNELS); + + pm_runtime_put(&pdev->dev); + + err = dma_async_device_register(&shdev->shdma_dev.dma_dev); + if (err < 0) + goto edmadevreg; + + return err; + +edmadevreg: + pm_runtime_get(&pdev->dev); + +chan_probe_err: + sh_dmae_chan_remove(shdev); + +eirq_err: +rst_err: + spin_lock_irq(&sh_dmae_lock); + list_del_rcu(&shdev->node); + spin_unlock_irq(&sh_dmae_lock); + + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + shdma_cleanup(&shdev->shdma_dev); +eshdma: + synchronize_rcu(); + + return err; +} + +static int sh_dmae_remove(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev; + + dma_async_device_unregister(dma_dev); + + spin_lock_irq(&sh_dmae_lock); + list_del_rcu(&shdev->node); + spin_unlock_irq(&sh_dmae_lock); + + pm_runtime_disable(&pdev->dev); + + sh_dmae_chan_remove(shdev); + shdma_cleanup(&shdev->shdma_dev); + + synchronize_rcu(); + + return 0; +} + +static struct platform_driver sh_dmae_driver = { + .driver = { + .pm = &sh_dmae_pm, + .name = SH_DMAE_DRV_NAME, + }, + .remove = sh_dmae_remove, +}; + +static int __init sh_dmae_init(void) +{ + /* Wire up NMI handling */ + int err = register_die_notifier(&sh_dmae_nmi_notifier); + if (err) + return err; + + return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe); +} +module_init(sh_dmae_init); + +static void __exit sh_dmae_exit(void) +{ + platform_driver_unregister(&sh_dmae_driver); + + unregister_die_notifier(&sh_dmae_nmi_notifier); +} +module_exit(sh_dmae_exit); + +MODULE_AUTHOR("Nobuhiro Iwamatsu "); +MODULE_DESCRIPTION("Renesas SH DMA Engine driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" SH_DMAE_DRV_NAME); diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c new file mode 100644 index 0000000000..b14cf350b6 --- /dev/null +++ b/drivers/dma/sh/usb-dmac.c @@ -0,0 +1,912 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Renesas USB DMA Controller Driver + * + * Copyright (C) 2015 Renesas Electronics Corporation + * + * based on rcar-dmac.c + * Copyright (C) 2014 Renesas Electronics Inc. + * Author: Laurent Pinchart + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" +#include "../virt-dma.h" + +/* + * struct usb_dmac_sg - Descriptor for a hardware transfer + * @mem_addr: memory address + * @size: transfer size in bytes + */ +struct usb_dmac_sg { + dma_addr_t mem_addr; + u32 size; +}; + +/* + * struct usb_dmac_desc - USB DMA Transfer Descriptor + * @vd: base virtual channel DMA transaction descriptor + * @direction: direction of the DMA transfer + * @sg_allocated_len: length of allocated sg + * @sg_len: length of sg + * @sg_index: index of sg + * @residue: residue after the DMAC completed a transfer + * @node: node for desc_got and desc_freed + * @done_cookie: cookie after the DMAC completed a transfer + * @sg: information for the transfer + */ +struct usb_dmac_desc { + struct virt_dma_desc vd; + enum dma_transfer_direction direction; + unsigned int sg_allocated_len; + unsigned int sg_len; + unsigned int sg_index; + u32 residue; + struct list_head node; + dma_cookie_t done_cookie; + struct usb_dmac_sg sg[]; +}; + +#define to_usb_dmac_desc(vd) container_of(vd, struct usb_dmac_desc, vd) + +/* + * struct usb_dmac_chan - USB DMA Controller Channel + * @vc: base virtual DMA channel object + * @iomem: channel I/O memory base + * @index: index of this channel in the controller + * @irq: irq number of this channel + * @desc: the current descriptor + * @descs_allocated: number of descriptors allocated + * @desc_got: got descriptors + * @desc_freed: freed descriptors after the DMAC completed a transfer + */ +struct usb_dmac_chan { + struct virt_dma_chan vc; + void __iomem *iomem; + unsigned int index; + int irq; + struct usb_dmac_desc *desc; + int descs_allocated; + struct list_head desc_got; + struct list_head desc_freed; +}; + +#define to_usb_dmac_chan(c) container_of(c, struct usb_dmac_chan, vc.chan) + +/* + * struct usb_dmac - USB DMA Controller + * @engine: base DMA engine object + * @dev: the hardware device + * @iomem: remapped I/O memory base + * @n_channels: number of available channels + * @channels: array of DMAC channels + */ +struct usb_dmac { + struct dma_device engine; + struct device *dev; + void __iomem *iomem; + + unsigned int n_channels; + struct usb_dmac_chan *channels; +}; + +#define to_usb_dmac(d) container_of(d, struct usb_dmac, engine) + +/* ----------------------------------------------------------------------------- + * Registers + */ + +#define USB_DMAC_CHAN_OFFSET(i) (0x20 + 0x20 * (i)) + +#define USB_DMASWR 0x0008 +#define USB_DMASWR_SWR (1 << 0) +#define USB_DMAOR 0x0060 +#define USB_DMAOR_AE (1 << 1) +#define USB_DMAOR_DME (1 << 0) + +#define USB_DMASAR 0x0000 +#define USB_DMADAR 0x0004 +#define USB_DMATCR 0x0008 +#define USB_DMATCR_MASK 0x00ffffff +#define USB_DMACHCR 0x0014 +#define USB_DMACHCR_FTE (1 << 24) +#define USB_DMACHCR_NULLE (1 << 16) +#define USB_DMACHCR_NULL (1 << 12) +#define USB_DMACHCR_TS_8B ((0 << 7) | (0 << 6)) +#define USB_DMACHCR_TS_16B ((0 << 7) | (1 << 6)) +#define USB_DMACHCR_TS_32B ((1 << 7) | (0 << 6)) +#define USB_DMACHCR_IE (1 << 5) +#define USB_DMACHCR_SP (1 << 2) +#define USB_DMACHCR_TE (1 << 1) +#define USB_DMACHCR_DE (1 << 0) +#define USB_DMATEND 0x0018 + +/* Hardcode the xfer_shift to 5 (32bytes) */ +#define USB_DMAC_XFER_SHIFT 5 +#define USB_DMAC_XFER_SIZE (1 << USB_DMAC_XFER_SHIFT) +#define USB_DMAC_CHCR_TS USB_DMACHCR_TS_32B +#define USB_DMAC_SLAVE_BUSWIDTH DMA_SLAVE_BUSWIDTH_32_BYTES + +/* for descriptors */ +#define USB_DMAC_INITIAL_NR_DESC 16 +#define USB_DMAC_INITIAL_NR_SG 8 + +/* ----------------------------------------------------------------------------- + * Device access + */ + +static void usb_dmac_write(struct usb_dmac *dmac, u32 reg, u32 data) +{ + writel(data, dmac->iomem + reg); +} + +static u32 usb_dmac_read(struct usb_dmac *dmac, u32 reg) +{ + return readl(dmac->iomem + reg); +} + +static u32 usb_dmac_chan_read(struct usb_dmac_chan *chan, u32 reg) +{ + return readl(chan->iomem + reg); +} + +static void usb_dmac_chan_write(struct usb_dmac_chan *chan, u32 reg, u32 data) +{ + writel(data, chan->iomem + reg); +} + +/* ----------------------------------------------------------------------------- + * Initialization and configuration + */ + +static bool usb_dmac_chan_is_busy(struct usb_dmac_chan *chan) +{ + u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR); + + return (chcr & (USB_DMACHCR_DE | USB_DMACHCR_TE)) == USB_DMACHCR_DE; +} + +static u32 usb_dmac_calc_tend(u32 size) +{ + /* + * Please refer to the Figure "Example of Final Transaction Valid + * Data Transfer Enable (EDTEN) Setting" in the data sheet. + */ + return 0xffffffff << (32 - (size % USB_DMAC_XFER_SIZE ? : + USB_DMAC_XFER_SIZE)); +} + +/* This function is already held by vc.lock */ +static void usb_dmac_chan_start_sg(struct usb_dmac_chan *chan, + unsigned int index) +{ + struct usb_dmac_desc *desc = chan->desc; + struct usb_dmac_sg *sg = desc->sg + index; + dma_addr_t src_addr = 0, dst_addr = 0; + + WARN_ON_ONCE(usb_dmac_chan_is_busy(chan)); + + if (desc->direction == DMA_DEV_TO_MEM) + dst_addr = sg->mem_addr; + else + src_addr = sg->mem_addr; + + dev_dbg(chan->vc.chan.device->dev, + "chan%u: queue sg %p: %u@%pad -> %pad\n", + chan->index, sg, sg->size, &src_addr, &dst_addr); + + usb_dmac_chan_write(chan, USB_DMASAR, src_addr & 0xffffffff); + usb_dmac_chan_write(chan, USB_DMADAR, dst_addr & 0xffffffff); + usb_dmac_chan_write(chan, USB_DMATCR, + DIV_ROUND_UP(sg->size, USB_DMAC_XFER_SIZE)); + usb_dmac_chan_write(chan, USB_DMATEND, usb_dmac_calc_tend(sg->size)); + + usb_dmac_chan_write(chan, USB_DMACHCR, USB_DMAC_CHCR_TS | + USB_DMACHCR_NULLE | USB_DMACHCR_IE | USB_DMACHCR_DE); +} + +/* This function is already held by vc.lock */ +static void usb_dmac_chan_start_desc(struct usb_dmac_chan *chan) +{ + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&chan->vc); + if (!vd) { + chan->desc = NULL; + return; + } + + /* + * Remove this request from vc->desc_issued. Otherwise, this driver + * will get the previous value from vchan_next_desc() after a transfer + * was completed. + */ + list_del(&vd->node); + + chan->desc = to_usb_dmac_desc(vd); + chan->desc->sg_index = 0; + usb_dmac_chan_start_sg(chan, 0); +} + +static int usb_dmac_init(struct usb_dmac *dmac) +{ + u16 dmaor; + + /* Clear all channels and enable the DMAC globally. */ + usb_dmac_write(dmac, USB_DMAOR, USB_DMAOR_DME); + + dmaor = usb_dmac_read(dmac, USB_DMAOR); + if ((dmaor & (USB_DMAOR_AE | USB_DMAOR_DME)) != USB_DMAOR_DME) { + dev_warn(dmac->dev, "DMAOR initialization failed.\n"); + return -EIO; + } + + return 0; +} + +/* ----------------------------------------------------------------------------- + * Descriptors allocation and free + */ +static int usb_dmac_desc_alloc(struct usb_dmac_chan *chan, unsigned int sg_len, + gfp_t gfp) +{ + struct usb_dmac_desc *desc; + unsigned long flags; + + desc = kzalloc(struct_size(desc, sg, sg_len), gfp); + if (!desc) + return -ENOMEM; + + desc->sg_allocated_len = sg_len; + INIT_LIST_HEAD(&desc->node); + + spin_lock_irqsave(&chan->vc.lock, flags); + list_add_tail(&desc->node, &chan->desc_freed); + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return 0; +} + +static void usb_dmac_desc_free(struct usb_dmac_chan *chan) +{ + struct usb_dmac_desc *desc, *_desc; + LIST_HEAD(list); + + list_splice_init(&chan->desc_freed, &list); + list_splice_init(&chan->desc_got, &list); + + list_for_each_entry_safe(desc, _desc, &list, node) { + list_del(&desc->node); + kfree(desc); + } + chan->descs_allocated = 0; +} + +static struct usb_dmac_desc *usb_dmac_desc_get(struct usb_dmac_chan *chan, + unsigned int sg_len, gfp_t gfp) +{ + struct usb_dmac_desc *desc = NULL; + unsigned long flags; + + /* Get a freed descritpor */ + spin_lock_irqsave(&chan->vc.lock, flags); + list_for_each_entry(desc, &chan->desc_freed, node) { + if (sg_len <= desc->sg_allocated_len) { + list_move_tail(&desc->node, &chan->desc_got); + spin_unlock_irqrestore(&chan->vc.lock, flags); + return desc; + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + + /* Allocate a new descriptor */ + if (!usb_dmac_desc_alloc(chan, sg_len, gfp)) { + /* If allocated the desc, it was added to tail of the list */ + spin_lock_irqsave(&chan->vc.lock, flags); + desc = list_last_entry(&chan->desc_freed, struct usb_dmac_desc, + node); + list_move_tail(&desc->node, &chan->desc_got); + spin_unlock_irqrestore(&chan->vc.lock, flags); + return desc; + } + + return NULL; +} + +static void usb_dmac_desc_put(struct usb_dmac_chan *chan, + struct usb_dmac_desc *desc) +{ + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + list_move_tail(&desc->node, &chan->desc_freed); + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +/* ----------------------------------------------------------------------------- + * Stop and reset + */ + +static void usb_dmac_soft_reset(struct usb_dmac_chan *uchan) +{ + struct dma_chan *chan = &uchan->vc.chan; + struct usb_dmac *dmac = to_usb_dmac(chan->device); + int i; + + /* Don't issue soft reset if any one of channels is busy */ + for (i = 0; i < dmac->n_channels; ++i) { + if (usb_dmac_chan_is_busy(uchan)) + return; + } + + usb_dmac_write(dmac, USB_DMAOR, 0); + usb_dmac_write(dmac, USB_DMASWR, USB_DMASWR_SWR); + udelay(100); + usb_dmac_write(dmac, USB_DMASWR, 0); + usb_dmac_write(dmac, USB_DMAOR, 1); +} + +static void usb_dmac_chan_halt(struct usb_dmac_chan *chan) +{ + u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR); + + chcr &= ~(USB_DMACHCR_IE | USB_DMACHCR_TE | USB_DMACHCR_DE); + usb_dmac_chan_write(chan, USB_DMACHCR, chcr); + + usb_dmac_soft_reset(chan); +} + +static void usb_dmac_stop(struct usb_dmac *dmac) +{ + usb_dmac_write(dmac, USB_DMAOR, 0); +} + +/* ----------------------------------------------------------------------------- + * DMA engine operations + */ + +static int usb_dmac_alloc_chan_resources(struct dma_chan *chan) +{ + struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan); + int ret; + + while (uchan->descs_allocated < USB_DMAC_INITIAL_NR_DESC) { + ret = usb_dmac_desc_alloc(uchan, USB_DMAC_INITIAL_NR_SG, + GFP_KERNEL); + if (ret < 0) { + usb_dmac_desc_free(uchan); + return ret; + } + uchan->descs_allocated++; + } + + return pm_runtime_get_sync(chan->device->dev); +} + +static void usb_dmac_free_chan_resources(struct dma_chan *chan) +{ + struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan); + unsigned long flags; + + /* Protect against ISR */ + spin_lock_irqsave(&uchan->vc.lock, flags); + usb_dmac_chan_halt(uchan); + spin_unlock_irqrestore(&uchan->vc.lock, flags); + + usb_dmac_desc_free(uchan); + vchan_free_chan_resources(&uchan->vc); + + pm_runtime_put(chan->device->dev); +} + +static struct dma_async_tx_descriptor * +usb_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long dma_flags, void *context) +{ + struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan); + struct usb_dmac_desc *desc; + struct scatterlist *sg; + int i; + + if (!sg_len) { + dev_warn(chan->device->dev, + "%s: bad parameter: len=%d\n", __func__, sg_len); + return NULL; + } + + desc = usb_dmac_desc_get(uchan, sg_len, GFP_NOWAIT); + if (!desc) + return NULL; + + desc->direction = dir; + desc->sg_len = sg_len; + for_each_sg(sgl, sg, sg_len, i) { + desc->sg[i].mem_addr = sg_dma_address(sg); + desc->sg[i].size = sg_dma_len(sg); + } + + return vchan_tx_prep(&uchan->vc, &desc->vd, dma_flags); +} + +static int usb_dmac_chan_terminate_all(struct dma_chan *chan) +{ + struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan); + struct usb_dmac_desc *desc, *_desc; + unsigned long flags; + LIST_HEAD(head); + LIST_HEAD(list); + + spin_lock_irqsave(&uchan->vc.lock, flags); + usb_dmac_chan_halt(uchan); + vchan_get_all_descriptors(&uchan->vc, &head); + if (uchan->desc) + uchan->desc = NULL; + list_splice_init(&uchan->desc_got, &list); + list_for_each_entry_safe(desc, _desc, &list, node) + list_move_tail(&desc->node, &uchan->desc_freed); + spin_unlock_irqrestore(&uchan->vc.lock, flags); + vchan_dma_desc_free_list(&uchan->vc, &head); + + return 0; +} + +static unsigned int usb_dmac_get_current_residue(struct usb_dmac_chan *chan, + struct usb_dmac_desc *desc, + unsigned int sg_index) +{ + struct usb_dmac_sg *sg = desc->sg + sg_index; + u32 mem_addr = sg->mem_addr & 0xffffffff; + unsigned int residue = sg->size; + + /* + * We cannot use USB_DMATCR to calculate residue because USB_DMATCR + * has unsuited value to calculate. + */ + if (desc->direction == DMA_DEV_TO_MEM) + residue -= usb_dmac_chan_read(chan, USB_DMADAR) - mem_addr; + else + residue -= usb_dmac_chan_read(chan, USB_DMASAR) - mem_addr; + + return residue; +} + +static u32 usb_dmac_chan_get_residue_if_complete(struct usb_dmac_chan *chan, + dma_cookie_t cookie) +{ + struct usb_dmac_desc *desc; + u32 residue = 0; + + list_for_each_entry_reverse(desc, &chan->desc_freed, node) { + if (desc->done_cookie == cookie) { + residue = desc->residue; + break; + } + } + + return residue; +} + +static u32 usb_dmac_chan_get_residue(struct usb_dmac_chan *chan, + dma_cookie_t cookie) +{ + u32 residue = 0; + struct virt_dma_desc *vd; + struct usb_dmac_desc *desc = chan->desc; + int i; + + if (!desc) { + vd = vchan_find_desc(&chan->vc, cookie); + if (!vd) + return 0; + desc = to_usb_dmac_desc(vd); + } + + /* Compute the size of all usb_dmac_sg still to be transferred */ + for (i = desc->sg_index + 1; i < desc->sg_len; i++) + residue += desc->sg[i].size; + + /* Add the residue for the current sg */ + residue += usb_dmac_get_current_residue(chan, desc, desc->sg_index); + + return residue; +} + +static enum dma_status usb_dmac_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan); + enum dma_status status; + unsigned int residue = 0; + unsigned long flags; + + status = dma_cookie_status(chan, cookie, txstate); + /* a client driver will get residue after DMA_COMPLETE */ + if (!txstate) + return status; + + spin_lock_irqsave(&uchan->vc.lock, flags); + if (status == DMA_COMPLETE) + residue = usb_dmac_chan_get_residue_if_complete(uchan, cookie); + else + residue = usb_dmac_chan_get_residue(uchan, cookie); + spin_unlock_irqrestore(&uchan->vc.lock, flags); + + dma_set_residue(txstate, residue); + + return status; +} + +static void usb_dmac_issue_pending(struct dma_chan *chan) +{ + struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&uchan->vc.lock, flags); + if (vchan_issue_pending(&uchan->vc) && !uchan->desc) + usb_dmac_chan_start_desc(uchan); + spin_unlock_irqrestore(&uchan->vc.lock, flags); +} + +static void usb_dmac_virt_desc_free(struct virt_dma_desc *vd) +{ + struct usb_dmac_desc *desc = to_usb_dmac_desc(vd); + struct usb_dmac_chan *chan = to_usb_dmac_chan(vd->tx.chan); + + usb_dmac_desc_put(chan, desc); +} + +/* ----------------------------------------------------------------------------- + * IRQ handling + */ + +static void usb_dmac_isr_transfer_end(struct usb_dmac_chan *chan) +{ + struct usb_dmac_desc *desc = chan->desc; + + BUG_ON(!desc); + + if (++desc->sg_index < desc->sg_len) { + usb_dmac_chan_start_sg(chan, desc->sg_index); + } else { + desc->residue = usb_dmac_get_current_residue(chan, desc, + desc->sg_index - 1); + desc->done_cookie = desc->vd.tx.cookie; + desc->vd.tx_result.result = DMA_TRANS_NOERROR; + desc->vd.tx_result.residue = desc->residue; + vchan_cookie_complete(&desc->vd); + + /* Restart the next transfer if this driver has a next desc */ + usb_dmac_chan_start_desc(chan); + } +} + +static irqreturn_t usb_dmac_isr_channel(int irq, void *dev) +{ + struct usb_dmac_chan *chan = dev; + irqreturn_t ret = IRQ_NONE; + u32 mask = 0; + u32 chcr; + bool xfer_end = false; + + spin_lock(&chan->vc.lock); + + chcr = usb_dmac_chan_read(chan, USB_DMACHCR); + if (chcr & (USB_DMACHCR_TE | USB_DMACHCR_SP)) { + mask |= USB_DMACHCR_DE | USB_DMACHCR_TE | USB_DMACHCR_SP; + if (chcr & USB_DMACHCR_DE) + xfer_end = true; + ret |= IRQ_HANDLED; + } + if (chcr & USB_DMACHCR_NULL) { + /* An interruption of TE will happen after we set FTE */ + mask |= USB_DMACHCR_NULL; + chcr |= USB_DMACHCR_FTE; + ret |= IRQ_HANDLED; + } + if (mask) + usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask); + + if (xfer_end) + usb_dmac_isr_transfer_end(chan); + + spin_unlock(&chan->vc.lock); + + return ret; +} + +/* ----------------------------------------------------------------------------- + * OF xlate and channel filter + */ + +static bool usb_dmac_chan_filter(struct dma_chan *chan, void *arg) +{ + struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan); + struct of_phandle_args *dma_spec = arg; + + /* USB-DMAC should be used with fixed usb controller's FIFO */ + if (uchan->index != dma_spec->args[0]) + return false; + + return true; +} + +static struct dma_chan *usb_dmac_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_chan *chan; + dma_cap_mask_t mask; + + if (dma_spec->args_count != 1) + return NULL; + + /* Only slave DMA channels can be allocated via DT */ + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + chan = __dma_request_channel(&mask, usb_dmac_chan_filter, dma_spec, + ofdma->of_node); + if (!chan) + return NULL; + + return chan; +} + +/* ----------------------------------------------------------------------------- + * Power management + */ + +#ifdef CONFIG_PM +static int usb_dmac_runtime_suspend(struct device *dev) +{ + struct usb_dmac *dmac = dev_get_drvdata(dev); + int i; + + for (i = 0; i < dmac->n_channels; ++i) { + if (!dmac->channels[i].iomem) + break; + usb_dmac_chan_halt(&dmac->channels[i]); + } + + return 0; +} + +static int usb_dmac_runtime_resume(struct device *dev) +{ + struct usb_dmac *dmac = dev_get_drvdata(dev); + + return usb_dmac_init(dmac); +} +#endif /* CONFIG_PM */ + +static const struct dev_pm_ops usb_dmac_pm = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(usb_dmac_runtime_suspend, usb_dmac_runtime_resume, + NULL) +}; + +/* ----------------------------------------------------------------------------- + * Probe and remove + */ + +static int usb_dmac_chan_probe(struct usb_dmac *dmac, + struct usb_dmac_chan *uchan, + unsigned int index) +{ + struct platform_device *pdev = to_platform_device(dmac->dev); + char pdev_irqname[5]; + char *irqname; + int ret; + + uchan->index = index; + uchan->iomem = dmac->iomem + USB_DMAC_CHAN_OFFSET(index); + + /* Request the channel interrupt. */ + sprintf(pdev_irqname, "ch%u", index); + uchan->irq = platform_get_irq_byname(pdev, pdev_irqname); + if (uchan->irq < 0) + return -ENODEV; + + irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u", + dev_name(dmac->dev), index); + if (!irqname) + return -ENOMEM; + + ret = devm_request_irq(dmac->dev, uchan->irq, usb_dmac_isr_channel, + IRQF_SHARED, irqname, uchan); + if (ret) { + dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", + uchan->irq, ret); + return ret; + } + + uchan->vc.desc_free = usb_dmac_virt_desc_free; + vchan_init(&uchan->vc, &dmac->engine); + INIT_LIST_HEAD(&uchan->desc_freed); + INIT_LIST_HEAD(&uchan->desc_got); + + return 0; +} + +static int usb_dmac_parse_of(struct device *dev, struct usb_dmac *dmac) +{ + struct device_node *np = dev->of_node; + int ret; + + ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels); + if (ret < 0) { + dev_err(dev, "unable to read dma-channels property\n"); + return ret; + } + + if (dmac->n_channels <= 0 || dmac->n_channels >= 100) { + dev_err(dev, "invalid number of channels %u\n", + dmac->n_channels); + return -EINVAL; + } + + return 0; +} + +static int usb_dmac_probe(struct platform_device *pdev) +{ + const enum dma_slave_buswidth widths = USB_DMAC_SLAVE_BUSWIDTH; + struct dma_device *engine; + struct usb_dmac *dmac; + unsigned int i; + int ret; + + dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); + if (!dmac) + return -ENOMEM; + + dmac->dev = &pdev->dev; + platform_set_drvdata(pdev, dmac); + + ret = usb_dmac_parse_of(&pdev->dev, dmac); + if (ret < 0) + return ret; + + dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels, + sizeof(*dmac->channels), GFP_KERNEL); + if (!dmac->channels) + return -ENOMEM; + + /* Request resources. */ + dmac->iomem = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dmac->iomem)) + return PTR_ERR(dmac->iomem); + + /* Enable runtime PM and initialize the device. */ + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); + goto error_pm; + } + + ret = usb_dmac_init(dmac); + + if (ret) { + dev_err(&pdev->dev, "failed to reset device\n"); + goto error; + } + + /* Initialize the channels. */ + INIT_LIST_HEAD(&dmac->engine.channels); + + for (i = 0; i < dmac->n_channels; ++i) { + ret = usb_dmac_chan_probe(dmac, &dmac->channels[i], i); + if (ret < 0) + goto error; + } + + /* Register the DMAC as a DMA provider for DT. */ + ret = of_dma_controller_register(pdev->dev.of_node, usb_dmac_of_xlate, + NULL); + if (ret < 0) + goto error; + + /* + * Register the DMA engine device. + * + * Default transfer size of 32 bytes requires 32-byte alignment. + */ + engine = &dmac->engine; + dma_cap_set(DMA_SLAVE, engine->cap_mask); + + engine->dev = &pdev->dev; + + engine->src_addr_widths = widths; + engine->dst_addr_widths = widths; + engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + engine->device_alloc_chan_resources = usb_dmac_alloc_chan_resources; + engine->device_free_chan_resources = usb_dmac_free_chan_resources; + engine->device_prep_slave_sg = usb_dmac_prep_slave_sg; + engine->device_terminate_all = usb_dmac_chan_terminate_all; + engine->device_tx_status = usb_dmac_tx_status; + engine->device_issue_pending = usb_dmac_issue_pending; + + ret = dma_async_device_register(engine); + if (ret < 0) + goto error; + + pm_runtime_put(&pdev->dev); + return 0; + +error: + of_dma_controller_free(pdev->dev.of_node); +error_pm: + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return ret; +} + +static void usb_dmac_chan_remove(struct usb_dmac *dmac, + struct usb_dmac_chan *uchan) +{ + usb_dmac_chan_halt(uchan); + devm_free_irq(dmac->dev, uchan->irq, uchan); +} + +static int usb_dmac_remove(struct platform_device *pdev) +{ + struct usb_dmac *dmac = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < dmac->n_channels; ++i) + usb_dmac_chan_remove(dmac, &dmac->channels[i]); + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&dmac->engine); + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static void usb_dmac_shutdown(struct platform_device *pdev) +{ + struct usb_dmac *dmac = platform_get_drvdata(pdev); + + usb_dmac_stop(dmac); +} + +static const struct of_device_id usb_dmac_of_ids[] = { + { .compatible = "renesas,usb-dmac", }, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, usb_dmac_of_ids); + +static struct platform_driver usb_dmac_driver = { + .driver = { + .pm = &usb_dmac_pm, + .name = "usb-dmac", + .of_match_table = usb_dmac_of_ids, + }, + .probe = usb_dmac_probe, + .remove = usb_dmac_remove, + .shutdown = usb_dmac_shutdown, +}; + +module_platform_driver(usb_dmac_driver); + +MODULE_DESCRIPTION("Renesas USB DMA Controller Driver"); +MODULE_AUTHOR("Yoshihiro Shimoda "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c new file mode 100644 index 0000000000..168aa0bd73 --- /dev/null +++ b/drivers/dma/sprd-dma.c @@ -0,0 +1,1308 @@ +/* + * Copyright (C) 2017 Spreadtrum Communications Inc. + * + * SPDX-License-Identifier: GPL-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +#define SPRD_DMA_CHN_REG_OFFSET 0x1000 +#define SPRD_DMA_CHN_REG_LENGTH 0x40 +#define SPRD_DMA_MEMCPY_MIN_SIZE 64 + +/* DMA global registers definition */ +#define SPRD_DMA_GLB_PAUSE 0x0 +#define SPRD_DMA_GLB_FRAG_WAIT 0x4 +#define SPRD_DMA_GLB_REQ_PEND0_EN 0x8 +#define SPRD_DMA_GLB_REQ_PEND1_EN 0xc +#define SPRD_DMA_GLB_INT_RAW_STS 0x10 +#define SPRD_DMA_GLB_INT_MSK_STS 0x14 +#define SPRD_DMA_GLB_REQ_STS 0x18 +#define SPRD_DMA_GLB_CHN_EN_STS 0x1c +#define SPRD_DMA_GLB_DEBUG_STS 0x20 +#define SPRD_DMA_GLB_ARB_SEL_STS 0x24 +#define SPRD_DMA_GLB_2STAGE_GRP1 0x28 +#define SPRD_DMA_GLB_2STAGE_GRP2 0x2c +#define SPRD_DMA_GLB_REQ_UID(uid) (0x4 * ((uid) - 1)) +#define SPRD_DMA_GLB_REQ_UID_OFFSET 0x2000 + +/* DMA channel registers definition */ +#define SPRD_DMA_CHN_PAUSE 0x0 +#define SPRD_DMA_CHN_REQ 0x4 +#define SPRD_DMA_CHN_CFG 0x8 +#define SPRD_DMA_CHN_INTC 0xc +#define SPRD_DMA_CHN_SRC_ADDR 0x10 +#define SPRD_DMA_CHN_DES_ADDR 0x14 +#define SPRD_DMA_CHN_FRG_LEN 0x18 +#define SPRD_DMA_CHN_BLK_LEN 0x1c +#define SPRD_DMA_CHN_TRSC_LEN 0x20 +#define SPRD_DMA_CHN_TRSF_STEP 0x24 +#define SPRD_DMA_CHN_WARP_PTR 0x28 +#define SPRD_DMA_CHN_WARP_TO 0x2c +#define SPRD_DMA_CHN_LLIST_PTR 0x30 +#define SPRD_DMA_CHN_FRAG_STEP 0x34 +#define SPRD_DMA_CHN_SRC_BLK_STEP 0x38 +#define SPRD_DMA_CHN_DES_BLK_STEP 0x3c + +/* SPRD_DMA_GLB_2STAGE_GRP register definition */ +#define SPRD_DMA_GLB_2STAGE_EN BIT(24) +#define SPRD_DMA_GLB_CHN_INT_MASK GENMASK(23, 20) +#define SPRD_DMA_GLB_DEST_INT BIT(22) +#define SPRD_DMA_GLB_SRC_INT BIT(20) +#define SPRD_DMA_GLB_LIST_DONE_TRG BIT(19) +#define SPRD_DMA_GLB_TRANS_DONE_TRG BIT(18) +#define SPRD_DMA_GLB_BLOCK_DONE_TRG BIT(17) +#define SPRD_DMA_GLB_FRAG_DONE_TRG BIT(16) +#define SPRD_DMA_GLB_TRG_OFFSET 16 +#define SPRD_DMA_GLB_DEST_CHN_MASK GENMASK(13, 8) +#define SPRD_DMA_GLB_DEST_CHN_OFFSET 8 +#define SPRD_DMA_GLB_SRC_CHN_MASK GENMASK(5, 0) + +/* SPRD_DMA_CHN_INTC register definition */ +#define SPRD_DMA_INT_MASK GENMASK(4, 0) +#define SPRD_DMA_INT_CLR_OFFSET 24 +#define SPRD_DMA_FRAG_INT_EN BIT(0) +#define SPRD_DMA_BLK_INT_EN BIT(1) +#define SPRD_DMA_TRANS_INT_EN BIT(2) +#define SPRD_DMA_LIST_INT_EN BIT(3) +#define SPRD_DMA_CFG_ERR_INT_EN BIT(4) + +/* SPRD_DMA_CHN_CFG register definition */ +#define SPRD_DMA_CHN_EN BIT(0) +#define SPRD_DMA_LINKLIST_EN BIT(4) +#define SPRD_DMA_WAIT_BDONE_OFFSET 24 +#define SPRD_DMA_DONOT_WAIT_BDONE 1 + +/* SPRD_DMA_CHN_REQ register definition */ +#define SPRD_DMA_REQ_EN BIT(0) + +/* SPRD_DMA_CHN_PAUSE register definition */ +#define SPRD_DMA_PAUSE_EN BIT(0) +#define SPRD_DMA_PAUSE_STS BIT(2) +#define SPRD_DMA_PAUSE_CNT 0x2000 + +/* DMA_CHN_WARP_* register definition */ +#define SPRD_DMA_HIGH_ADDR_MASK GENMASK(31, 28) +#define SPRD_DMA_LOW_ADDR_MASK GENMASK(31, 0) +#define SPRD_DMA_WRAP_ADDR_MASK GENMASK(27, 0) +#define SPRD_DMA_HIGH_ADDR_OFFSET 4 + +/* SPRD_DMA_CHN_INTC register definition */ +#define SPRD_DMA_FRAG_INT_STS BIT(16) +#define SPRD_DMA_BLK_INT_STS BIT(17) +#define SPRD_DMA_TRSC_INT_STS BIT(18) +#define SPRD_DMA_LIST_INT_STS BIT(19) +#define SPRD_DMA_CFGERR_INT_STS BIT(20) +#define SPRD_DMA_CHN_INT_STS \ + (SPRD_DMA_FRAG_INT_STS | SPRD_DMA_BLK_INT_STS | \ + SPRD_DMA_TRSC_INT_STS | SPRD_DMA_LIST_INT_STS | \ + SPRD_DMA_CFGERR_INT_STS) + +/* SPRD_DMA_CHN_FRG_LEN register definition */ +#define SPRD_DMA_SRC_DATAWIDTH_OFFSET 30 +#define SPRD_DMA_DES_DATAWIDTH_OFFSET 28 +#define SPRD_DMA_SWT_MODE_OFFSET 26 +#define SPRD_DMA_REQ_MODE_OFFSET 24 +#define SPRD_DMA_REQ_MODE_MASK GENMASK(1, 0) +#define SPRD_DMA_WRAP_SEL_DEST BIT(23) +#define SPRD_DMA_WRAP_EN BIT(22) +#define SPRD_DMA_FIX_SEL_OFFSET 21 +#define SPRD_DMA_FIX_EN_OFFSET 20 +#define SPRD_DMA_LLIST_END BIT(19) +#define SPRD_DMA_FRG_LEN_MASK GENMASK(16, 0) + +/* SPRD_DMA_CHN_BLK_LEN register definition */ +#define SPRD_DMA_BLK_LEN_MASK GENMASK(16, 0) + +/* SPRD_DMA_CHN_TRSC_LEN register definition */ +#define SPRD_DMA_TRSC_LEN_MASK GENMASK(27, 0) + +/* SPRD_DMA_CHN_TRSF_STEP register definition */ +#define SPRD_DMA_DEST_TRSF_STEP_OFFSET 16 +#define SPRD_DMA_SRC_TRSF_STEP_OFFSET 0 +#define SPRD_DMA_TRSF_STEP_MASK GENMASK(15, 0) + +/* SPRD DMA_SRC_BLK_STEP register definition */ +#define SPRD_DMA_LLIST_HIGH_MASK GENMASK(31, 28) +#define SPRD_DMA_LLIST_HIGH_SHIFT 28 + +/* define DMA channel mode & trigger mode mask */ +#define SPRD_DMA_CHN_MODE_MASK GENMASK(7, 0) +#define SPRD_DMA_TRG_MODE_MASK GENMASK(7, 0) +#define SPRD_DMA_INT_TYPE_MASK GENMASK(7, 0) + +/* define the DMA transfer step type */ +#define SPRD_DMA_NONE_STEP 0 +#define SPRD_DMA_BYTE_STEP 1 +#define SPRD_DMA_SHORT_STEP 2 +#define SPRD_DMA_WORD_STEP 4 +#define SPRD_DMA_DWORD_STEP 8 + +#define SPRD_DMA_SOFTWARE_UID 0 + +/* dma data width values */ +enum sprd_dma_datawidth { + SPRD_DMA_DATAWIDTH_1_BYTE, + SPRD_DMA_DATAWIDTH_2_BYTES, + SPRD_DMA_DATAWIDTH_4_BYTES, + SPRD_DMA_DATAWIDTH_8_BYTES, +}; + +/* dma channel hardware configuration */ +struct sprd_dma_chn_hw { + u32 pause; + u32 req; + u32 cfg; + u32 intc; + u32 src_addr; + u32 des_addr; + u32 frg_len; + u32 blk_len; + u32 trsc_len; + u32 trsf_step; + u32 wrap_ptr; + u32 wrap_to; + u32 llist_ptr; + u32 frg_step; + u32 src_blk_step; + u32 des_blk_step; +}; + +/* dma request description */ +struct sprd_dma_desc { + struct virt_dma_desc vd; + struct sprd_dma_chn_hw chn_hw; + enum dma_transfer_direction dir; +}; + +/* dma channel description */ +struct sprd_dma_chn { + struct virt_dma_chan vc; + void __iomem *chn_base; + struct sprd_dma_linklist linklist; + struct dma_slave_config slave_cfg; + u32 chn_num; + u32 dev_id; + enum sprd_dma_chn_mode chn_mode; + enum sprd_dma_trg_mode trg_mode; + enum sprd_dma_int_type int_type; + struct sprd_dma_desc *cur_desc; +}; + +/* SPRD dma device */ +struct sprd_dma_dev { + struct dma_device dma_dev; + void __iomem *glb_base; + struct clk *clk; + struct clk *ashb_clk; + int irq; + u32 total_chns; + struct sprd_dma_chn channels[]; +}; + +static void sprd_dma_free_desc(struct virt_dma_desc *vd); +static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param); +static struct of_dma_filter_info sprd_dma_info = { + .filter_fn = sprd_dma_filter_fn, +}; + +static inline struct sprd_dma_chn *to_sprd_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct sprd_dma_chn, vc.chan); +} + +static inline struct sprd_dma_dev *to_sprd_dma_dev(struct dma_chan *c) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(c); + + return container_of(schan, struct sprd_dma_dev, channels[c->chan_id]); +} + +static inline struct sprd_dma_desc *to_sprd_dma_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct sprd_dma_desc, vd); +} + +static void sprd_dma_glb_update(struct sprd_dma_dev *sdev, u32 reg, + u32 mask, u32 val) +{ + u32 orig = readl(sdev->glb_base + reg); + u32 tmp; + + tmp = (orig & ~mask) | val; + writel(tmp, sdev->glb_base + reg); +} + +static void sprd_dma_chn_update(struct sprd_dma_chn *schan, u32 reg, + u32 mask, u32 val) +{ + u32 orig = readl(schan->chn_base + reg); + u32 tmp; + + tmp = (orig & ~mask) | val; + writel(tmp, schan->chn_base + reg); +} + +static int sprd_dma_enable(struct sprd_dma_dev *sdev) +{ + int ret; + + ret = clk_prepare_enable(sdev->clk); + if (ret) + return ret; + + /* + * The ashb_clk is optional and only for AGCP DMA controller, so we + * need add one condition to check if the ashb_clk need enable. + */ + if (!IS_ERR(sdev->ashb_clk)) + ret = clk_prepare_enable(sdev->ashb_clk); + + return ret; +} + +static void sprd_dma_disable(struct sprd_dma_dev *sdev) +{ + clk_disable_unprepare(sdev->clk); + + /* + * Need to check if we need disable the optional ashb_clk for AGCP DMA. + */ + if (!IS_ERR(sdev->ashb_clk)) + clk_disable_unprepare(sdev->ashb_clk); +} + +static void sprd_dma_set_uid(struct sprd_dma_chn *schan) +{ + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan); + u32 dev_id = schan->dev_id; + + if (dev_id != SPRD_DMA_SOFTWARE_UID) { + u32 uid_offset = SPRD_DMA_GLB_REQ_UID_OFFSET + + SPRD_DMA_GLB_REQ_UID(dev_id); + + writel(schan->chn_num + 1, sdev->glb_base + uid_offset); + } +} + +static void sprd_dma_unset_uid(struct sprd_dma_chn *schan) +{ + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan); + u32 dev_id = schan->dev_id; + + if (dev_id != SPRD_DMA_SOFTWARE_UID) { + u32 uid_offset = SPRD_DMA_GLB_REQ_UID_OFFSET + + SPRD_DMA_GLB_REQ_UID(dev_id); + + writel(0, sdev->glb_base + uid_offset); + } +} + +static void sprd_dma_clear_int(struct sprd_dma_chn *schan) +{ + sprd_dma_chn_update(schan, SPRD_DMA_CHN_INTC, + SPRD_DMA_INT_MASK << SPRD_DMA_INT_CLR_OFFSET, + SPRD_DMA_INT_MASK << SPRD_DMA_INT_CLR_OFFSET); +} + +static void sprd_dma_enable_chn(struct sprd_dma_chn *schan) +{ + sprd_dma_chn_update(schan, SPRD_DMA_CHN_CFG, SPRD_DMA_CHN_EN, + SPRD_DMA_CHN_EN); +} + +static void sprd_dma_disable_chn(struct sprd_dma_chn *schan) +{ + sprd_dma_chn_update(schan, SPRD_DMA_CHN_CFG, SPRD_DMA_CHN_EN, 0); +} + +static void sprd_dma_soft_request(struct sprd_dma_chn *schan) +{ + sprd_dma_chn_update(schan, SPRD_DMA_CHN_REQ, SPRD_DMA_REQ_EN, + SPRD_DMA_REQ_EN); +} + +static void sprd_dma_pause_resume(struct sprd_dma_chn *schan, bool enable) +{ + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan); + u32 pause, timeout = SPRD_DMA_PAUSE_CNT; + + if (enable) { + sprd_dma_chn_update(schan, SPRD_DMA_CHN_PAUSE, + SPRD_DMA_PAUSE_EN, SPRD_DMA_PAUSE_EN); + + do { + pause = readl(schan->chn_base + SPRD_DMA_CHN_PAUSE); + if (pause & SPRD_DMA_PAUSE_STS) + break; + + cpu_relax(); + } while (--timeout > 0); + + if (!timeout) + dev_warn(sdev->dma_dev.dev, + "pause dma controller timeout\n"); + } else { + sprd_dma_chn_update(schan, SPRD_DMA_CHN_PAUSE, + SPRD_DMA_PAUSE_EN, 0); + } +} + +static void sprd_dma_stop_and_disable(struct sprd_dma_chn *schan) +{ + u32 cfg = readl(schan->chn_base + SPRD_DMA_CHN_CFG); + + if (!(cfg & SPRD_DMA_CHN_EN)) + return; + + sprd_dma_pause_resume(schan, true); + sprd_dma_disable_chn(schan); +} + +static unsigned long sprd_dma_get_src_addr(struct sprd_dma_chn *schan) +{ + unsigned long addr, addr_high; + + addr = readl(schan->chn_base + SPRD_DMA_CHN_SRC_ADDR); + addr_high = readl(schan->chn_base + SPRD_DMA_CHN_WARP_PTR) & + SPRD_DMA_HIGH_ADDR_MASK; + + return addr | (addr_high << SPRD_DMA_HIGH_ADDR_OFFSET); +} + +static unsigned long sprd_dma_get_dst_addr(struct sprd_dma_chn *schan) +{ + unsigned long addr, addr_high; + + addr = readl(schan->chn_base + SPRD_DMA_CHN_DES_ADDR); + addr_high = readl(schan->chn_base + SPRD_DMA_CHN_WARP_TO) & + SPRD_DMA_HIGH_ADDR_MASK; + + return addr | (addr_high << SPRD_DMA_HIGH_ADDR_OFFSET); +} + +static enum sprd_dma_int_type sprd_dma_get_int_type(struct sprd_dma_chn *schan) +{ + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan); + u32 intc_sts = readl(schan->chn_base + SPRD_DMA_CHN_INTC) & + SPRD_DMA_CHN_INT_STS; + + switch (intc_sts) { + case SPRD_DMA_CFGERR_INT_STS: + return SPRD_DMA_CFGERR_INT; + + case SPRD_DMA_LIST_INT_STS: + return SPRD_DMA_LIST_INT; + + case SPRD_DMA_TRSC_INT_STS: + return SPRD_DMA_TRANS_INT; + + case SPRD_DMA_BLK_INT_STS: + return SPRD_DMA_BLK_INT; + + case SPRD_DMA_FRAG_INT_STS: + return SPRD_DMA_FRAG_INT; + + default: + dev_warn(sdev->dma_dev.dev, "incorrect dma interrupt type\n"); + return SPRD_DMA_NO_INT; + } +} + +static enum sprd_dma_req_mode sprd_dma_get_req_type(struct sprd_dma_chn *schan) +{ + u32 frag_reg = readl(schan->chn_base + SPRD_DMA_CHN_FRG_LEN); + + return (frag_reg >> SPRD_DMA_REQ_MODE_OFFSET) & SPRD_DMA_REQ_MODE_MASK; +} + +static int sprd_dma_set_2stage_config(struct sprd_dma_chn *schan) +{ + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan); + u32 val, chn = schan->chn_num + 1; + + switch (schan->chn_mode) { + case SPRD_DMA_SRC_CHN0: + val = chn & SPRD_DMA_GLB_SRC_CHN_MASK; + val |= BIT(schan->trg_mode - 1) << SPRD_DMA_GLB_TRG_OFFSET; + val |= SPRD_DMA_GLB_2STAGE_EN; + if (schan->int_type != SPRD_DMA_NO_INT) + val |= SPRD_DMA_GLB_SRC_INT; + + sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP1, val, val); + break; + + case SPRD_DMA_SRC_CHN1: + val = chn & SPRD_DMA_GLB_SRC_CHN_MASK; + val |= BIT(schan->trg_mode - 1) << SPRD_DMA_GLB_TRG_OFFSET; + val |= SPRD_DMA_GLB_2STAGE_EN; + if (schan->int_type != SPRD_DMA_NO_INT) + val |= SPRD_DMA_GLB_SRC_INT; + + sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP2, val, val); + break; + + case SPRD_DMA_DST_CHN0: + val = (chn << SPRD_DMA_GLB_DEST_CHN_OFFSET) & + SPRD_DMA_GLB_DEST_CHN_MASK; + val |= SPRD_DMA_GLB_2STAGE_EN; + if (schan->int_type != SPRD_DMA_NO_INT) + val |= SPRD_DMA_GLB_DEST_INT; + + sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP1, val, val); + break; + + case SPRD_DMA_DST_CHN1: + val = (chn << SPRD_DMA_GLB_DEST_CHN_OFFSET) & + SPRD_DMA_GLB_DEST_CHN_MASK; + val |= SPRD_DMA_GLB_2STAGE_EN; + if (schan->int_type != SPRD_DMA_NO_INT) + val |= SPRD_DMA_GLB_DEST_INT; + + sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP2, val, val); + break; + + default: + dev_err(sdev->dma_dev.dev, "invalid channel mode setting %d\n", + schan->chn_mode); + return -EINVAL; + } + + return 0; +} + +static void sprd_dma_set_pending(struct sprd_dma_chn *schan, bool enable) +{ + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan); + u32 reg, val, req_id; + + if (schan->dev_id == SPRD_DMA_SOFTWARE_UID) + return; + + /* The DMA request id always starts from 0. */ + req_id = schan->dev_id - 1; + + if (req_id < 32) { + reg = SPRD_DMA_GLB_REQ_PEND0_EN; + val = BIT(req_id); + } else { + reg = SPRD_DMA_GLB_REQ_PEND1_EN; + val = BIT(req_id - 32); + } + + sprd_dma_glb_update(sdev, reg, val, enable ? val : 0); +} + +static void sprd_dma_set_chn_config(struct sprd_dma_chn *schan, + struct sprd_dma_desc *sdesc) +{ + struct sprd_dma_chn_hw *cfg = &sdesc->chn_hw; + + writel(cfg->pause, schan->chn_base + SPRD_DMA_CHN_PAUSE); + writel(cfg->cfg, schan->chn_base + SPRD_DMA_CHN_CFG); + writel(cfg->intc, schan->chn_base + SPRD_DMA_CHN_INTC); + writel(cfg->src_addr, schan->chn_base + SPRD_DMA_CHN_SRC_ADDR); + writel(cfg->des_addr, schan->chn_base + SPRD_DMA_CHN_DES_ADDR); + writel(cfg->frg_len, schan->chn_base + SPRD_DMA_CHN_FRG_LEN); + writel(cfg->blk_len, schan->chn_base + SPRD_DMA_CHN_BLK_LEN); + writel(cfg->trsc_len, schan->chn_base + SPRD_DMA_CHN_TRSC_LEN); + writel(cfg->trsf_step, schan->chn_base + SPRD_DMA_CHN_TRSF_STEP); + writel(cfg->wrap_ptr, schan->chn_base + SPRD_DMA_CHN_WARP_PTR); + writel(cfg->wrap_to, schan->chn_base + SPRD_DMA_CHN_WARP_TO); + writel(cfg->llist_ptr, schan->chn_base + SPRD_DMA_CHN_LLIST_PTR); + writel(cfg->frg_step, schan->chn_base + SPRD_DMA_CHN_FRAG_STEP); + writel(cfg->src_blk_step, schan->chn_base + SPRD_DMA_CHN_SRC_BLK_STEP); + writel(cfg->des_blk_step, schan->chn_base + SPRD_DMA_CHN_DES_BLK_STEP); + writel(cfg->req, schan->chn_base + SPRD_DMA_CHN_REQ); +} + +static void sprd_dma_start(struct sprd_dma_chn *schan) +{ + struct virt_dma_desc *vd = vchan_next_desc(&schan->vc); + + if (!vd) + return; + + list_del(&vd->node); + schan->cur_desc = to_sprd_dma_desc(vd); + + /* + * Set 2-stage configuration if the channel starts one 2-stage + * transfer. + */ + if (schan->chn_mode && sprd_dma_set_2stage_config(schan)) + return; + + /* + * Copy the DMA configuration from DMA descriptor to this hardware + * channel. + */ + sprd_dma_set_chn_config(schan, schan->cur_desc); + sprd_dma_set_uid(schan); + sprd_dma_set_pending(schan, true); + sprd_dma_enable_chn(schan); + + if (schan->dev_id == SPRD_DMA_SOFTWARE_UID && + schan->chn_mode != SPRD_DMA_DST_CHN0 && + schan->chn_mode != SPRD_DMA_DST_CHN1) + sprd_dma_soft_request(schan); +} + +static void sprd_dma_stop(struct sprd_dma_chn *schan) +{ + sprd_dma_stop_and_disable(schan); + sprd_dma_set_pending(schan, false); + sprd_dma_unset_uid(schan); + sprd_dma_clear_int(schan); + schan->cur_desc = NULL; +} + +static bool sprd_dma_check_trans_done(struct sprd_dma_desc *sdesc, + enum sprd_dma_int_type int_type, + enum sprd_dma_req_mode req_mode) +{ + if (int_type == SPRD_DMA_NO_INT) + return false; + + if (int_type >= req_mode + 1) + return true; + else + return false; +} + +static irqreturn_t dma_irq_handle(int irq, void *dev_id) +{ + struct sprd_dma_dev *sdev = (struct sprd_dma_dev *)dev_id; + u32 irq_status = readl(sdev->glb_base + SPRD_DMA_GLB_INT_MSK_STS); + struct sprd_dma_chn *schan; + struct sprd_dma_desc *sdesc; + enum sprd_dma_req_mode req_type; + enum sprd_dma_int_type int_type; + bool trans_done = false, cyclic = false; + u32 i; + + while (irq_status) { + i = __ffs(irq_status); + irq_status &= (irq_status - 1); + schan = &sdev->channels[i]; + + spin_lock(&schan->vc.lock); + + sdesc = schan->cur_desc; + if (!sdesc) { + spin_unlock(&schan->vc.lock); + return IRQ_HANDLED; + } + + int_type = sprd_dma_get_int_type(schan); + req_type = sprd_dma_get_req_type(schan); + sprd_dma_clear_int(schan); + + /* cyclic mode schedule callback */ + cyclic = schan->linklist.phy_addr ? true : false; + if (cyclic == true) { + vchan_cyclic_callback(&sdesc->vd); + } else { + /* Check if the dma request descriptor is done. */ + trans_done = sprd_dma_check_trans_done(sdesc, int_type, + req_type); + if (trans_done == true) { + vchan_cookie_complete(&sdesc->vd); + schan->cur_desc = NULL; + sprd_dma_start(schan); + } + } + spin_unlock(&schan->vc.lock); + } + + return IRQ_HANDLED; +} + +static int sprd_dma_alloc_chan_resources(struct dma_chan *chan) +{ + return pm_runtime_get_sync(chan->device->dev); +} + +static void sprd_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct virt_dma_desc *cur_vd = NULL; + unsigned long flags; + + spin_lock_irqsave(&schan->vc.lock, flags); + if (schan->cur_desc) + cur_vd = &schan->cur_desc->vd; + + sprd_dma_stop(schan); + spin_unlock_irqrestore(&schan->vc.lock, flags); + + if (cur_vd) + sprd_dma_free_desc(cur_vd); + + vchan_free_chan_resources(&schan->vc); + pm_runtime_put(chan->device->dev); +} + +static enum dma_status sprd_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + u32 pos; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&schan->vc.lock, flags); + vd = vchan_find_desc(&schan->vc, cookie); + if (vd) { + struct sprd_dma_desc *sdesc = to_sprd_dma_desc(vd); + struct sprd_dma_chn_hw *hw = &sdesc->chn_hw; + + if (hw->trsc_len > 0) + pos = hw->trsc_len; + else if (hw->blk_len > 0) + pos = hw->blk_len; + else if (hw->frg_len > 0) + pos = hw->frg_len; + else + pos = 0; + } else if (schan->cur_desc && schan->cur_desc->vd.tx.cookie == cookie) { + struct sprd_dma_desc *sdesc = schan->cur_desc; + + if (sdesc->dir == DMA_DEV_TO_MEM) + pos = sprd_dma_get_dst_addr(schan); + else + pos = sprd_dma_get_src_addr(schan); + } else { + pos = 0; + } + spin_unlock_irqrestore(&schan->vc.lock, flags); + + dma_set_residue(txstate, pos); + return ret; +} + +static void sprd_dma_issue_pending(struct dma_chan *chan) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&schan->vc.lock, flags); + if (vchan_issue_pending(&schan->vc) && !schan->cur_desc) + sprd_dma_start(schan); + spin_unlock_irqrestore(&schan->vc.lock, flags); +} + +static int sprd_dma_get_datawidth(enum dma_slave_buswidth buswidth) +{ + switch (buswidth) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + case DMA_SLAVE_BUSWIDTH_2_BYTES: + case DMA_SLAVE_BUSWIDTH_4_BYTES: + case DMA_SLAVE_BUSWIDTH_8_BYTES: + return ffs(buswidth) - 1; + + default: + return -EINVAL; + } +} + +static int sprd_dma_get_step(enum dma_slave_buswidth buswidth) +{ + switch (buswidth) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + case DMA_SLAVE_BUSWIDTH_2_BYTES: + case DMA_SLAVE_BUSWIDTH_4_BYTES: + case DMA_SLAVE_BUSWIDTH_8_BYTES: + return buswidth; + + default: + return -EINVAL; + } +} + +static int sprd_dma_fill_desc(struct dma_chan *chan, + struct sprd_dma_chn_hw *hw, + unsigned int sglen, int sg_index, + dma_addr_t src, dma_addr_t dst, u32 len, + enum dma_transfer_direction dir, + unsigned long flags, + struct dma_slave_config *slave_cfg) +{ + struct sprd_dma_dev *sdev = to_sprd_dma_dev(chan); + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + enum sprd_dma_chn_mode chn_mode = schan->chn_mode; + u32 req_mode = (flags >> SPRD_DMA_REQ_SHIFT) & SPRD_DMA_REQ_MODE_MASK; + u32 int_mode = flags & SPRD_DMA_INT_MASK; + int src_datawidth, dst_datawidth, src_step, dst_step; + u32 temp, fix_mode = 0, fix_en = 0; + phys_addr_t llist_ptr; + + if (dir == DMA_MEM_TO_DEV) { + src_step = sprd_dma_get_step(slave_cfg->src_addr_width); + if (src_step < 0) { + dev_err(sdev->dma_dev.dev, "invalid source step\n"); + return src_step; + } + + /* + * For 2-stage transfer, destination channel step can not be 0, + * since destination device is AON IRAM. + */ + if (chn_mode == SPRD_DMA_DST_CHN0 || + chn_mode == SPRD_DMA_DST_CHN1) + dst_step = src_step; + else + dst_step = SPRD_DMA_NONE_STEP; + } else { + dst_step = sprd_dma_get_step(slave_cfg->dst_addr_width); + if (dst_step < 0) { + dev_err(sdev->dma_dev.dev, "invalid destination step\n"); + return dst_step; + } + src_step = SPRD_DMA_NONE_STEP; + } + + src_datawidth = sprd_dma_get_datawidth(slave_cfg->src_addr_width); + if (src_datawidth < 0) { + dev_err(sdev->dma_dev.dev, "invalid source datawidth\n"); + return src_datawidth; + } + + dst_datawidth = sprd_dma_get_datawidth(slave_cfg->dst_addr_width); + if (dst_datawidth < 0) { + dev_err(sdev->dma_dev.dev, "invalid destination datawidth\n"); + return dst_datawidth; + } + + hw->cfg = SPRD_DMA_DONOT_WAIT_BDONE << SPRD_DMA_WAIT_BDONE_OFFSET; + + /* + * wrap_ptr and wrap_to will save the high 4 bits source address and + * destination address. + */ + hw->wrap_ptr = (src >> SPRD_DMA_HIGH_ADDR_OFFSET) & SPRD_DMA_HIGH_ADDR_MASK; + hw->wrap_to = (dst >> SPRD_DMA_HIGH_ADDR_OFFSET) & SPRD_DMA_HIGH_ADDR_MASK; + hw->src_addr = src & SPRD_DMA_LOW_ADDR_MASK; + hw->des_addr = dst & SPRD_DMA_LOW_ADDR_MASK; + + /* + * If the src step and dst step both are 0 or both are not 0, that means + * we can not enable the fix mode. If one is 0 and another one is not, + * we can enable the fix mode. + */ + if ((src_step != 0 && dst_step != 0) || (src_step | dst_step) == 0) { + fix_en = 0; + } else { + fix_en = 1; + if (src_step) + fix_mode = 1; + else + fix_mode = 0; + } + + hw->intc = int_mode | SPRD_DMA_CFG_ERR_INT_EN; + + temp = src_datawidth << SPRD_DMA_SRC_DATAWIDTH_OFFSET; + temp |= dst_datawidth << SPRD_DMA_DES_DATAWIDTH_OFFSET; + temp |= req_mode << SPRD_DMA_REQ_MODE_OFFSET; + temp |= fix_mode << SPRD_DMA_FIX_SEL_OFFSET; + temp |= fix_en << SPRD_DMA_FIX_EN_OFFSET; + temp |= schan->linklist.wrap_addr ? + SPRD_DMA_WRAP_EN | SPRD_DMA_WRAP_SEL_DEST : 0; + temp |= slave_cfg->src_maxburst & SPRD_DMA_FRG_LEN_MASK; + hw->frg_len = temp; + + hw->blk_len = slave_cfg->src_maxburst & SPRD_DMA_BLK_LEN_MASK; + hw->trsc_len = len & SPRD_DMA_TRSC_LEN_MASK; + + temp = (dst_step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_DEST_TRSF_STEP_OFFSET; + temp |= (src_step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_SRC_TRSF_STEP_OFFSET; + hw->trsf_step = temp; + + /* link-list configuration */ + if (schan->linklist.phy_addr) { + hw->cfg |= SPRD_DMA_LINKLIST_EN; + + /* link-list index */ + temp = sglen ? (sg_index + 1) % sglen : 0; + + /* Next link-list configuration's physical address offset */ + temp = temp * sizeof(*hw) + SPRD_DMA_CHN_SRC_ADDR; + /* + * Set the link-list pointer point to next link-list + * configuration's physical address. + */ + llist_ptr = schan->linklist.phy_addr + temp; + hw->llist_ptr = lower_32_bits(llist_ptr); + hw->src_blk_step = (upper_32_bits(llist_ptr) << SPRD_DMA_LLIST_HIGH_SHIFT) & + SPRD_DMA_LLIST_HIGH_MASK; + + if (schan->linklist.wrap_addr) { + hw->wrap_ptr |= schan->linklist.wrap_addr & + SPRD_DMA_WRAP_ADDR_MASK; + hw->wrap_to |= dst & SPRD_DMA_WRAP_ADDR_MASK; + } + } else { + hw->llist_ptr = 0; + hw->src_blk_step = 0; + } + + hw->frg_step = 0; + hw->des_blk_step = 0; + return 0; +} + +static int sprd_dma_fill_linklist_desc(struct dma_chan *chan, + unsigned int sglen, int sg_index, + dma_addr_t src, dma_addr_t dst, u32 len, + enum dma_transfer_direction dir, + unsigned long flags, + struct dma_slave_config *slave_cfg) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct sprd_dma_chn_hw *hw; + + if (!schan->linklist.virt_addr) + return -EINVAL; + + hw = (struct sprd_dma_chn_hw *)(schan->linklist.virt_addr + + sg_index * sizeof(*hw)); + + return sprd_dma_fill_desc(chan, hw, sglen, sg_index, src, dst, len, + dir, flags, slave_cfg); +} + +static struct dma_async_tx_descriptor * +sprd_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct sprd_dma_desc *sdesc; + struct sprd_dma_chn_hw *hw; + enum sprd_dma_datawidth datawidth; + u32 step, temp; + + sdesc = kzalloc(sizeof(*sdesc), GFP_NOWAIT); + if (!sdesc) + return NULL; + + hw = &sdesc->chn_hw; + + hw->cfg = SPRD_DMA_DONOT_WAIT_BDONE << SPRD_DMA_WAIT_BDONE_OFFSET; + hw->intc = SPRD_DMA_TRANS_INT | SPRD_DMA_CFG_ERR_INT_EN; + hw->src_addr = src & SPRD_DMA_LOW_ADDR_MASK; + hw->des_addr = dest & SPRD_DMA_LOW_ADDR_MASK; + hw->wrap_ptr = (src >> SPRD_DMA_HIGH_ADDR_OFFSET) & + SPRD_DMA_HIGH_ADDR_MASK; + hw->wrap_to = (dest >> SPRD_DMA_HIGH_ADDR_OFFSET) & + SPRD_DMA_HIGH_ADDR_MASK; + + if (IS_ALIGNED(len, 8)) { + datawidth = SPRD_DMA_DATAWIDTH_8_BYTES; + step = SPRD_DMA_DWORD_STEP; + } else if (IS_ALIGNED(len, 4)) { + datawidth = SPRD_DMA_DATAWIDTH_4_BYTES; + step = SPRD_DMA_WORD_STEP; + } else if (IS_ALIGNED(len, 2)) { + datawidth = SPRD_DMA_DATAWIDTH_2_BYTES; + step = SPRD_DMA_SHORT_STEP; + } else { + datawidth = SPRD_DMA_DATAWIDTH_1_BYTE; + step = SPRD_DMA_BYTE_STEP; + } + + temp = datawidth << SPRD_DMA_SRC_DATAWIDTH_OFFSET; + temp |= datawidth << SPRD_DMA_DES_DATAWIDTH_OFFSET; + temp |= SPRD_DMA_TRANS_REQ << SPRD_DMA_REQ_MODE_OFFSET; + temp |= len & SPRD_DMA_FRG_LEN_MASK; + hw->frg_len = temp; + + hw->blk_len = len & SPRD_DMA_BLK_LEN_MASK; + hw->trsc_len = len & SPRD_DMA_TRSC_LEN_MASK; + + temp = (step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_DEST_TRSF_STEP_OFFSET; + temp |= (step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_SRC_TRSF_STEP_OFFSET; + hw->trsf_step = temp; + + return vchan_tx_prep(&schan->vc, &sdesc->vd, flags); +} + +static struct dma_async_tx_descriptor * +sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sglen, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct dma_slave_config *slave_cfg = &schan->slave_cfg; + dma_addr_t src = 0, dst = 0; + dma_addr_t start_src = 0, start_dst = 0; + struct sprd_dma_desc *sdesc; + struct scatterlist *sg; + u32 len = 0; + int ret, i; + + if (!is_slave_direction(dir)) + return NULL; + + if (context) { + struct sprd_dma_linklist *ll_cfg = + (struct sprd_dma_linklist *)context; + + schan->linklist.phy_addr = ll_cfg->phy_addr; + schan->linklist.virt_addr = ll_cfg->virt_addr; + schan->linklist.wrap_addr = ll_cfg->wrap_addr; + } else { + schan->linklist.phy_addr = 0; + schan->linklist.virt_addr = 0; + schan->linklist.wrap_addr = 0; + } + + /* + * Set channel mode, interrupt mode and trigger mode for 2-stage + * transfer. + */ + schan->chn_mode = + (flags >> SPRD_DMA_CHN_MODE_SHIFT) & SPRD_DMA_CHN_MODE_MASK; + schan->trg_mode = + (flags >> SPRD_DMA_TRG_MODE_SHIFT) & SPRD_DMA_TRG_MODE_MASK; + schan->int_type = flags & SPRD_DMA_INT_TYPE_MASK; + + sdesc = kzalloc(sizeof(*sdesc), GFP_NOWAIT); + if (!sdesc) + return NULL; + + sdesc->dir = dir; + + for_each_sg(sgl, sg, sglen, i) { + len = sg_dma_len(sg); + + if (dir == DMA_MEM_TO_DEV) { + src = sg_dma_address(sg); + dst = slave_cfg->dst_addr; + } else { + src = slave_cfg->src_addr; + dst = sg_dma_address(sg); + } + + if (!i) { + start_src = src; + start_dst = dst; + } + + /* + * The link-list mode needs at least 2 link-list + * configurations. If there is only one sg, it doesn't + * need to fill the link-list configuration. + */ + if (sglen < 2) + break; + + ret = sprd_dma_fill_linklist_desc(chan, sglen, i, src, dst, len, + dir, flags, slave_cfg); + if (ret) { + kfree(sdesc); + return NULL; + } + } + + ret = sprd_dma_fill_desc(chan, &sdesc->chn_hw, 0, 0, start_src, + start_dst, len, dir, flags, slave_cfg); + if (ret) { + kfree(sdesc); + return NULL; + } + + return vchan_tx_prep(&schan->vc, &sdesc->vd, flags); +} + +static int sprd_dma_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct dma_slave_config *slave_cfg = &schan->slave_cfg; + + memcpy(slave_cfg, config, sizeof(*config)); + return 0; +} + +static int sprd_dma_pause(struct dma_chan *chan) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&schan->vc.lock, flags); + sprd_dma_pause_resume(schan, true); + spin_unlock_irqrestore(&schan->vc.lock, flags); + + return 0; +} + +static int sprd_dma_resume(struct dma_chan *chan) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&schan->vc.lock, flags); + sprd_dma_pause_resume(schan, false); + spin_unlock_irqrestore(&schan->vc.lock, flags); + + return 0; +} + +static int sprd_dma_terminate_all(struct dma_chan *chan) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + struct virt_dma_desc *cur_vd = NULL; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&schan->vc.lock, flags); + if (schan->cur_desc) + cur_vd = &schan->cur_desc->vd; + + sprd_dma_stop(schan); + + vchan_get_all_descriptors(&schan->vc, &head); + spin_unlock_irqrestore(&schan->vc.lock, flags); + + if (cur_vd) + sprd_dma_free_desc(cur_vd); + + vchan_dma_desc_free_list(&schan->vc, &head); + return 0; +} + +static void sprd_dma_free_desc(struct virt_dma_desc *vd) +{ + struct sprd_dma_desc *sdesc = to_sprd_dma_desc(vd); + + kfree(sdesc); +} + +static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param) +{ + struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + u32 slave_id = *(u32 *)param; + + schan->dev_id = slave_id; + return true; +} + +static int sprd_dma_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct sprd_dma_dev *sdev; + struct sprd_dma_chn *dma_chn; + u32 chn_count; + int ret, i; + + /* Parse new and deprecated dma-channels properties */ + ret = device_property_read_u32(&pdev->dev, "dma-channels", &chn_count); + if (ret) + ret = device_property_read_u32(&pdev->dev, "#dma-channels", + &chn_count); + if (ret) { + dev_err(&pdev->dev, "get dma channels count failed\n"); + return ret; + } + + sdev = devm_kzalloc(&pdev->dev, + struct_size(sdev, channels, chn_count), + GFP_KERNEL); + if (!sdev) + return -ENOMEM; + + sdev->clk = devm_clk_get(&pdev->dev, "enable"); + if (IS_ERR(sdev->clk)) { + dev_err(&pdev->dev, "get enable clock failed\n"); + return PTR_ERR(sdev->clk); + } + + /* ashb clock is optional for AGCP DMA */ + sdev->ashb_clk = devm_clk_get(&pdev->dev, "ashb_eb"); + if (IS_ERR(sdev->ashb_clk)) + dev_warn(&pdev->dev, "no optional ashb eb clock\n"); + + /* + * We have three DMA controllers: AP DMA, AON DMA and AGCP DMA. For AGCP + * DMA controller, it can or do not request the irq, which will save + * system power without resuming system by DMA interrupts if AGCP DMA + * does not request the irq. Thus the DMA interrupts property should + * be optional. + */ + sdev->irq = platform_get_irq(pdev, 0); + if (sdev->irq > 0) { + ret = devm_request_irq(&pdev->dev, sdev->irq, dma_irq_handle, + 0, "sprd_dma", (void *)sdev); + if (ret < 0) { + dev_err(&pdev->dev, "request dma irq failed\n"); + return ret; + } + } else { + dev_warn(&pdev->dev, "no interrupts for the dma controller\n"); + } + + sdev->glb_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(sdev->glb_base)) + return PTR_ERR(sdev->glb_base); + + dma_cap_set(DMA_MEMCPY, sdev->dma_dev.cap_mask); + sdev->total_chns = chn_count; + INIT_LIST_HEAD(&sdev->dma_dev.channels); + INIT_LIST_HEAD(&sdev->dma_dev.global_node); + sdev->dma_dev.dev = &pdev->dev; + sdev->dma_dev.device_alloc_chan_resources = sprd_dma_alloc_chan_resources; + sdev->dma_dev.device_free_chan_resources = sprd_dma_free_chan_resources; + sdev->dma_dev.device_tx_status = sprd_dma_tx_status; + sdev->dma_dev.device_issue_pending = sprd_dma_issue_pending; + sdev->dma_dev.device_prep_dma_memcpy = sprd_dma_prep_dma_memcpy; + sdev->dma_dev.device_prep_slave_sg = sprd_dma_prep_slave_sg; + sdev->dma_dev.device_config = sprd_dma_slave_config; + sdev->dma_dev.device_pause = sprd_dma_pause; + sdev->dma_dev.device_resume = sprd_dma_resume; + sdev->dma_dev.device_terminate_all = sprd_dma_terminate_all; + + for (i = 0; i < chn_count; i++) { + dma_chn = &sdev->channels[i]; + dma_chn->chn_num = i; + dma_chn->cur_desc = NULL; + /* get each channel's registers base address. */ + dma_chn->chn_base = sdev->glb_base + SPRD_DMA_CHN_REG_OFFSET + + SPRD_DMA_CHN_REG_LENGTH * i; + + dma_chn->vc.desc_free = sprd_dma_free_desc; + vchan_init(&dma_chn->vc, &sdev->dma_dev); + } + + platform_set_drvdata(pdev, sdev); + ret = sprd_dma_enable(sdev); + if (ret) + return ret; + + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) + goto err_rpm; + + ret = dma_async_device_register(&sdev->dma_dev); + if (ret < 0) { + dev_err(&pdev->dev, "register dma device failed:%d\n", ret); + goto err_register; + } + + sprd_dma_info.dma_cap = sdev->dma_dev.cap_mask; + ret = of_dma_controller_register(np, of_dma_simple_xlate, + &sprd_dma_info); + if (ret) + goto err_of_register; + + pm_runtime_put(&pdev->dev); + return 0; + +err_of_register: + dma_async_device_unregister(&sdev->dma_dev); +err_register: + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); +err_rpm: + sprd_dma_disable(sdev); + return ret; +} + +static int sprd_dma_remove(struct platform_device *pdev) +{ + struct sprd_dma_dev *sdev = platform_get_drvdata(pdev); + struct sprd_dma_chn *c, *cn; + + pm_runtime_get_sync(&pdev->dev); + + /* explicitly free the irq */ + if (sdev->irq > 0) + devm_free_irq(&pdev->dev, sdev->irq, sdev); + + list_for_each_entry_safe(c, cn, &sdev->dma_dev.channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&sdev->dma_dev); + sprd_dma_disable(sdev); + + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; +} + +static const struct of_device_id sprd_dma_match[] = { + { .compatible = "sprd,sc9860-dma", }, + {}, +}; +MODULE_DEVICE_TABLE(of, sprd_dma_match); + +static int __maybe_unused sprd_dma_runtime_suspend(struct device *dev) +{ + struct sprd_dma_dev *sdev = dev_get_drvdata(dev); + + sprd_dma_disable(sdev); + return 0; +} + +static int __maybe_unused sprd_dma_runtime_resume(struct device *dev) +{ + struct sprd_dma_dev *sdev = dev_get_drvdata(dev); + int ret; + + ret = sprd_dma_enable(sdev); + if (ret) + dev_err(sdev->dma_dev.dev, "enable dma failed\n"); + + return ret; +} + +static const struct dev_pm_ops sprd_dma_pm_ops = { + SET_RUNTIME_PM_OPS(sprd_dma_runtime_suspend, + sprd_dma_runtime_resume, + NULL) +}; + +static struct platform_driver sprd_dma_driver = { + .probe = sprd_dma_probe, + .remove = sprd_dma_remove, + .driver = { + .name = "sprd-dma", + .of_match_table = sprd_dma_match, + .pm = &sprd_dma_pm_ops, + }, +}; +module_platform_driver(sprd_dma_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("DMA driver for Spreadtrum"); +MODULE_AUTHOR("Baolin Wang "); +MODULE_AUTHOR("Eric Long "); +MODULE_ALIAS("platform:sprd-dma"); diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c new file mode 100644 index 0000000000..d95c421877 --- /dev/null +++ b/drivers/dma/st_fdma.c @@ -0,0 +1,877 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DMA driver for STMicroelectronics STi FDMA controller + * + * Copyright (C) 2014 STMicroelectronics + * + * Author: Ludovic Barre + * Peter Griffin + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "st_fdma.h" + +static inline struct st_fdma_chan *to_st_fdma_chan(struct dma_chan *c) +{ + return container_of(c, struct st_fdma_chan, vchan.chan); +} + +static struct st_fdma_desc *to_st_fdma_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct st_fdma_desc, vdesc); +} + +static int st_fdma_dreq_get(struct st_fdma_chan *fchan) +{ + struct st_fdma_dev *fdev = fchan->fdev; + u32 req_line_cfg = fchan->cfg.req_line; + u32 dreq_line; + int try = 0; + + /* + * dreq_mask is shared for n channels of fdma, so all accesses must be + * atomic. if the dreq_mask is changed between ffz and set_bit, + * we retry + */ + do { + if (fdev->dreq_mask == ~0L) { + dev_err(fdev->dev, "No req lines available\n"); + return -EINVAL; + } + + if (try || req_line_cfg >= ST_FDMA_NR_DREQS) { + dev_err(fdev->dev, "Invalid or used req line\n"); + return -EINVAL; + } else { + dreq_line = req_line_cfg; + } + + try++; + } while (test_and_set_bit(dreq_line, &fdev->dreq_mask)); + + dev_dbg(fdev->dev, "get dreq_line:%d mask:%#lx\n", + dreq_line, fdev->dreq_mask); + + return dreq_line; +} + +static void st_fdma_dreq_put(struct st_fdma_chan *fchan) +{ + struct st_fdma_dev *fdev = fchan->fdev; + + dev_dbg(fdev->dev, "put dreq_line:%#x\n", fchan->dreq_line); + clear_bit(fchan->dreq_line, &fdev->dreq_mask); +} + +static void st_fdma_xfer_desc(struct st_fdma_chan *fchan) +{ + struct virt_dma_desc *vdesc; + unsigned long nbytes, ch_cmd, cmd; + + vdesc = vchan_next_desc(&fchan->vchan); + if (!vdesc) + return; + + fchan->fdesc = to_st_fdma_desc(vdesc); + nbytes = fchan->fdesc->node[0].desc->nbytes; + cmd = FDMA_CMD_START(fchan->vchan.chan.chan_id); + ch_cmd = fchan->fdesc->node[0].pdesc | FDMA_CH_CMD_STA_START; + + /* start the channel for the descriptor */ + fnode_write(fchan, nbytes, FDMA_CNTN_OFST); + fchan_write(fchan, ch_cmd, FDMA_CH_CMD_OFST); + writel(cmd, + fchan->fdev->slim_rproc->peri + FDMA_CMD_SET_OFST); + + dev_dbg(fchan->fdev->dev, "start chan:%d\n", fchan->vchan.chan.chan_id); +} + +static void st_fdma_ch_sta_update(struct st_fdma_chan *fchan, + unsigned long int_sta) +{ + unsigned long ch_sta, ch_err; + int ch_id = fchan->vchan.chan.chan_id; + struct st_fdma_dev *fdev = fchan->fdev; + + ch_sta = fchan_read(fchan, FDMA_CH_CMD_OFST); + ch_err = ch_sta & FDMA_CH_CMD_ERR_MASK; + ch_sta &= FDMA_CH_CMD_STA_MASK; + + if (int_sta & FDMA_INT_STA_ERR) { + dev_warn(fdev->dev, "chan:%d, error:%ld\n", ch_id, ch_err); + fchan->status = DMA_ERROR; + return; + } + + switch (ch_sta) { + case FDMA_CH_CMD_STA_PAUSED: + fchan->status = DMA_PAUSED; + break; + + case FDMA_CH_CMD_STA_RUNNING: + fchan->status = DMA_IN_PROGRESS; + break; + } +} + +static irqreturn_t st_fdma_irq_handler(int irq, void *dev_id) +{ + struct st_fdma_dev *fdev = dev_id; + irqreturn_t ret = IRQ_NONE; + struct st_fdma_chan *fchan = &fdev->chans[0]; + unsigned long int_sta, clr; + + int_sta = fdma_read(fdev, FDMA_INT_STA_OFST); + clr = int_sta; + + for (; int_sta != 0 ; int_sta >>= 2, fchan++) { + if (!(int_sta & (FDMA_INT_STA_CH | FDMA_INT_STA_ERR))) + continue; + + spin_lock(&fchan->vchan.lock); + st_fdma_ch_sta_update(fchan, int_sta); + + if (fchan->fdesc) { + if (!fchan->fdesc->iscyclic) { + list_del(&fchan->fdesc->vdesc.node); + vchan_cookie_complete(&fchan->fdesc->vdesc); + fchan->fdesc = NULL; + fchan->status = DMA_COMPLETE; + } else { + vchan_cyclic_callback(&fchan->fdesc->vdesc); + } + + /* Start the next descriptor (if available) */ + if (!fchan->fdesc) + st_fdma_xfer_desc(fchan); + } + + spin_unlock(&fchan->vchan.lock); + ret = IRQ_HANDLED; + } + + fdma_write(fdev, clr, FDMA_INT_CLR_OFST); + + return ret; +} + +static struct dma_chan *st_fdma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct st_fdma_dev *fdev = ofdma->of_dma_data; + struct dma_chan *chan; + struct st_fdma_chan *fchan; + int ret; + + if (dma_spec->args_count < 1) + return ERR_PTR(-EINVAL); + + if (fdev->dma_device.dev->of_node != dma_spec->np) + return ERR_PTR(-EINVAL); + + ret = rproc_boot(fdev->slim_rproc->rproc); + if (ret == -ENOENT) + return ERR_PTR(-EPROBE_DEFER); + else if (ret) + return ERR_PTR(ret); + + chan = dma_get_any_slave_channel(&fdev->dma_device); + if (!chan) + goto err_chan; + + fchan = to_st_fdma_chan(chan); + + fchan->cfg.of_node = dma_spec->np; + fchan->cfg.req_line = dma_spec->args[0]; + fchan->cfg.req_ctrl = 0; + fchan->cfg.type = ST_FDMA_TYPE_FREE_RUN; + + if (dma_spec->args_count > 1) + fchan->cfg.req_ctrl = dma_spec->args[1] + & FDMA_REQ_CTRL_CFG_MASK; + + if (dma_spec->args_count > 2) + fchan->cfg.type = dma_spec->args[2]; + + if (fchan->cfg.type == ST_FDMA_TYPE_FREE_RUN) { + fchan->dreq_line = 0; + } else { + fchan->dreq_line = st_fdma_dreq_get(fchan); + if (IS_ERR_VALUE(fchan->dreq_line)) { + chan = ERR_PTR(fchan->dreq_line); + goto err_chan; + } + } + + dev_dbg(fdev->dev, "xlate req_line:%d type:%d req_ctrl:%#lx\n", + fchan->cfg.req_line, fchan->cfg.type, fchan->cfg.req_ctrl); + + return chan; + +err_chan: + rproc_shutdown(fdev->slim_rproc->rproc); + return chan; + +} + +static void st_fdma_free_desc(struct virt_dma_desc *vdesc) +{ + struct st_fdma_desc *fdesc; + int i; + + fdesc = to_st_fdma_desc(vdesc); + for (i = 0; i < fdesc->n_nodes; i++) + dma_pool_free(fdesc->fchan->node_pool, fdesc->node[i].desc, + fdesc->node[i].pdesc); + kfree(fdesc); +} + +static struct st_fdma_desc *st_fdma_alloc_desc(struct st_fdma_chan *fchan, + int sg_len) +{ + struct st_fdma_desc *fdesc; + int i; + + fdesc = kzalloc(struct_size(fdesc, node, sg_len), GFP_NOWAIT); + if (!fdesc) + return NULL; + + fdesc->fchan = fchan; + fdesc->n_nodes = sg_len; + for (i = 0; i < sg_len; i++) { + fdesc->node[i].desc = dma_pool_alloc(fchan->node_pool, + GFP_NOWAIT, &fdesc->node[i].pdesc); + if (!fdesc->node[i].desc) + goto err; + } + return fdesc; + +err: + while (--i >= 0) + dma_pool_free(fchan->node_pool, fdesc->node[i].desc, + fdesc->node[i].pdesc); + kfree(fdesc); + return NULL; +} + +static int st_fdma_alloc_chan_res(struct dma_chan *chan) +{ + struct st_fdma_chan *fchan = to_st_fdma_chan(chan); + + /* Create the dma pool for descriptor allocation */ + fchan->node_pool = dma_pool_create(dev_name(&chan->dev->device), + fchan->fdev->dev, + sizeof(struct st_fdma_hw_node), + __alignof__(struct st_fdma_hw_node), + 0); + + if (!fchan->node_pool) { + dev_err(fchan->fdev->dev, "unable to allocate desc pool\n"); + return -ENOMEM; + } + + dev_dbg(fchan->fdev->dev, "alloc ch_id:%d type:%d\n", + fchan->vchan.chan.chan_id, fchan->cfg.type); + + return 0; +} + +static void st_fdma_free_chan_res(struct dma_chan *chan) +{ + struct st_fdma_chan *fchan = to_st_fdma_chan(chan); + struct rproc *rproc = fchan->fdev->slim_rproc->rproc; + unsigned long flags; + + dev_dbg(fchan->fdev->dev, "%s: freeing chan:%d\n", + __func__, fchan->vchan.chan.chan_id); + + if (fchan->cfg.type != ST_FDMA_TYPE_FREE_RUN) + st_fdma_dreq_put(fchan); + + spin_lock_irqsave(&fchan->vchan.lock, flags); + fchan->fdesc = NULL; + spin_unlock_irqrestore(&fchan->vchan.lock, flags); + + dma_pool_destroy(fchan->node_pool); + fchan->node_pool = NULL; + memset(&fchan->cfg, 0, sizeof(struct st_fdma_cfg)); + + rproc_shutdown(rproc); +} + +static struct dma_async_tx_descriptor *st_fdma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct st_fdma_chan *fchan; + struct st_fdma_desc *fdesc; + struct st_fdma_hw_node *hw_node; + + if (!len) + return NULL; + + fchan = to_st_fdma_chan(chan); + + /* We only require a single descriptor */ + fdesc = st_fdma_alloc_desc(fchan, 1); + if (!fdesc) { + dev_err(fchan->fdev->dev, "no memory for desc\n"); + return NULL; + } + + hw_node = fdesc->node[0].desc; + hw_node->next = 0; + hw_node->control = FDMA_NODE_CTRL_REQ_MAP_FREE_RUN; + hw_node->control |= FDMA_NODE_CTRL_SRC_INCR; + hw_node->control |= FDMA_NODE_CTRL_DST_INCR; + hw_node->control |= FDMA_NODE_CTRL_INT_EON; + hw_node->nbytes = len; + hw_node->saddr = src; + hw_node->daddr = dst; + hw_node->generic.length = len; + hw_node->generic.sstride = 0; + hw_node->generic.dstride = 0; + + return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags); +} + +static int config_reqctrl(struct st_fdma_chan *fchan, + enum dma_transfer_direction direction) +{ + u32 maxburst = 0, addr = 0; + enum dma_slave_buswidth width; + int ch_id = fchan->vchan.chan.chan_id; + struct st_fdma_dev *fdev = fchan->fdev; + + switch (direction) { + + case DMA_DEV_TO_MEM: + fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_WNR; + maxburst = fchan->scfg.src_maxburst; + width = fchan->scfg.src_addr_width; + addr = fchan->scfg.src_addr; + break; + + case DMA_MEM_TO_DEV: + fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_WNR; + maxburst = fchan->scfg.dst_maxburst; + width = fchan->scfg.dst_addr_width; + addr = fchan->scfg.dst_addr; + break; + + default: + return -EINVAL; + } + + fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_OPCODE_MASK; + + switch (width) { + + case DMA_SLAVE_BUSWIDTH_1_BYTE: + fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST1; + break; + + case DMA_SLAVE_BUSWIDTH_2_BYTES: + fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST2; + break; + + case DMA_SLAVE_BUSWIDTH_4_BYTES: + fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST4; + break; + + case DMA_SLAVE_BUSWIDTH_8_BYTES: + fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST8; + break; + + default: + return -EINVAL; + } + + fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_NUM_OPS_MASK; + fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_NUM_OPS(maxburst-1); + dreq_write(fchan, fchan->cfg.req_ctrl, FDMA_REQ_CTRL_OFST); + + fchan->cfg.dev_addr = addr; + fchan->cfg.dir = direction; + + dev_dbg(fdev->dev, "chan:%d config_reqctrl:%#x req_ctrl:%#lx\n", + ch_id, addr, fchan->cfg.req_ctrl); + + return 0; +} + +static void fill_hw_node(struct st_fdma_hw_node *hw_node, + struct st_fdma_chan *fchan, + enum dma_transfer_direction direction) +{ + if (direction == DMA_MEM_TO_DEV) { + hw_node->control |= FDMA_NODE_CTRL_SRC_INCR; + hw_node->control |= FDMA_NODE_CTRL_DST_STATIC; + hw_node->daddr = fchan->cfg.dev_addr; + } else { + hw_node->control |= FDMA_NODE_CTRL_SRC_STATIC; + hw_node->control |= FDMA_NODE_CTRL_DST_INCR; + hw_node->saddr = fchan->cfg.dev_addr; + } + + hw_node->generic.sstride = 0; + hw_node->generic.dstride = 0; +} + +static inline struct st_fdma_chan *st_fdma_prep_common(struct dma_chan *chan, + size_t len, enum dma_transfer_direction direction) +{ + struct st_fdma_chan *fchan; + + if (!chan || !len) + return NULL; + + fchan = to_st_fdma_chan(chan); + + if (!is_slave_direction(direction)) { + dev_err(fchan->fdev->dev, "bad direction?\n"); + return NULL; + } + + return fchan; +} + +static struct dma_async_tx_descriptor *st_fdma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct st_fdma_chan *fchan; + struct st_fdma_desc *fdesc; + int sg_len, i; + + fchan = st_fdma_prep_common(chan, len, direction); + if (!fchan) + return NULL; + + if (!period_len) + return NULL; + + if (config_reqctrl(fchan, direction)) { + dev_err(fchan->fdev->dev, "bad width or direction\n"); + return NULL; + } + + /* the buffer length must be a multiple of period_len */ + if (len % period_len != 0) { + dev_err(fchan->fdev->dev, "len is not multiple of period\n"); + return NULL; + } + + sg_len = len / period_len; + fdesc = st_fdma_alloc_desc(fchan, sg_len); + if (!fdesc) { + dev_err(fchan->fdev->dev, "no memory for desc\n"); + return NULL; + } + + fdesc->iscyclic = true; + + for (i = 0; i < sg_len; i++) { + struct st_fdma_hw_node *hw_node = fdesc->node[i].desc; + + hw_node->next = fdesc->node[(i + 1) % sg_len].pdesc; + + hw_node->control = + FDMA_NODE_CTRL_REQ_MAP_DREQ(fchan->dreq_line); + hw_node->control |= FDMA_NODE_CTRL_INT_EON; + + fill_hw_node(hw_node, fchan, direction); + + if (direction == DMA_MEM_TO_DEV) + hw_node->saddr = buf_addr + (i * period_len); + else + hw_node->daddr = buf_addr + (i * period_len); + + hw_node->nbytes = period_len; + hw_node->generic.length = period_len; + } + + return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *st_fdma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct st_fdma_chan *fchan; + struct st_fdma_desc *fdesc; + struct st_fdma_hw_node *hw_node; + struct scatterlist *sg; + int i; + + fchan = st_fdma_prep_common(chan, sg_len, direction); + if (!fchan) + return NULL; + + if (!sgl) + return NULL; + + fdesc = st_fdma_alloc_desc(fchan, sg_len); + if (!fdesc) { + dev_err(fchan->fdev->dev, "no memory for desc\n"); + return NULL; + } + + fdesc->iscyclic = false; + + for_each_sg(sgl, sg, sg_len, i) { + hw_node = fdesc->node[i].desc; + + hw_node->next = fdesc->node[(i + 1) % sg_len].pdesc; + hw_node->control = FDMA_NODE_CTRL_REQ_MAP_DREQ(fchan->dreq_line); + + fill_hw_node(hw_node, fchan, direction); + + if (direction == DMA_MEM_TO_DEV) + hw_node->saddr = sg_dma_address(sg); + else + hw_node->daddr = sg_dma_address(sg); + + hw_node->nbytes = sg_dma_len(sg); + hw_node->generic.length = sg_dma_len(sg); + } + + /* interrupt at end of last node */ + hw_node->control |= FDMA_NODE_CTRL_INT_EON; + + return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags); +} + +static size_t st_fdma_desc_residue(struct st_fdma_chan *fchan, + struct virt_dma_desc *vdesc, + bool in_progress) +{ + struct st_fdma_desc *fdesc = fchan->fdesc; + size_t residue = 0; + dma_addr_t cur_addr = 0; + int i; + + if (in_progress) { + cur_addr = fchan_read(fchan, FDMA_CH_CMD_OFST); + cur_addr &= FDMA_CH_CMD_DATA_MASK; + } + + for (i = fchan->fdesc->n_nodes - 1 ; i >= 0; i--) { + if (cur_addr == fdesc->node[i].pdesc) { + residue += fnode_read(fchan, FDMA_CNTN_OFST); + break; + } + residue += fdesc->node[i].desc->nbytes; + } + + return residue; +} + +static enum dma_status st_fdma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct st_fdma_chan *fchan = to_st_fdma_chan(chan); + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&fchan->vchan.lock, flags); + vd = vchan_find_desc(&fchan->vchan, cookie); + if (fchan->fdesc && cookie == fchan->fdesc->vdesc.tx.cookie) + txstate->residue = st_fdma_desc_residue(fchan, vd, true); + else if (vd) + txstate->residue = st_fdma_desc_residue(fchan, vd, false); + else + txstate->residue = 0; + + spin_unlock_irqrestore(&fchan->vchan.lock, flags); + + return ret; +} + +static void st_fdma_issue_pending(struct dma_chan *chan) +{ + struct st_fdma_chan *fchan = to_st_fdma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&fchan->vchan.lock, flags); + + if (vchan_issue_pending(&fchan->vchan) && !fchan->fdesc) + st_fdma_xfer_desc(fchan); + + spin_unlock_irqrestore(&fchan->vchan.lock, flags); +} + +static int st_fdma_pause(struct dma_chan *chan) +{ + unsigned long flags; + struct st_fdma_chan *fchan = to_st_fdma_chan(chan); + int ch_id = fchan->vchan.chan.chan_id; + unsigned long cmd = FDMA_CMD_PAUSE(ch_id); + + dev_dbg(fchan->fdev->dev, "pause chan:%d\n", ch_id); + + spin_lock_irqsave(&fchan->vchan.lock, flags); + if (fchan->fdesc) + fdma_write(fchan->fdev, cmd, FDMA_CMD_SET_OFST); + spin_unlock_irqrestore(&fchan->vchan.lock, flags); + + return 0; +} + +static int st_fdma_resume(struct dma_chan *chan) +{ + unsigned long flags; + unsigned long val; + struct st_fdma_chan *fchan = to_st_fdma_chan(chan); + int ch_id = fchan->vchan.chan.chan_id; + + dev_dbg(fchan->fdev->dev, "resume chan:%d\n", ch_id); + + spin_lock_irqsave(&fchan->vchan.lock, flags); + if (fchan->fdesc) { + val = fchan_read(fchan, FDMA_CH_CMD_OFST); + val &= FDMA_CH_CMD_DATA_MASK; + fchan_write(fchan, val, FDMA_CH_CMD_OFST); + } + spin_unlock_irqrestore(&fchan->vchan.lock, flags); + + return 0; +} + +static int st_fdma_terminate_all(struct dma_chan *chan) +{ + unsigned long flags; + LIST_HEAD(head); + struct st_fdma_chan *fchan = to_st_fdma_chan(chan); + int ch_id = fchan->vchan.chan.chan_id; + unsigned long cmd = FDMA_CMD_PAUSE(ch_id); + + dev_dbg(fchan->fdev->dev, "terminate chan:%d\n", ch_id); + + spin_lock_irqsave(&fchan->vchan.lock, flags); + fdma_write(fchan->fdev, cmd, FDMA_CMD_SET_OFST); + fchan->fdesc = NULL; + vchan_get_all_descriptors(&fchan->vchan, &head); + spin_unlock_irqrestore(&fchan->vchan.lock, flags); + vchan_dma_desc_free_list(&fchan->vchan, &head); + + return 0; +} + +static int st_fdma_slave_config(struct dma_chan *chan, + struct dma_slave_config *slave_cfg) +{ + struct st_fdma_chan *fchan = to_st_fdma_chan(chan); + + memcpy(&fchan->scfg, slave_cfg, sizeof(fchan->scfg)); + return 0; +} + +static const struct st_fdma_driverdata fdma_mpe31_stih407_11 = { + .name = "STiH407", + .id = 0, +}; + +static const struct st_fdma_driverdata fdma_mpe31_stih407_12 = { + .name = "STiH407", + .id = 1, +}; + +static const struct st_fdma_driverdata fdma_mpe31_stih407_13 = { + .name = "STiH407", + .id = 2, +}; + +static const struct of_device_id st_fdma_match[] = { + { .compatible = "st,stih407-fdma-mpe31-11" + , .data = &fdma_mpe31_stih407_11 }, + { .compatible = "st,stih407-fdma-mpe31-12" + , .data = &fdma_mpe31_stih407_12 }, + { .compatible = "st,stih407-fdma-mpe31-13" + , .data = &fdma_mpe31_stih407_13 }, + {}, +}; +MODULE_DEVICE_TABLE(of, st_fdma_match); + +static int st_fdma_parse_dt(struct platform_device *pdev, + const struct st_fdma_driverdata *drvdata, + struct st_fdma_dev *fdev) +{ + snprintf(fdev->fw_name, FW_NAME_SIZE, "fdma_%s_%d.elf", + drvdata->name, drvdata->id); + + return of_property_read_u32(pdev->dev.of_node, "dma-channels", + &fdev->nr_channels); +} +#define FDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +static void st_fdma_free(struct st_fdma_dev *fdev) +{ + struct st_fdma_chan *fchan; + int i; + + for (i = 0; i < fdev->nr_channels; i++) { + fchan = &fdev->chans[i]; + list_del(&fchan->vchan.chan.device_node); + tasklet_kill(&fchan->vchan.task); + } +} + +static int st_fdma_probe(struct platform_device *pdev) +{ + struct st_fdma_dev *fdev; + const struct of_device_id *match; + struct device_node *np = pdev->dev.of_node; + const struct st_fdma_driverdata *drvdata; + int ret, i; + + match = of_match_device((st_fdma_match), &pdev->dev); + if (!match || !match->data) { + dev_err(&pdev->dev, "No device match found\n"); + return -ENODEV; + } + + drvdata = match->data; + + fdev = devm_kzalloc(&pdev->dev, sizeof(*fdev), GFP_KERNEL); + if (!fdev) + return -ENOMEM; + + ret = st_fdma_parse_dt(pdev, drvdata, fdev); + if (ret) { + dev_err(&pdev->dev, "unable to find platform data\n"); + goto err; + } + + fdev->chans = devm_kcalloc(&pdev->dev, fdev->nr_channels, + sizeof(struct st_fdma_chan), GFP_KERNEL); + if (!fdev->chans) + return -ENOMEM; + + fdev->dev = &pdev->dev; + fdev->drvdata = drvdata; + platform_set_drvdata(pdev, fdev); + + fdev->irq = platform_get_irq(pdev, 0); + if (fdev->irq < 0) + return -EINVAL; + + ret = devm_request_irq(&pdev->dev, fdev->irq, st_fdma_irq_handler, 0, + dev_name(&pdev->dev), fdev); + if (ret) { + dev_err(&pdev->dev, "Failed to request irq (%d)\n", ret); + goto err; + } + + fdev->slim_rproc = st_slim_rproc_alloc(pdev, fdev->fw_name); + if (IS_ERR(fdev->slim_rproc)) { + ret = PTR_ERR(fdev->slim_rproc); + dev_err(&pdev->dev, "slim_rproc_alloc failed (%d)\n", ret); + goto err; + } + + /* Initialise list of FDMA channels */ + INIT_LIST_HEAD(&fdev->dma_device.channels); + for (i = 0; i < fdev->nr_channels; i++) { + struct st_fdma_chan *fchan = &fdev->chans[i]; + + fchan->fdev = fdev; + fchan->vchan.desc_free = st_fdma_free_desc; + vchan_init(&fchan->vchan, &fdev->dma_device); + } + + /* Initialise the FDMA dreq (reserve 0 & 31 for FDMA use) */ + fdev->dreq_mask = BIT(0) | BIT(31); + + dma_cap_set(DMA_SLAVE, fdev->dma_device.cap_mask); + dma_cap_set(DMA_CYCLIC, fdev->dma_device.cap_mask); + dma_cap_set(DMA_MEMCPY, fdev->dma_device.cap_mask); + + fdev->dma_device.dev = &pdev->dev; + fdev->dma_device.device_alloc_chan_resources = st_fdma_alloc_chan_res; + fdev->dma_device.device_free_chan_resources = st_fdma_free_chan_res; + fdev->dma_device.device_prep_dma_cyclic = st_fdma_prep_dma_cyclic; + fdev->dma_device.device_prep_slave_sg = st_fdma_prep_slave_sg; + fdev->dma_device.device_prep_dma_memcpy = st_fdma_prep_dma_memcpy; + fdev->dma_device.device_tx_status = st_fdma_tx_status; + fdev->dma_device.device_issue_pending = st_fdma_issue_pending; + fdev->dma_device.device_terminate_all = st_fdma_terminate_all; + fdev->dma_device.device_config = st_fdma_slave_config; + fdev->dma_device.device_pause = st_fdma_pause; + fdev->dma_device.device_resume = st_fdma_resume; + + fdev->dma_device.src_addr_widths = FDMA_DMA_BUSWIDTHS; + fdev->dma_device.dst_addr_widths = FDMA_DMA_BUSWIDTHS; + fdev->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + fdev->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + ret = dmaenginem_async_device_register(&fdev->dma_device); + if (ret) { + dev_err(&pdev->dev, + "Failed to register DMA device (%d)\n", ret); + goto err_rproc; + } + + ret = of_dma_controller_register(np, st_fdma_of_xlate, fdev); + if (ret) { + dev_err(&pdev->dev, + "Failed to register controller (%d)\n", ret); + goto err_rproc; + } + + dev_info(&pdev->dev, "ST FDMA engine driver, irq:%d\n", fdev->irq); + + return 0; + +err_rproc: + st_fdma_free(fdev); + st_slim_rproc_put(fdev->slim_rproc); +err: + return ret; +} + +static int st_fdma_remove(struct platform_device *pdev) +{ + struct st_fdma_dev *fdev = platform_get_drvdata(pdev); + + devm_free_irq(&pdev->dev, fdev->irq, fdev); + st_slim_rproc_put(fdev->slim_rproc); + of_dma_controller_free(pdev->dev.of_node); + + return 0; +} + +static struct platform_driver st_fdma_platform_driver = { + .driver = { + .name = DRIVER_NAME, + .of_match_table = st_fdma_match, + }, + .probe = st_fdma_probe, + .remove = st_fdma_remove, +}; +module_platform_driver(st_fdma_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver"); +MODULE_AUTHOR("Ludovic.barre "); +MODULE_AUTHOR("Peter Griffin "); +MODULE_ALIAS("platform:" DRIVER_NAME); diff --git a/drivers/dma/st_fdma.h b/drivers/dma/st_fdma.h new file mode 100644 index 0000000000..fa15b97a3b --- /dev/null +++ b/drivers/dma/st_fdma.h @@ -0,0 +1,245 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * DMA driver header for STMicroelectronics STi FDMA controller + * + * Copyright (C) 2014 STMicroelectronics + * + * Author: Ludovic Barre + */ +#ifndef __DMA_ST_FDMA_H +#define __DMA_ST_FDMA_H + +#include +#include +#include +#include +#include "virt-dma.h" + +#define ST_FDMA_NR_DREQS 32 +#define FW_NAME_SIZE 30 +#define DRIVER_NAME "st-fdma" + +/** + * struct st_fdma_generic_node - Free running/paced generic node + * + * @length: Length in bytes of a line in a 2D mem to mem + * @sstride: Stride, in bytes, between source lines in a 2D data move + * @dstride: Stride, in bytes, between destination lines in a 2D data move + */ +struct st_fdma_generic_node { + u32 length; + u32 sstride; + u32 dstride; +}; + +/** + * struct st_fdma_hw_node - Node structure used by fdma hw + * + * @next: Pointer to next node + * @control: Transfer Control Parameters + * @nbytes: Number of Bytes to read + * @saddr: Source address + * @daddr: Destination address + * + * @generic: generic node for free running/paced transfert type + * 2 others transfert type are possible, but not yet implemented + * + * The NODE structures must be aligned to a 32 byte boundary + */ +struct st_fdma_hw_node { + u32 next; + u32 control; + u32 nbytes; + u32 saddr; + u32 daddr; + union { + struct st_fdma_generic_node generic; + }; +} __aligned(32); + +/* + * node control parameters + */ +#define FDMA_NODE_CTRL_REQ_MAP_MASK GENMASK(4, 0) +#define FDMA_NODE_CTRL_REQ_MAP_FREE_RUN 0x0 +#define FDMA_NODE_CTRL_REQ_MAP_DREQ(n) ((n)&FDMA_NODE_CTRL_REQ_MAP_MASK) +#define FDMA_NODE_CTRL_REQ_MAP_EXT FDMA_NODE_CTRL_REQ_MAP_MASK +#define FDMA_NODE_CTRL_SRC_MASK GENMASK(6, 5) +#define FDMA_NODE_CTRL_SRC_STATIC BIT(5) +#define FDMA_NODE_CTRL_SRC_INCR BIT(6) +#define FDMA_NODE_CTRL_DST_MASK GENMASK(8, 7) +#define FDMA_NODE_CTRL_DST_STATIC BIT(7) +#define FDMA_NODE_CTRL_DST_INCR BIT(8) +#define FDMA_NODE_CTRL_SECURE BIT(15) +#define FDMA_NODE_CTRL_PAUSE_EON BIT(30) +#define FDMA_NODE_CTRL_INT_EON BIT(31) + +/** + * struct st_fdma_sw_node - descriptor structure for link list + * + * @pdesc: Physical address of desc + * @node: link used for putting this into a channel queue + */ +struct st_fdma_sw_node { + dma_addr_t pdesc; + struct st_fdma_hw_node *desc; +}; + +#define NAME_SZ 10 + +struct st_fdma_driverdata { + u32 id; + char name[NAME_SZ]; +}; + +struct st_fdma_desc { + struct virt_dma_desc vdesc; + struct st_fdma_chan *fchan; + bool iscyclic; + unsigned int n_nodes; + struct st_fdma_sw_node node[]; +}; + +enum st_fdma_type { + ST_FDMA_TYPE_FREE_RUN, + ST_FDMA_TYPE_PACED, +}; + +struct st_fdma_cfg { + struct device_node *of_node; + enum st_fdma_type type; + dma_addr_t dev_addr; + enum dma_transfer_direction dir; + int req_line; /* request line */ + long req_ctrl; /* Request control */ +}; + +struct st_fdma_chan { + struct st_fdma_dev *fdev; + struct dma_pool *node_pool; + struct dma_slave_config scfg; + struct st_fdma_cfg cfg; + + int dreq_line; + + struct virt_dma_chan vchan; + struct st_fdma_desc *fdesc; + enum dma_status status; +}; + +struct st_fdma_dev { + struct device *dev; + const struct st_fdma_driverdata *drvdata; + struct dma_device dma_device; + + struct st_slim_rproc *slim_rproc; + + int irq; + + struct st_fdma_chan *chans; + + spinlock_t dreq_lock; + unsigned long dreq_mask; + + u32 nr_channels; + char fw_name[FW_NAME_SIZE]; +}; + +/* Peripheral Registers*/ + +#define FDMA_CMD_STA_OFST 0xFC0 +#define FDMA_CMD_SET_OFST 0xFC4 +#define FDMA_CMD_CLR_OFST 0xFC8 +#define FDMA_CMD_MASK_OFST 0xFCC +#define FDMA_CMD_START(ch) (0x1 << (ch << 1)) +#define FDMA_CMD_PAUSE(ch) (0x2 << (ch << 1)) +#define FDMA_CMD_FLUSH(ch) (0x3 << (ch << 1)) + +#define FDMA_INT_STA_OFST 0xFD0 +#define FDMA_INT_STA_CH 0x1 +#define FDMA_INT_STA_ERR 0x2 + +#define FDMA_INT_SET_OFST 0xFD4 +#define FDMA_INT_CLR_OFST 0xFD8 +#define FDMA_INT_MASK_OFST 0xFDC + +#define fdma_read(fdev, name) \ + readl((fdev)->slim_rproc->peri + name) + +#define fdma_write(fdev, val, name) \ + writel((val), (fdev)->slim_rproc->peri + name) + +/* fchan interface (dmem) */ +#define FDMA_CH_CMD_OFST 0x200 +#define FDMA_CH_CMD_STA_MASK GENMASK(1, 0) +#define FDMA_CH_CMD_STA_IDLE (0x0) +#define FDMA_CH_CMD_STA_START (0x1) +#define FDMA_CH_CMD_STA_RUNNING (0x2) +#define FDMA_CH_CMD_STA_PAUSED (0x3) +#define FDMA_CH_CMD_ERR_MASK GENMASK(4, 2) +#define FDMA_CH_CMD_ERR_INT (0x0 << 2) +#define FDMA_CH_CMD_ERR_NAND (0x1 << 2) +#define FDMA_CH_CMD_ERR_MCHI (0x2 << 2) +#define FDMA_CH_CMD_DATA_MASK GENMASK(31, 5) +#define fchan_read(fchan, name) \ + readl((fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \ + + (fchan)->vchan.chan.chan_id * 0x4 \ + + name) + +#define fchan_write(fchan, val, name) \ + writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \ + + (fchan)->vchan.chan.chan_id * 0x4 \ + + name) + +/* req interface */ +#define FDMA_REQ_CTRL_OFST 0x240 +#define dreq_write(fchan, val, name) \ + writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \ + + fchan->dreq_line * 0x04 \ + + name) +/* node interface */ +#define FDMA_NODE_SZ 128 +#define FDMA_PTRN_OFST 0x800 +#define FDMA_CNTN_OFST 0x808 +#define FDMA_SADDRN_OFST 0x80c +#define FDMA_DADDRN_OFST 0x810 +#define fnode_read(fchan, name) \ + readl((fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \ + + (fchan)->vchan.chan.chan_id * FDMA_NODE_SZ \ + + name) + +#define fnode_write(fchan, val, name) \ + writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \ + + (fchan)->vchan.chan.chan_id * FDMA_NODE_SZ \ + + name) + +/* + * request control bits + */ +#define FDMA_REQ_CTRL_NUM_OPS_MASK GENMASK(31, 24) +#define FDMA_REQ_CTRL_NUM_OPS(n) (FDMA_REQ_CTRL_NUM_OPS_MASK & \ + ((n) << 24)) +#define FDMA_REQ_CTRL_INITIATOR_MASK BIT(22) +#define FDMA_REQ_CTRL_INIT0 (0x0 << 22) +#define FDMA_REQ_CTRL_INIT1 (0x1 << 22) +#define FDMA_REQ_CTRL_INC_ADDR_ON BIT(21) +#define FDMA_REQ_CTRL_DATA_SWAP_ON BIT(17) +#define FDMA_REQ_CTRL_WNR BIT(14) +#define FDMA_REQ_CTRL_OPCODE_MASK GENMASK(7, 4) +#define FDMA_REQ_CTRL_OPCODE_LD_ST1 (0x0 << 4) +#define FDMA_REQ_CTRL_OPCODE_LD_ST2 (0x1 << 4) +#define FDMA_REQ_CTRL_OPCODE_LD_ST4 (0x2 << 4) +#define FDMA_REQ_CTRL_OPCODE_LD_ST8 (0x3 << 4) +#define FDMA_REQ_CTRL_OPCODE_LD_ST16 (0x4 << 4) +#define FDMA_REQ_CTRL_OPCODE_LD_ST32 (0x5 << 4) +#define FDMA_REQ_CTRL_OPCODE_LD_ST64 (0x6 << 4) +#define FDMA_REQ_CTRL_HOLDOFF_MASK GENMASK(2, 0) +#define FDMA_REQ_CTRL_HOLDOFF(n) ((n) & FDMA_REQ_CTRL_HOLDOFF_MASK) + +/* bits used by client to configure request control */ +#define FDMA_REQ_CTRL_CFG_MASK (FDMA_REQ_CTRL_HOLDOFF_MASK | \ + FDMA_REQ_CTRL_DATA_SWAP_ON | \ + FDMA_REQ_CTRL_INC_ADDR_ON | \ + FDMA_REQ_CTRL_INITIATOR_MASK) + +#endif /* __DMA_ST_FDMA_H */ diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c new file mode 100644 index 0000000000..002833fb1f --- /dev/null +++ b/drivers/dma/ste_dma40.c @@ -0,0 +1,3695 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) Ericsson AB 2007-2008 + * Copyright (C) ST-Ericsson SA 2008-2010 + * Author: Per Forlin for ST-Ericsson + * Author: Jonas Aaberg for ST-Ericsson + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "ste_dma40.h" +#include "ste_dma40_ll.h" + +/** + * struct stedma40_platform_data - Configuration struct for the dma device. + * + * @dev_tx: mapping between destination event line and io address + * @dev_rx: mapping between source event line and io address + * @disabled_channels: A vector, ending with -1, that marks physical channels + * that are for different reasons not available for the driver. + * @soft_lli_chans: A vector, that marks physical channels will use LLI by SW + * which avoids HW bug that exists in some versions of the controller. + * SoftLLI introduces relink overhead that could impact performace for + * certain use cases. + * @num_of_soft_lli_chans: The number of channels that needs to be configured + * to use SoftLLI. + * @use_esram_lcla: flag for mapping the lcla into esram region + * @num_of_memcpy_chans: The number of channels reserved for memcpy. + * @num_of_phy_chans: The number of physical channels implemented in HW. + * 0 means reading the number of channels from DMA HW but this is only valid + * for 'multiple of 4' channels, like 8. + */ +struct stedma40_platform_data { + int disabled_channels[STEDMA40_MAX_PHYS]; + int *soft_lli_chans; + int num_of_soft_lli_chans; + bool use_esram_lcla; + int num_of_memcpy_chans; + int num_of_phy_chans; +}; + +#define D40_NAME "dma40" + +#define D40_PHY_CHAN -1 + +/* For masking out/in 2 bit channel positions */ +#define D40_CHAN_POS(chan) (2 * (chan / 2)) +#define D40_CHAN_POS_MASK(chan) (0x3 << D40_CHAN_POS(chan)) + +/* Maximum iterations taken before giving up suspending a channel */ +#define D40_SUSPEND_MAX_IT 500 + +/* Milliseconds */ +#define DMA40_AUTOSUSPEND_DELAY 100 + +/* Hardware requirement on LCLA alignment */ +#define LCLA_ALIGNMENT 0x40000 + +/* Max number of links per event group */ +#define D40_LCLA_LINK_PER_EVENT_GRP 128 +#define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP + +/* Max number of logical channels per physical channel */ +#define D40_MAX_LOG_CHAN_PER_PHY 32 + +/* Attempts before giving up to trying to get pages that are aligned */ +#define MAX_LCLA_ALLOC_ATTEMPTS 256 + +/* Bit markings for allocation map */ +#define D40_ALLOC_FREE BIT(31) +#define D40_ALLOC_PHY BIT(30) +#define D40_ALLOC_LOG_FREE 0 + +#define D40_MEMCPY_MAX_CHANS 8 + +/* Reserved event lines for memcpy only. */ +#define DB8500_DMA_MEMCPY_EV_0 51 +#define DB8500_DMA_MEMCPY_EV_1 56 +#define DB8500_DMA_MEMCPY_EV_2 57 +#define DB8500_DMA_MEMCPY_EV_3 58 +#define DB8500_DMA_MEMCPY_EV_4 59 +#define DB8500_DMA_MEMCPY_EV_5 60 + +static int dma40_memcpy_channels[] = { + DB8500_DMA_MEMCPY_EV_0, + DB8500_DMA_MEMCPY_EV_1, + DB8500_DMA_MEMCPY_EV_2, + DB8500_DMA_MEMCPY_EV_3, + DB8500_DMA_MEMCPY_EV_4, + DB8500_DMA_MEMCPY_EV_5, +}; + +/* Default configuration for physical memcpy */ +static const struct stedma40_chan_cfg dma40_memcpy_conf_phy = { + .mode = STEDMA40_MODE_PHYSICAL, + .dir = DMA_MEM_TO_MEM, + + .src_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE, + .src_info.psize = STEDMA40_PSIZE_PHY_1, + .src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL, + + .dst_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE, + .dst_info.psize = STEDMA40_PSIZE_PHY_1, + .dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL, +}; + +/* Default configuration for logical memcpy */ +static const struct stedma40_chan_cfg dma40_memcpy_conf_log = { + .mode = STEDMA40_MODE_LOGICAL, + .dir = DMA_MEM_TO_MEM, + + .src_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE, + .src_info.psize = STEDMA40_PSIZE_LOG_1, + .src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL, + + .dst_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE, + .dst_info.psize = STEDMA40_PSIZE_LOG_1, + .dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL, +}; + +/** + * enum d40_command - The different commands and/or statuses. + * + * @D40_DMA_STOP: DMA channel command STOP or status STOPPED, + * @D40_DMA_RUN: The DMA channel is RUNNING of the command RUN. + * @D40_DMA_SUSPEND_REQ: Request the DMA to SUSPEND as soon as possible. + * @D40_DMA_SUSPENDED: The DMA channel is SUSPENDED. + */ +enum d40_command { + D40_DMA_STOP = 0, + D40_DMA_RUN = 1, + D40_DMA_SUSPEND_REQ = 2, + D40_DMA_SUSPENDED = 3 +}; + +/* + * enum d40_events - The different Event Enables for the event lines. + * + * @D40_DEACTIVATE_EVENTLINE: De-activate Event line, stopping the logical chan. + * @D40_ACTIVATE_EVENTLINE: Activate the Event line, to start a logical chan. + * @D40_SUSPEND_REQ_EVENTLINE: Requesting for suspending a event line. + * @D40_ROUND_EVENTLINE: Status check for event line. + */ + +enum d40_events { + D40_DEACTIVATE_EVENTLINE = 0, + D40_ACTIVATE_EVENTLINE = 1, + D40_SUSPEND_REQ_EVENTLINE = 2, + D40_ROUND_EVENTLINE = 3 +}; + +/* + * These are the registers that has to be saved and later restored + * when the DMA hw is powered off. + * TODO: Add save/restore of D40_DREG_GCC on dma40 v3 or later, if that works. + */ +static __maybe_unused u32 d40_backup_regs[] = { + D40_DREG_LCPA, + D40_DREG_LCLA, + D40_DREG_PRMSE, + D40_DREG_PRMSO, + D40_DREG_PRMOE, + D40_DREG_PRMOO, +}; + +#define BACKUP_REGS_SZ ARRAY_SIZE(d40_backup_regs) + +/* + * since 9540 and 8540 has the same HW revision + * use v4a for 9540 or ealier + * use v4b for 8540 or later + * HW revision: + * DB8500ed has revision 0 + * DB8500v1 has revision 2 + * DB8500v2 has revision 3 + * AP9540v1 has revision 4 + * DB8540v1 has revision 4 + * TODO: Check if all these registers have to be saved/restored on dma40 v4a + */ +static u32 d40_backup_regs_v4a[] = { + D40_DREG_PSEG1, + D40_DREG_PSEG2, + D40_DREG_PSEG3, + D40_DREG_PSEG4, + D40_DREG_PCEG1, + D40_DREG_PCEG2, + D40_DREG_PCEG3, + D40_DREG_PCEG4, + D40_DREG_RSEG1, + D40_DREG_RSEG2, + D40_DREG_RSEG3, + D40_DREG_RSEG4, + D40_DREG_RCEG1, + D40_DREG_RCEG2, + D40_DREG_RCEG3, + D40_DREG_RCEG4, +}; + +#define BACKUP_REGS_SZ_V4A ARRAY_SIZE(d40_backup_regs_v4a) + +static u32 d40_backup_regs_v4b[] = { + D40_DREG_CPSEG1, + D40_DREG_CPSEG2, + D40_DREG_CPSEG3, + D40_DREG_CPSEG4, + D40_DREG_CPSEG5, + D40_DREG_CPCEG1, + D40_DREG_CPCEG2, + D40_DREG_CPCEG3, + D40_DREG_CPCEG4, + D40_DREG_CPCEG5, + D40_DREG_CRSEG1, + D40_DREG_CRSEG2, + D40_DREG_CRSEG3, + D40_DREG_CRSEG4, + D40_DREG_CRSEG5, + D40_DREG_CRCEG1, + D40_DREG_CRCEG2, + D40_DREG_CRCEG3, + D40_DREG_CRCEG4, + D40_DREG_CRCEG5, +}; + +#define BACKUP_REGS_SZ_V4B ARRAY_SIZE(d40_backup_regs_v4b) + +static __maybe_unused u32 d40_backup_regs_chan[] = { + D40_CHAN_REG_SSCFG, + D40_CHAN_REG_SSELT, + D40_CHAN_REG_SSPTR, + D40_CHAN_REG_SSLNK, + D40_CHAN_REG_SDCFG, + D40_CHAN_REG_SDELT, + D40_CHAN_REG_SDPTR, + D40_CHAN_REG_SDLNK, +}; + +#define BACKUP_REGS_SZ_MAX ((BACKUP_REGS_SZ_V4A > BACKUP_REGS_SZ_V4B) ? \ + BACKUP_REGS_SZ_V4A : BACKUP_REGS_SZ_V4B) + +/** + * struct d40_interrupt_lookup - lookup table for interrupt handler + * + * @src: Interrupt mask register. + * @clr: Interrupt clear register. + * @is_error: true if this is an error interrupt. + * @offset: start delta in the lookup_log_chans in d40_base. If equals to + * D40_PHY_CHAN, the lookup_phy_chans shall be used instead. + */ +struct d40_interrupt_lookup { + u32 src; + u32 clr; + bool is_error; + int offset; +}; + + +static struct d40_interrupt_lookup il_v4a[] = { + {D40_DREG_LCTIS0, D40_DREG_LCICR0, false, 0}, + {D40_DREG_LCTIS1, D40_DREG_LCICR1, false, 32}, + {D40_DREG_LCTIS2, D40_DREG_LCICR2, false, 64}, + {D40_DREG_LCTIS3, D40_DREG_LCICR3, false, 96}, + {D40_DREG_LCEIS0, D40_DREG_LCICR0, true, 0}, + {D40_DREG_LCEIS1, D40_DREG_LCICR1, true, 32}, + {D40_DREG_LCEIS2, D40_DREG_LCICR2, true, 64}, + {D40_DREG_LCEIS3, D40_DREG_LCICR3, true, 96}, + {D40_DREG_PCTIS, D40_DREG_PCICR, false, D40_PHY_CHAN}, + {D40_DREG_PCEIS, D40_DREG_PCICR, true, D40_PHY_CHAN}, +}; + +static struct d40_interrupt_lookup il_v4b[] = { + {D40_DREG_CLCTIS1, D40_DREG_CLCICR1, false, 0}, + {D40_DREG_CLCTIS2, D40_DREG_CLCICR2, false, 32}, + {D40_DREG_CLCTIS3, D40_DREG_CLCICR3, false, 64}, + {D40_DREG_CLCTIS4, D40_DREG_CLCICR4, false, 96}, + {D40_DREG_CLCTIS5, D40_DREG_CLCICR5, false, 128}, + {D40_DREG_CLCEIS1, D40_DREG_CLCICR1, true, 0}, + {D40_DREG_CLCEIS2, D40_DREG_CLCICR2, true, 32}, + {D40_DREG_CLCEIS3, D40_DREG_CLCICR3, true, 64}, + {D40_DREG_CLCEIS4, D40_DREG_CLCICR4, true, 96}, + {D40_DREG_CLCEIS5, D40_DREG_CLCICR5, true, 128}, + {D40_DREG_CPCTIS, D40_DREG_CPCICR, false, D40_PHY_CHAN}, + {D40_DREG_CPCEIS, D40_DREG_CPCICR, true, D40_PHY_CHAN}, +}; + +/** + * struct d40_reg_val - simple lookup struct + * + * @reg: The register. + * @val: The value that belongs to the register in reg. + */ +struct d40_reg_val { + unsigned int reg; + unsigned int val; +}; + +static __initdata struct d40_reg_val dma_init_reg_v4a[] = { + /* Clock every part of the DMA block from start */ + { .reg = D40_DREG_GCC, .val = D40_DREG_GCC_ENABLE_ALL}, + + /* Interrupts on all logical channels */ + { .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS3, .val = 0xFFFFFFFF} +}; +static __initdata struct d40_reg_val dma_init_reg_v4b[] = { + /* Clock every part of the DMA block from start */ + { .reg = D40_DREG_GCC, .val = D40_DREG_GCC_ENABLE_ALL}, + + /* Interrupts on all logical channels */ + { .reg = D40_DREG_CLCMIS1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCMIS2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCMIS3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCMIS4, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCMIS5, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCICR1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCICR2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCICR3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCICR4, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCICR5, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCTIS1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCTIS2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCTIS3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCTIS4, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_CLCTIS5, .val = 0xFFFFFFFF} +}; + +/** + * struct d40_lli_pool - Structure for keeping LLIs in memory + * + * @base: Pointer to memory area when the pre_alloc_lli's are not large + * enough, IE bigger than the most common case, 1 dst and 1 src. NULL if + * pre_alloc_lli is used. + * @dma_addr: DMA address, if mapped + * @size: The size in bytes of the memory at base or the size of pre_alloc_lli. + * @pre_alloc_lli: Pre allocated area for the most common case of transfers, + * one buffer to one buffer. + */ +struct d40_lli_pool { + void *base; + int size; + dma_addr_t dma_addr; + /* Space for dst and src, plus an extra for padding */ + u8 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)]; +}; + +/** + * struct d40_desc - A descriptor is one DMA job. + * + * @lli_phy: LLI settings for physical channel. Both src and dst= + * points into the lli_pool, to base if lli_len > 1 or to pre_alloc_lli if + * lli_len equals one. + * @lli_log: Same as above but for logical channels. + * @lli_pool: The pool with two entries pre-allocated. + * @lli_len: Number of llis of current descriptor. + * @lli_current: Number of transferred llis. + * @lcla_alloc: Number of LCLA entries allocated. + * @txd: DMA engine struct. Used for among other things for communication + * during a transfer. + * @node: List entry. + * @is_in_client_list: true if the client owns this descriptor. + * @cyclic: true if this is a cyclic job + * + * This descriptor is used for both logical and physical transfers. + */ +struct d40_desc { + /* LLI physical */ + struct d40_phy_lli_bidir lli_phy; + /* LLI logical */ + struct d40_log_lli_bidir lli_log; + + struct d40_lli_pool lli_pool; + int lli_len; + int lli_current; + int lcla_alloc; + + struct dma_async_tx_descriptor txd; + struct list_head node; + + bool is_in_client_list; + bool cyclic; +}; + +/** + * struct d40_lcla_pool - LCLA pool settings and data. + * + * @base: The virtual address of LCLA. 18 bit aligned. + * @dma_addr: DMA address, if mapped + * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used. + * This pointer is only there for clean-up on error. + * @pages: The number of pages needed for all physical channels. + * Only used later for clean-up on error + * @lock: Lock to protect the content in this struct. + * @alloc_map: big map over which LCLA entry is own by which job. + */ +struct d40_lcla_pool { + void *base; + dma_addr_t dma_addr; + void *base_unaligned; + int pages; + spinlock_t lock; + struct d40_desc **alloc_map; +}; + +/** + * struct d40_phy_res - struct for handling eventlines mapped to physical + * channels. + * + * @lock: A lock protection this entity. + * @reserved: True if used by secure world or otherwise. + * @num: The physical channel number of this entity. + * @allocated_src: Bit mapped to show which src event line's are mapped to + * this physical channel. Can also be free or physically allocated. + * @allocated_dst: Same as for src but is dst. + * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as + * event line number. + * @use_soft_lli: To mark if the linked lists of channel are managed by SW. + */ +struct d40_phy_res { + spinlock_t lock; + bool reserved; + int num; + u32 allocated_src; + u32 allocated_dst; + bool use_soft_lli; +}; + +struct d40_base; + +/** + * struct d40_chan - Struct that describes a channel. + * + * @lock: A spinlock to protect this struct. + * @log_num: The logical number, if any of this channel. + * @pending_tx: The number of pending transfers. Used between interrupt handler + * and tasklet. + * @busy: Set to true when transfer is ongoing on this channel. + * @phy_chan: Pointer to physical channel which this instance runs on. If this + * point is NULL, then the channel is not allocated. + * @chan: DMA engine handle. + * @tasklet: Tasklet that gets scheduled from interrupt context to complete a + * transfer and call client callback. + * @client: Cliented owned descriptor list. + * @pending_queue: Submitted jobs, to be issued by issue_pending() + * @active: Active descriptor. + * @done: Completed jobs + * @queue: Queued jobs. + * @prepare_queue: Prepared jobs. + * @dma_cfg: The client configuration of this dma channel. + * @slave_config: DMA slave configuration. + * @configured: whether the dma_cfg configuration is valid + * @base: Pointer to the device instance struct. + * @src_def_cfg: Default cfg register setting for src. + * @dst_def_cfg: Default cfg register setting for dst. + * @log_def: Default logical channel settings. + * @lcpa: Pointer to dst and src lcpa settings. + * @runtime_addr: runtime configured address. + * @runtime_direction: runtime configured direction. + * + * This struct can either "be" a logical or a physical channel. + */ +struct d40_chan { + spinlock_t lock; + int log_num; + int pending_tx; + bool busy; + struct d40_phy_res *phy_chan; + struct dma_chan chan; + struct tasklet_struct tasklet; + struct list_head client; + struct list_head pending_queue; + struct list_head active; + struct list_head done; + struct list_head queue; + struct list_head prepare_queue; + struct stedma40_chan_cfg dma_cfg; + struct dma_slave_config slave_config; + bool configured; + struct d40_base *base; + /* Default register configurations */ + u32 src_def_cfg; + u32 dst_def_cfg; + struct d40_def_lcsp log_def; + struct d40_log_lli_full *lcpa; + /* Runtime reconfiguration */ + dma_addr_t runtime_addr; + enum dma_transfer_direction runtime_direction; +}; + +/** + * struct d40_gen_dmac - generic values to represent u8500/u8540 DMA + * controller + * + * @backup: the pointer to the registers address array for backup + * @backup_size: the size of the registers address array for backup + * @realtime_en: the realtime enable register + * @realtime_clear: the realtime clear register + * @high_prio_en: the high priority enable register + * @high_prio_clear: the high priority clear register + * @interrupt_en: the interrupt enable register + * @interrupt_clear: the interrupt clear register + * @il: the pointer to struct d40_interrupt_lookup + * @il_size: the size of d40_interrupt_lookup array + * @init_reg: the pointer to the struct d40_reg_val + * @init_reg_size: the size of d40_reg_val array + */ +struct d40_gen_dmac { + u32 *backup; + u32 backup_size; + u32 realtime_en; + u32 realtime_clear; + u32 high_prio_en; + u32 high_prio_clear; + u32 interrupt_en; + u32 interrupt_clear; + struct d40_interrupt_lookup *il; + u32 il_size; + struct d40_reg_val *init_reg; + u32 init_reg_size; +}; + +/** + * struct d40_base - The big global struct, one for each probe'd instance. + * + * @interrupt_lock: Lock used to make sure one interrupt is handle a time. + * @execmd_lock: Lock for execute command usage since several channels share + * the same physical register. + * @dev: The device structure. + * @virtbase: The virtual base address of the DMA's register. + * @rev: silicon revision detected. + * @clk: Pointer to the DMA clock structure. + * @irq: The IRQ number. + * @num_memcpy_chans: The number of channels used for memcpy (mem-to-mem + * transfers). + * @num_phy_chans: The number of physical channels. Read from HW. This + * is the number of available channels for this driver, not counting "Secure + * mode" allocated physical channels. + * @num_log_chans: The number of logical channels. Calculated from + * num_phy_chans. + * @dma_both: dma_device channels that can do both memcpy and slave transfers. + * @dma_slave: dma_device channels that can do only do slave transfers. + * @dma_memcpy: dma_device channels that can do only do memcpy transfers. + * @phy_chans: Room for all possible physical channels in system. + * @log_chans: Room for all possible logical channels in system. + * @lookup_log_chans: Used to map interrupt number to logical channel. Points + * to log_chans entries. + * @lookup_phy_chans: Used to map interrupt number to physical channel. Points + * to phy_chans entries. + * @plat_data: Pointer to provided platform_data which is the driver + * configuration. + * @lcpa_regulator: Pointer to hold the regulator for the esram bank for lcla. + * @phy_res: Vector containing all physical channels. + * @lcla_pool: lcla pool settings and data. + * @lcpa_base: The virtual mapped address of LCPA. + * @phy_lcpa: The physical address of the LCPA. + * @lcpa_size: The size of the LCPA area. + * @desc_slab: cache for descriptors. + * @reg_val_backup: Here the values of some hardware registers are stored + * before the DMA is powered off. They are restored when the power is back on. + * @reg_val_backup_v4: Backup of registers that only exits on dma40 v3 and + * later + * @reg_val_backup_chan: Backup data for standard channel parameter registers. + * @regs_interrupt: Scratch space for registers during interrupt. + * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off. + * @gen_dmac: the struct for generic registers values to represent u8500/8540 + * DMA controller + */ +struct d40_base { + spinlock_t interrupt_lock; + spinlock_t execmd_lock; + struct device *dev; + void __iomem *virtbase; + u8 rev:4; + struct clk *clk; + int irq; + int num_memcpy_chans; + int num_phy_chans; + int num_log_chans; + struct dma_device dma_both; + struct dma_device dma_slave; + struct dma_device dma_memcpy; + struct d40_chan *phy_chans; + struct d40_chan *log_chans; + struct d40_chan **lookup_log_chans; + struct d40_chan **lookup_phy_chans; + struct stedma40_platform_data *plat_data; + struct regulator *lcpa_regulator; + /* Physical half channels */ + struct d40_phy_res *phy_res; + struct d40_lcla_pool lcla_pool; + void *lcpa_base; + dma_addr_t phy_lcpa; + resource_size_t lcpa_size; + struct kmem_cache *desc_slab; + u32 reg_val_backup[BACKUP_REGS_SZ]; + u32 reg_val_backup_v4[BACKUP_REGS_SZ_MAX]; + u32 *reg_val_backup_chan; + u32 *regs_interrupt; + u16 gcc_pwr_off_mask; + struct d40_gen_dmac gen_dmac; +}; + +static struct device *chan2dev(struct d40_chan *d40c) +{ + return &d40c->chan.dev->device; +} + +static bool chan_is_physical(struct d40_chan *chan) +{ + return chan->log_num == D40_PHY_CHAN; +} + +static bool chan_is_logical(struct d40_chan *chan) +{ + return !chan_is_physical(chan); +} + +static void __iomem *chan_base(struct d40_chan *chan) +{ + return chan->base->virtbase + D40_DREG_PCBASE + + chan->phy_chan->num * D40_DREG_PCDELTA; +} + +#define d40_err(dev, format, arg...) \ + dev_err(dev, "[%s] " format, __func__, ## arg) + +#define chan_err(d40c, format, arg...) \ + d40_err(chan2dev(d40c), format, ## arg) + +static int d40_set_runtime_config_write(struct dma_chan *chan, + struct dma_slave_config *config, + enum dma_transfer_direction direction); + +static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d, + int lli_len) +{ + bool is_log = chan_is_logical(d40c); + u32 align; + void *base; + + if (is_log) + align = sizeof(struct d40_log_lli); + else + align = sizeof(struct d40_phy_lli); + + if (lli_len == 1) { + base = d40d->lli_pool.pre_alloc_lli; + d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli); + d40d->lli_pool.base = NULL; + } else { + d40d->lli_pool.size = lli_len * 2 * align; + + base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT); + d40d->lli_pool.base = base; + + if (d40d->lli_pool.base == NULL) + return -ENOMEM; + } + + if (is_log) { + d40d->lli_log.src = PTR_ALIGN(base, align); + d40d->lli_log.dst = d40d->lli_log.src + lli_len; + + d40d->lli_pool.dma_addr = 0; + } else { + d40d->lli_phy.src = PTR_ALIGN(base, align); + d40d->lli_phy.dst = d40d->lli_phy.src + lli_len; + + d40d->lli_pool.dma_addr = dma_map_single(d40c->base->dev, + d40d->lli_phy.src, + d40d->lli_pool.size, + DMA_TO_DEVICE); + + if (dma_mapping_error(d40c->base->dev, + d40d->lli_pool.dma_addr)) { + kfree(d40d->lli_pool.base); + d40d->lli_pool.base = NULL; + d40d->lli_pool.dma_addr = 0; + return -ENOMEM; + } + } + + return 0; +} + +static void d40_pool_lli_free(struct d40_chan *d40c, struct d40_desc *d40d) +{ + if (d40d->lli_pool.dma_addr) + dma_unmap_single(d40c->base->dev, d40d->lli_pool.dma_addr, + d40d->lli_pool.size, DMA_TO_DEVICE); + + kfree(d40d->lli_pool.base); + d40d->lli_pool.base = NULL; + d40d->lli_pool.size = 0; + d40d->lli_log.src = NULL; + d40d->lli_log.dst = NULL; + d40d->lli_phy.src = NULL; + d40d->lli_phy.dst = NULL; +} + +static int d40_lcla_alloc_one(struct d40_chan *d40c, + struct d40_desc *d40d) +{ + unsigned long flags; + int i; + int ret = -EINVAL; + + spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); + + /* + * Allocate both src and dst at the same time, therefore the half + * start on 1 since 0 can't be used since zero is used as end marker. + */ + for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) { + int idx = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP + i; + + if (!d40c->base->lcla_pool.alloc_map[idx]) { + d40c->base->lcla_pool.alloc_map[idx] = d40d; + d40d->lcla_alloc++; + ret = i; + break; + } + } + + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); + + return ret; +} + +static int d40_lcla_free_all(struct d40_chan *d40c, + struct d40_desc *d40d) +{ + unsigned long flags; + int i; + int ret = -EINVAL; + + if (chan_is_physical(d40c)) + return 0; + + spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); + + for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) { + int idx = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP + i; + + if (d40c->base->lcla_pool.alloc_map[idx] == d40d) { + d40c->base->lcla_pool.alloc_map[idx] = NULL; + d40d->lcla_alloc--; + if (d40d->lcla_alloc == 0) { + ret = 0; + break; + } + } + } + + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); + + return ret; + +} + +static void d40_desc_remove(struct d40_desc *d40d) +{ + list_del(&d40d->node); +} + +static struct d40_desc *d40_desc_get(struct d40_chan *d40c) +{ + struct d40_desc *desc = NULL; + + if (!list_empty(&d40c->client)) { + struct d40_desc *d; + struct d40_desc *_d; + + list_for_each_entry_safe(d, _d, &d40c->client, node) { + if (async_tx_test_ack(&d->txd)) { + d40_desc_remove(d); + desc = d; + memset(desc, 0, sizeof(*desc)); + break; + } + } + } + + if (!desc) + desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT); + + if (desc) + INIT_LIST_HEAD(&desc->node); + + return desc; +} + +static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d) +{ + + d40_pool_lli_free(d40c, d40d); + d40_lcla_free_all(d40c, d40d); + kmem_cache_free(d40c->base->desc_slab, d40d); +} + +static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc) +{ + list_add_tail(&desc->node, &d40c->active); +} + +static void d40_phy_lli_load(struct d40_chan *chan, struct d40_desc *desc) +{ + struct d40_phy_lli *lli_dst = desc->lli_phy.dst; + struct d40_phy_lli *lli_src = desc->lli_phy.src; + void __iomem *base = chan_base(chan); + + writel(lli_src->reg_cfg, base + D40_CHAN_REG_SSCFG); + writel(lli_src->reg_elt, base + D40_CHAN_REG_SSELT); + writel(lli_src->reg_ptr, base + D40_CHAN_REG_SSPTR); + writel(lli_src->reg_lnk, base + D40_CHAN_REG_SSLNK); + + writel(lli_dst->reg_cfg, base + D40_CHAN_REG_SDCFG); + writel(lli_dst->reg_elt, base + D40_CHAN_REG_SDELT); + writel(lli_dst->reg_ptr, base + D40_CHAN_REG_SDPTR); + writel(lli_dst->reg_lnk, base + D40_CHAN_REG_SDLNK); +} + +static void d40_desc_done(struct d40_chan *d40c, struct d40_desc *desc) +{ + list_add_tail(&desc->node, &d40c->done); +} + +static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc) +{ + struct d40_lcla_pool *pool = &chan->base->lcla_pool; + struct d40_log_lli_bidir *lli = &desc->lli_log; + int lli_current = desc->lli_current; + int lli_len = desc->lli_len; + bool cyclic = desc->cyclic; + int curr_lcla = -EINVAL; + int first_lcla = 0; + bool use_esram_lcla = chan->base->plat_data->use_esram_lcla; + bool linkback; + + /* + * We may have partially running cyclic transfers, in case we did't get + * enough LCLA entries. + */ + linkback = cyclic && lli_current == 0; + + /* + * For linkback, we need one LCLA even with only one link, because we + * can't link back to the one in LCPA space + */ + if (linkback || (lli_len - lli_current > 1)) { + /* + * If the channel is expected to use only soft_lli don't + * allocate a lcla. This is to avoid a HW issue that exists + * in some controller during a peripheral to memory transfer + * that uses linked lists. + */ + if (!(chan->phy_chan->use_soft_lli && + chan->dma_cfg.dir == DMA_DEV_TO_MEM)) + curr_lcla = d40_lcla_alloc_one(chan, desc); + + first_lcla = curr_lcla; + } + + /* + * For linkback, we normally load the LCPA in the loop since we need to + * link it to the second LCLA and not the first. However, if we + * couldn't even get a first LCLA, then we have to run in LCPA and + * reload manually. + */ + if (!linkback || curr_lcla == -EINVAL) { + unsigned int flags = 0; + + if (curr_lcla == -EINVAL) + flags |= LLI_TERM_INT; + + d40_log_lli_lcpa_write(chan->lcpa, + &lli->dst[lli_current], + &lli->src[lli_current], + curr_lcla, + flags); + lli_current++; + } + + if (curr_lcla < 0) + goto set_current; + + for (; lli_current < lli_len; lli_current++) { + unsigned int lcla_offset = chan->phy_chan->num * 1024 + + 8 * curr_lcla * 2; + struct d40_log_lli *lcla = pool->base + lcla_offset; + unsigned int flags = 0; + int next_lcla; + + if (lli_current + 1 < lli_len) + next_lcla = d40_lcla_alloc_one(chan, desc); + else + next_lcla = linkback ? first_lcla : -EINVAL; + + if (cyclic || next_lcla == -EINVAL) + flags |= LLI_TERM_INT; + + if (linkback && curr_lcla == first_lcla) { + /* First link goes in both LCPA and LCLA */ + d40_log_lli_lcpa_write(chan->lcpa, + &lli->dst[lli_current], + &lli->src[lli_current], + next_lcla, flags); + } + + /* + * One unused LCLA in the cyclic case if the very first + * next_lcla fails... + */ + d40_log_lli_lcla_write(lcla, + &lli->dst[lli_current], + &lli->src[lli_current], + next_lcla, flags); + + /* + * Cache maintenance is not needed if lcla is + * mapped in esram + */ + if (!use_esram_lcla) { + dma_sync_single_range_for_device(chan->base->dev, + pool->dma_addr, lcla_offset, + 2 * sizeof(struct d40_log_lli), + DMA_TO_DEVICE); + } + curr_lcla = next_lcla; + + if (curr_lcla == -EINVAL || curr_lcla == first_lcla) { + lli_current++; + break; + } + } + set_current: + desc->lli_current = lli_current; +} + +static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) +{ + if (chan_is_physical(d40c)) { + d40_phy_lli_load(d40c, d40d); + d40d->lli_current = d40d->lli_len; + } else + d40_log_lli_to_lcxa(d40c, d40d); +} + +static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) +{ + return list_first_entry_or_null(&d40c->active, struct d40_desc, node); +} + +/* remove desc from current queue and add it to the pending_queue */ +static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) +{ + d40_desc_remove(desc); + desc->is_in_client_list = false; + list_add_tail(&desc->node, &d40c->pending_queue); +} + +static struct d40_desc *d40_first_pending(struct d40_chan *d40c) +{ + return list_first_entry_or_null(&d40c->pending_queue, struct d40_desc, + node); +} + +static struct d40_desc *d40_first_queued(struct d40_chan *d40c) +{ + return list_first_entry_or_null(&d40c->queue, struct d40_desc, node); +} + +static struct d40_desc *d40_first_done(struct d40_chan *d40c) +{ + return list_first_entry_or_null(&d40c->done, struct d40_desc, node); +} + +static int d40_psize_2_burst_size(bool is_log, int psize) +{ + if (is_log) { + if (psize == STEDMA40_PSIZE_LOG_1) + return 1; + } else { + if (psize == STEDMA40_PSIZE_PHY_1) + return 1; + } + + return 2 << psize; +} + +/* + * The dma only supports transmitting packages up to + * STEDMA40_MAX_SEG_SIZE * data_width, where data_width is stored in Bytes. + * + * Calculate the total number of dma elements required to send the entire sg list. + */ +static int d40_size_2_dmalen(int size, u32 data_width1, u32 data_width2) +{ + int dmalen; + u32 max_w = max(data_width1, data_width2); + u32 min_w = min(data_width1, data_width2); + u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE * min_w, max_w); + + if (seg_max > STEDMA40_MAX_SEG_SIZE) + seg_max -= max_w; + + if (!IS_ALIGNED(size, max_w)) + return -EINVAL; + + if (size <= seg_max) + dmalen = 1; + else { + dmalen = size / seg_max; + if (dmalen * seg_max < size) + dmalen++; + } + return dmalen; +} + +static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len, + u32 data_width1, u32 data_width2) +{ + struct scatterlist *sg; + int i; + int len = 0; + int ret; + + for_each_sg(sgl, sg, sg_len, i) { + ret = d40_size_2_dmalen(sg_dma_len(sg), + data_width1, data_width2); + if (ret < 0) + return ret; + len += ret; + } + return len; +} + +static int __d40_execute_command_phy(struct d40_chan *d40c, + enum d40_command command) +{ + u32 status; + int i; + void __iomem *active_reg; + int ret = 0; + unsigned long flags; + u32 wmask; + + if (command == D40_DMA_STOP) { + ret = __d40_execute_command_phy(d40c, D40_DMA_SUSPEND_REQ); + if (ret) + return ret; + } + + spin_lock_irqsave(&d40c->base->execmd_lock, flags); + + if (d40c->phy_chan->num % 2 == 0) + active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; + else + active_reg = d40c->base->virtbase + D40_DREG_ACTIVO; + + if (command == D40_DMA_SUSPEND_REQ) { + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + + if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP) + goto unlock; + } + + wmask = 0xffffffff & ~(D40_CHAN_POS_MASK(d40c->phy_chan->num)); + writel(wmask | (command << D40_CHAN_POS(d40c->phy_chan->num)), + active_reg); + + if (command == D40_DMA_SUSPEND_REQ) { + + for (i = 0 ; i < D40_SUSPEND_MAX_IT; i++) { + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + + cpu_relax(); + /* + * Reduce the number of bus accesses while + * waiting for the DMA to suspend. + */ + udelay(3); + + if (status == D40_DMA_STOP || + status == D40_DMA_SUSPENDED) + break; + } + + if (i == D40_SUSPEND_MAX_IT) { + chan_err(d40c, + "unable to suspend the chl %d (log: %d) status %x\n", + d40c->phy_chan->num, d40c->log_num, + status); + dump_stack(); + ret = -EBUSY; + } + + } + unlock: + spin_unlock_irqrestore(&d40c->base->execmd_lock, flags); + return ret; +} + +static void d40_term_all(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + struct d40_desc *_d; + + /* Release completed descriptors */ + while ((d40d = d40_first_done(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release active descriptors */ + while ((d40d = d40_first_active_get(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release queued descriptors waiting for transfer */ + while ((d40d = d40_first_queued(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release pending descriptors */ + while ((d40d = d40_first_pending(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release client owned descriptors */ + if (!list_empty(&d40c->client)) + list_for_each_entry_safe(d40d, _d, &d40c->client, node) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release descriptors in prepare queue */ + if (!list_empty(&d40c->prepare_queue)) + list_for_each_entry_safe(d40d, _d, + &d40c->prepare_queue, node) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + d40c->pending_tx = 0; +} + +static void __d40_config_set_event(struct d40_chan *d40c, + enum d40_events event_type, u32 event, + int reg) +{ + void __iomem *addr = chan_base(d40c) + reg; + int tries; + u32 status; + + switch (event_type) { + + case D40_DEACTIVATE_EVENTLINE: + + writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event)) + | ~D40_EVENTLINE_MASK(event), addr); + break; + + case D40_SUSPEND_REQ_EVENTLINE: + status = (readl(addr) & D40_EVENTLINE_MASK(event)) >> + D40_EVENTLINE_POS(event); + + if (status == D40_DEACTIVATE_EVENTLINE || + status == D40_SUSPEND_REQ_EVENTLINE) + break; + + writel((D40_SUSPEND_REQ_EVENTLINE << D40_EVENTLINE_POS(event)) + | ~D40_EVENTLINE_MASK(event), addr); + + for (tries = 0 ; tries < D40_SUSPEND_MAX_IT; tries++) { + + status = (readl(addr) & D40_EVENTLINE_MASK(event)) >> + D40_EVENTLINE_POS(event); + + cpu_relax(); + /* + * Reduce the number of bus accesses while + * waiting for the DMA to suspend. + */ + udelay(3); + + if (status == D40_DEACTIVATE_EVENTLINE) + break; + } + + if (tries == D40_SUSPEND_MAX_IT) { + chan_err(d40c, + "unable to stop the event_line chl %d (log: %d)" + "status %x\n", d40c->phy_chan->num, + d40c->log_num, status); + } + break; + + case D40_ACTIVATE_EVENTLINE: + /* + * The hardware sometimes doesn't register the enable when src and dst + * event lines are active on the same logical channel. Retry to ensure + * it does. Usually only one retry is sufficient. + */ + tries = 100; + while (--tries) { + writel((D40_ACTIVATE_EVENTLINE << + D40_EVENTLINE_POS(event)) | + ~D40_EVENTLINE_MASK(event), addr); + + if (readl(addr) & D40_EVENTLINE_MASK(event)) + break; + } + + if (tries != 99) + dev_dbg(chan2dev(d40c), + "[%s] workaround enable S%cLNK (%d tries)\n", + __func__, reg == D40_CHAN_REG_SSLNK ? 'S' : 'D', + 100 - tries); + + WARN_ON(!tries); + break; + + case D40_ROUND_EVENTLINE: + BUG(); + break; + + } +} + +static void d40_config_set_event(struct d40_chan *d40c, + enum d40_events event_type) +{ + u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type); + + /* Enable event line connected to device (or memcpy) */ + if ((d40c->dma_cfg.dir == DMA_DEV_TO_MEM) || + (d40c->dma_cfg.dir == DMA_DEV_TO_DEV)) + __d40_config_set_event(d40c, event_type, event, + D40_CHAN_REG_SSLNK); + + if (d40c->dma_cfg.dir != DMA_DEV_TO_MEM) + __d40_config_set_event(d40c, event_type, event, + D40_CHAN_REG_SDLNK); +} + +static u32 d40_chan_has_events(struct d40_chan *d40c) +{ + void __iomem *chanbase = chan_base(d40c); + u32 val; + + val = readl(chanbase + D40_CHAN_REG_SSLNK); + val |= readl(chanbase + D40_CHAN_REG_SDLNK); + + return val; +} + +static int +__d40_execute_command_log(struct d40_chan *d40c, enum d40_command command) +{ + unsigned long flags; + int ret = 0; + u32 active_status; + void __iomem *active_reg; + + if (d40c->phy_chan->num % 2 == 0) + active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; + else + active_reg = d40c->base->virtbase + D40_DREG_ACTIVO; + + + spin_lock_irqsave(&d40c->phy_chan->lock, flags); + + switch (command) { + case D40_DMA_STOP: + case D40_DMA_SUSPEND_REQ: + + active_status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + + if (active_status == D40_DMA_RUN) + d40_config_set_event(d40c, D40_SUSPEND_REQ_EVENTLINE); + else + d40_config_set_event(d40c, D40_DEACTIVATE_EVENTLINE); + + if (!d40_chan_has_events(d40c) && (command == D40_DMA_STOP)) + ret = __d40_execute_command_phy(d40c, command); + + break; + + case D40_DMA_RUN: + + d40_config_set_event(d40c, D40_ACTIVATE_EVENTLINE); + ret = __d40_execute_command_phy(d40c, command); + break; + + case D40_DMA_SUSPENDED: + BUG(); + break; + } + + spin_unlock_irqrestore(&d40c->phy_chan->lock, flags); + return ret; +} + +static int d40_channel_execute_command(struct d40_chan *d40c, + enum d40_command command) +{ + if (chan_is_logical(d40c)) + return __d40_execute_command_log(d40c, command); + else + return __d40_execute_command_phy(d40c, command); +} + +static u32 d40_get_prmo(struct d40_chan *d40c) +{ + static const unsigned int phy_map[] = { + [STEDMA40_PCHAN_BASIC_MODE] + = D40_DREG_PRMO_PCHAN_BASIC, + [STEDMA40_PCHAN_MODULO_MODE] + = D40_DREG_PRMO_PCHAN_MODULO, + [STEDMA40_PCHAN_DOUBLE_DST_MODE] + = D40_DREG_PRMO_PCHAN_DOUBLE_DST, + }; + static const unsigned int log_map[] = { + [STEDMA40_LCHAN_SRC_PHY_DST_LOG] + = D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG, + [STEDMA40_LCHAN_SRC_LOG_DST_PHY] + = D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY, + [STEDMA40_LCHAN_SRC_LOG_DST_LOG] + = D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG, + }; + + if (chan_is_physical(d40c)) + return phy_map[d40c->dma_cfg.mode_opt]; + else + return log_map[d40c->dma_cfg.mode_opt]; +} + +static void d40_config_write(struct d40_chan *d40c) +{ + u32 addr_base; + u32 var; + + /* Odd addresses are even addresses + 4 */ + addr_base = (d40c->phy_chan->num % 2) * 4; + /* Setup channel mode to logical or physical */ + var = ((u32)(chan_is_logical(d40c)) + 1) << + D40_CHAN_POS(d40c->phy_chan->num); + writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base); + + /* Setup operational mode option register */ + var = d40_get_prmo(d40c) << D40_CHAN_POS(d40c->phy_chan->num); + + writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base); + + if (chan_is_logical(d40c)) { + int lidx = (d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) + & D40_SREG_ELEM_LOG_LIDX_MASK; + void __iomem *chanbase = chan_base(d40c); + + /* Set default config for CFG reg */ + writel(d40c->src_def_cfg, chanbase + D40_CHAN_REG_SSCFG); + writel(d40c->dst_def_cfg, chanbase + D40_CHAN_REG_SDCFG); + + /* Set LIDX for lcla */ + writel(lidx, chanbase + D40_CHAN_REG_SSELT); + writel(lidx, chanbase + D40_CHAN_REG_SDELT); + + /* Clear LNK which will be used by d40_chan_has_events() */ + writel(0, chanbase + D40_CHAN_REG_SSLNK); + writel(0, chanbase + D40_CHAN_REG_SDLNK); + } +} + +static u32 d40_residue(struct d40_chan *d40c) +{ + u32 num_elt; + + if (chan_is_logical(d40c)) + num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) + >> D40_MEM_LCSP2_ECNT_POS; + else { + u32 val = readl(chan_base(d40c) + D40_CHAN_REG_SDELT); + num_elt = (val & D40_SREG_ELEM_PHY_ECNT_MASK) + >> D40_SREG_ELEM_PHY_ECNT_POS; + } + + return num_elt * d40c->dma_cfg.dst_info.data_width; +} + +static bool d40_tx_is_linked(struct d40_chan *d40c) +{ + bool is_link; + + if (chan_is_logical(d40c)) + is_link = readl(&d40c->lcpa->lcsp3) & D40_MEM_LCSP3_DLOS_MASK; + else + is_link = readl(chan_base(d40c) + D40_CHAN_REG_SDLNK) + & D40_SREG_LNK_PHYS_LNK_MASK; + + return is_link; +} + +static int d40_pause(struct dma_chan *chan) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + int res = 0; + unsigned long flags; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Channel is not allocated!\n"); + return -EINVAL; + } + + if (!d40c->busy) + return 0; + + spin_lock_irqsave(&d40c->lock, flags); + pm_runtime_get_sync(d40c->base->dev); + + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + spin_unlock_irqrestore(&d40c->lock, flags); + return res; +} + +static int d40_resume(struct dma_chan *chan) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + int res = 0; + unsigned long flags; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Channel is not allocated!\n"); + return -EINVAL; + } + + if (!d40c->busy) + return 0; + + spin_lock_irqsave(&d40c->lock, flags); + pm_runtime_get_sync(d40c->base->dev); + + /* If bytes left to transfer or linked tx resume job */ + if (d40_residue(d40c) || d40_tx_is_linked(d40c)) + res = d40_channel_execute_command(d40c, D40_DMA_RUN); + + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + spin_unlock_irqrestore(&d40c->lock, flags); + return res; +} + +static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct d40_chan *d40c = container_of(tx->chan, + struct d40_chan, + chan); + struct d40_desc *d40d = container_of(tx, struct d40_desc, txd); + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&d40c->lock, flags); + cookie = dma_cookie_assign(tx); + d40_desc_queue(d40c, d40d); + spin_unlock_irqrestore(&d40c->lock, flags); + + return cookie; +} + +static int d40_start(struct d40_chan *d40c) +{ + return d40_channel_execute_command(d40c, D40_DMA_RUN); +} + +static struct d40_desc *d40_queue_start(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + int err; + + /* Start queued jobs, if any */ + d40d = d40_first_queued(d40c); + + if (d40d != NULL) { + if (!d40c->busy) { + d40c->busy = true; + pm_runtime_get_sync(d40c->base->dev); + } + + /* Remove from queue */ + d40_desc_remove(d40d); + + /* Add to active queue */ + d40_desc_submit(d40c, d40d); + + /* Initiate DMA job */ + d40_desc_load(d40c, d40d); + + /* Start dma job */ + err = d40_start(d40c); + + if (err) + return NULL; + } + + return d40d; +} + +/* called from interrupt context */ +static void dma_tc_handle(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + + /* Get first active entry from list */ + d40d = d40_first_active_get(d40c); + + if (d40d == NULL) + return; + + if (d40d->cyclic) { + /* + * If this was a paritially loaded list, we need to reloaded + * it, and only when the list is completed. We need to check + * for done because the interrupt will hit for every link, and + * not just the last one. + */ + if (d40d->lli_current < d40d->lli_len + && !d40_tx_is_linked(d40c) + && !d40_residue(d40c)) { + d40_lcla_free_all(d40c, d40d); + d40_desc_load(d40c, d40d); + (void) d40_start(d40c); + + if (d40d->lli_current == d40d->lli_len) + d40d->lli_current = 0; + } + } else { + d40_lcla_free_all(d40c, d40d); + + if (d40d->lli_current < d40d->lli_len) { + d40_desc_load(d40c, d40d); + /* Start dma job */ + (void) d40_start(d40c); + return; + } + + if (d40_queue_start(d40c) == NULL) { + d40c->busy = false; + + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + } + + d40_desc_remove(d40d); + d40_desc_done(d40c, d40d); + } + + d40c->pending_tx++; + tasklet_schedule(&d40c->tasklet); + +} + +static void dma_tasklet(struct tasklet_struct *t) +{ + struct d40_chan *d40c = from_tasklet(d40c, t, tasklet); + struct d40_desc *d40d; + unsigned long flags; + bool callback_active; + struct dmaengine_desc_callback cb; + + spin_lock_irqsave(&d40c->lock, flags); + + /* Get first entry from the done list */ + d40d = d40_first_done(d40c); + if (d40d == NULL) { + /* Check if we have reached here for cyclic job */ + d40d = d40_first_active_get(d40c); + if (d40d == NULL || !d40d->cyclic) + goto check_pending_tx; + } + + if (!d40d->cyclic) + dma_cookie_complete(&d40d->txd); + + /* + * If terminating a channel pending_tx is set to zero. + * This prevents any finished active jobs to return to the client. + */ + if (d40c->pending_tx == 0) { + spin_unlock_irqrestore(&d40c->lock, flags); + return; + } + + /* Callback to client */ + callback_active = !!(d40d->txd.flags & DMA_PREP_INTERRUPT); + dmaengine_desc_get_callback(&d40d->txd, &cb); + + if (!d40d->cyclic) { + if (async_tx_test_ack(&d40d->txd)) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } else if (!d40d->is_in_client_list) { + d40_desc_remove(d40d); + d40_lcla_free_all(d40c, d40d); + list_add_tail(&d40d->node, &d40c->client); + d40d->is_in_client_list = true; + } + } + + d40c->pending_tx--; + + if (d40c->pending_tx) + tasklet_schedule(&d40c->tasklet); + + spin_unlock_irqrestore(&d40c->lock, flags); + + if (callback_active) + dmaengine_desc_callback_invoke(&cb, NULL); + + return; + check_pending_tx: + /* Rescue manouver if receiving double interrupts */ + if (d40c->pending_tx > 0) + d40c->pending_tx--; + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static irqreturn_t d40_handle_interrupt(int irq, void *data) +{ + int i; + u32 idx; + u32 row; + long chan = -1; + struct d40_chan *d40c; + struct d40_base *base = data; + u32 *regs = base->regs_interrupt; + struct d40_interrupt_lookup *il = base->gen_dmac.il; + u32 il_size = base->gen_dmac.il_size; + + spin_lock(&base->interrupt_lock); + + /* Read interrupt status of both logical and physical channels */ + for (i = 0; i < il_size; i++) + regs[i] = readl(base->virtbase + il[i].src); + + for (;;) { + + chan = find_next_bit((unsigned long *)regs, + BITS_PER_LONG * il_size, chan + 1); + + /* No more set bits found? */ + if (chan == BITS_PER_LONG * il_size) + break; + + row = chan / BITS_PER_LONG; + idx = chan & (BITS_PER_LONG - 1); + + if (il[row].offset == D40_PHY_CHAN) + d40c = base->lookup_phy_chans[idx]; + else + d40c = base->lookup_log_chans[il[row].offset + idx]; + + if (!d40c) { + /* + * No error because this can happen if something else + * in the system is using the channel. + */ + continue; + } + + /* ACK interrupt */ + writel(BIT(idx), base->virtbase + il[row].clr); + + spin_lock(&d40c->lock); + + if (!il[row].is_error) + dma_tc_handle(d40c); + else + d40_err(base->dev, "IRQ chan: %ld offset %d idx %d\n", + chan, il[row].offset, idx); + + spin_unlock(&d40c->lock); + } + + spin_unlock(&base->interrupt_lock); + + return IRQ_HANDLED; +} + +static int d40_validate_conf(struct d40_chan *d40c, + struct stedma40_chan_cfg *conf) +{ + int res = 0; + bool is_log = conf->mode == STEDMA40_MODE_LOGICAL; + + if (!conf->dir) { + chan_err(d40c, "Invalid direction.\n"); + res = -EINVAL; + } + + if ((is_log && conf->dev_type > d40c->base->num_log_chans) || + (!is_log && conf->dev_type > d40c->base->num_phy_chans) || + (conf->dev_type < 0)) { + chan_err(d40c, "Invalid device type (%d)\n", conf->dev_type); + res = -EINVAL; + } + + if (conf->dir == DMA_DEV_TO_DEV) { + /* + * DMAC HW supports it. Will be added to this driver, + * in case any dma client requires it. + */ + chan_err(d40c, "periph to periph not supported\n"); + res = -EINVAL; + } + + if (d40_psize_2_burst_size(is_log, conf->src_info.psize) * + conf->src_info.data_width != + d40_psize_2_burst_size(is_log, conf->dst_info.psize) * + conf->dst_info.data_width) { + /* + * The DMAC hardware only supports + * src (burst x width) == dst (burst x width) + */ + + chan_err(d40c, "src (burst x width) != dst (burst x width)\n"); + res = -EINVAL; + } + + return res; +} + +static bool d40_alloc_mask_set(struct d40_phy_res *phy, + bool is_src, int log_event_line, bool is_log, + bool *first_user) +{ + unsigned long flags; + spin_lock_irqsave(&phy->lock, flags); + + *first_user = ((phy->allocated_src | phy->allocated_dst) + == D40_ALLOC_FREE); + + if (!is_log) { + /* Physical interrupts are masked per physical full channel */ + if (phy->allocated_src == D40_ALLOC_FREE && + phy->allocated_dst == D40_ALLOC_FREE) { + phy->allocated_dst = D40_ALLOC_PHY; + phy->allocated_src = D40_ALLOC_PHY; + goto found_unlock; + } else + goto not_found_unlock; + } + + /* Logical channel */ + if (is_src) { + if (phy->allocated_src == D40_ALLOC_PHY) + goto not_found_unlock; + + if (phy->allocated_src == D40_ALLOC_FREE) + phy->allocated_src = D40_ALLOC_LOG_FREE; + + if (!(phy->allocated_src & BIT(log_event_line))) { + phy->allocated_src |= BIT(log_event_line); + goto found_unlock; + } else + goto not_found_unlock; + } else { + if (phy->allocated_dst == D40_ALLOC_PHY) + goto not_found_unlock; + + if (phy->allocated_dst == D40_ALLOC_FREE) + phy->allocated_dst = D40_ALLOC_LOG_FREE; + + if (!(phy->allocated_dst & BIT(log_event_line))) { + phy->allocated_dst |= BIT(log_event_line); + goto found_unlock; + } + } + not_found_unlock: + spin_unlock_irqrestore(&phy->lock, flags); + return false; + found_unlock: + spin_unlock_irqrestore(&phy->lock, flags); + return true; +} + +static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src, + int log_event_line) +{ + unsigned long flags; + bool is_free = false; + + spin_lock_irqsave(&phy->lock, flags); + if (!log_event_line) { + phy->allocated_dst = D40_ALLOC_FREE; + phy->allocated_src = D40_ALLOC_FREE; + is_free = true; + goto unlock; + } + + /* Logical channel */ + if (is_src) { + phy->allocated_src &= ~BIT(log_event_line); + if (phy->allocated_src == D40_ALLOC_LOG_FREE) + phy->allocated_src = D40_ALLOC_FREE; + } else { + phy->allocated_dst &= ~BIT(log_event_line); + if (phy->allocated_dst == D40_ALLOC_LOG_FREE) + phy->allocated_dst = D40_ALLOC_FREE; + } + + is_free = ((phy->allocated_src | phy->allocated_dst) == + D40_ALLOC_FREE); + unlock: + spin_unlock_irqrestore(&phy->lock, flags); + + return is_free; +} + +static int d40_allocate_channel(struct d40_chan *d40c, bool *first_phy_user) +{ + int dev_type = d40c->dma_cfg.dev_type; + int event_group; + int event_line; + struct d40_phy_res *phys; + int i; + int j; + int log_num; + int num_phy_chans; + bool is_src; + bool is_log = d40c->dma_cfg.mode == STEDMA40_MODE_LOGICAL; + + phys = d40c->base->phy_res; + num_phy_chans = d40c->base->num_phy_chans; + + if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM) { + log_num = 2 * dev_type; + is_src = true; + } else if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV || + d40c->dma_cfg.dir == DMA_MEM_TO_MEM) { + /* dst event lines are used for logical memcpy */ + log_num = 2 * dev_type + 1; + is_src = false; + } else + return -EINVAL; + + event_group = D40_TYPE_TO_GROUP(dev_type); + event_line = D40_TYPE_TO_EVENT(dev_type); + + if (!is_log) { + if (d40c->dma_cfg.dir == DMA_MEM_TO_MEM) { + /* Find physical half channel */ + if (d40c->dma_cfg.use_fixed_channel) { + i = d40c->dma_cfg.phy_channel; + if (d40_alloc_mask_set(&phys[i], is_src, + 0, is_log, + first_phy_user)) + goto found_phy; + } else { + for (i = 0; i < num_phy_chans; i++) { + if (d40_alloc_mask_set(&phys[i], is_src, + 0, is_log, + first_phy_user)) + goto found_phy; + } + } + } else + for (j = 0; j < d40c->base->num_phy_chans; j += 8) { + int phy_num = j + event_group * 2; + for (i = phy_num; i < phy_num + 2; i++) { + if (d40_alloc_mask_set(&phys[i], + is_src, + 0, + is_log, + first_phy_user)) + goto found_phy; + } + } + return -EINVAL; +found_phy: + d40c->phy_chan = &phys[i]; + d40c->log_num = D40_PHY_CHAN; + goto out; + } + if (dev_type == -1) + return -EINVAL; + + /* Find logical channel */ + for (j = 0; j < d40c->base->num_phy_chans; j += 8) { + int phy_num = j + event_group * 2; + + if (d40c->dma_cfg.use_fixed_channel) { + i = d40c->dma_cfg.phy_channel; + + if ((i != phy_num) && (i != phy_num + 1)) { + dev_err(chan2dev(d40c), + "invalid fixed phy channel %d\n", i); + return -EINVAL; + } + + if (d40_alloc_mask_set(&phys[i], is_src, event_line, + is_log, first_phy_user)) + goto found_log; + + dev_err(chan2dev(d40c), + "could not allocate fixed phy channel %d\n", i); + return -EINVAL; + } + + /* + * Spread logical channels across all available physical rather + * than pack every logical channel at the first available phy + * channels. + */ + if (is_src) { + for (i = phy_num; i < phy_num + 2; i++) { + if (d40_alloc_mask_set(&phys[i], is_src, + event_line, is_log, + first_phy_user)) + goto found_log; + } + } else { + for (i = phy_num + 1; i >= phy_num; i--) { + if (d40_alloc_mask_set(&phys[i], is_src, + event_line, is_log, + first_phy_user)) + goto found_log; + } + } + } + return -EINVAL; + +found_log: + d40c->phy_chan = &phys[i]; + d40c->log_num = log_num; +out: + + if (is_log) + d40c->base->lookup_log_chans[d40c->log_num] = d40c; + else + d40c->base->lookup_phy_chans[d40c->phy_chan->num] = d40c; + + return 0; + +} + +static int d40_config_memcpy(struct d40_chan *d40c) +{ + dma_cap_mask_t cap = d40c->chan.device->cap_mask; + + if (dma_has_cap(DMA_MEMCPY, cap) && !dma_has_cap(DMA_SLAVE, cap)) { + d40c->dma_cfg = dma40_memcpy_conf_log; + d40c->dma_cfg.dev_type = dma40_memcpy_channels[d40c->chan.chan_id]; + + d40_log_cfg(&d40c->dma_cfg, + &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); + + } else if (dma_has_cap(DMA_MEMCPY, cap) && + dma_has_cap(DMA_SLAVE, cap)) { + d40c->dma_cfg = dma40_memcpy_conf_phy; + + /* Generate interrupt at end of transfer or relink. */ + d40c->dst_def_cfg |= BIT(D40_SREG_CFG_TIM_POS); + + /* Generate interrupt on error. */ + d40c->src_def_cfg |= BIT(D40_SREG_CFG_EIM_POS); + d40c->dst_def_cfg |= BIT(D40_SREG_CFG_EIM_POS); + + } else { + chan_err(d40c, "No memcpy\n"); + return -EINVAL; + } + + return 0; +} + +static int d40_free_dma(struct d40_chan *d40c) +{ + + int res = 0; + u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type); + struct d40_phy_res *phy = d40c->phy_chan; + bool is_src; + + /* Terminate all queued and active transfers */ + d40_term_all(d40c); + + if (phy == NULL) { + chan_err(d40c, "phy == null\n"); + return -EINVAL; + } + + if (phy->allocated_src == D40_ALLOC_FREE && + phy->allocated_dst == D40_ALLOC_FREE) { + chan_err(d40c, "channel already free\n"); + return -EINVAL; + } + + if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV || + d40c->dma_cfg.dir == DMA_MEM_TO_MEM) + is_src = false; + else if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM) + is_src = true; + else { + chan_err(d40c, "Unknown direction\n"); + return -EINVAL; + } + + pm_runtime_get_sync(d40c->base->dev); + res = d40_channel_execute_command(d40c, D40_DMA_STOP); + if (res) { + chan_err(d40c, "stop failed\n"); + goto mark_last_busy; + } + + d40_alloc_mask_free(phy, is_src, chan_is_logical(d40c) ? event : 0); + + if (chan_is_logical(d40c)) + d40c->base->lookup_log_chans[d40c->log_num] = NULL; + else + d40c->base->lookup_phy_chans[phy->num] = NULL; + + if (d40c->busy) { + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + } + + d40c->busy = false; + d40c->phy_chan = NULL; + d40c->configured = false; + mark_last_busy: + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + return res; +} + +static bool d40_is_paused(struct d40_chan *d40c) +{ + void __iomem *chanbase = chan_base(d40c); + bool is_paused = false; + unsigned long flags; + void __iomem *active_reg; + u32 status; + u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type); + + spin_lock_irqsave(&d40c->lock, flags); + + if (chan_is_physical(d40c)) { + if (d40c->phy_chan->num % 2 == 0) + active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; + else + active_reg = d40c->base->virtbase + D40_DREG_ACTIVO; + + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP) + is_paused = true; + goto unlock; + } + + if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV || + d40c->dma_cfg.dir == DMA_MEM_TO_MEM) { + status = readl(chanbase + D40_CHAN_REG_SDLNK); + } else if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM) { + status = readl(chanbase + D40_CHAN_REG_SSLNK); + } else { + chan_err(d40c, "Unknown direction\n"); + goto unlock; + } + + status = (status & D40_EVENTLINE_MASK(event)) >> + D40_EVENTLINE_POS(event); + + if (status != D40_DMA_RUN) + is_paused = true; + unlock: + spin_unlock_irqrestore(&d40c->lock, flags); + return is_paused; + +} + +static u32 stedma40_residue(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + u32 bytes_left; + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + bytes_left = d40_residue(d40c); + spin_unlock_irqrestore(&d40c->lock, flags); + + return bytes_left; +} + +static int +d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc, + struct scatterlist *sg_src, struct scatterlist *sg_dst, + unsigned int sg_len, dma_addr_t src_dev_addr, + dma_addr_t dst_dev_addr) +{ + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + struct stedma40_half_channel_info *src_info = &cfg->src_info; + struct stedma40_half_channel_info *dst_info = &cfg->dst_info; + int ret; + + ret = d40_log_sg_to_lli(sg_src, sg_len, + src_dev_addr, + desc->lli_log.src, + chan->log_def.lcsp1, + src_info->data_width, + dst_info->data_width); + + ret = d40_log_sg_to_lli(sg_dst, sg_len, + dst_dev_addr, + desc->lli_log.dst, + chan->log_def.lcsp3, + dst_info->data_width, + src_info->data_width); + + return ret < 0 ? ret : 0; +} + +static int +d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc, + struct scatterlist *sg_src, struct scatterlist *sg_dst, + unsigned int sg_len, dma_addr_t src_dev_addr, + dma_addr_t dst_dev_addr) +{ + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + struct stedma40_half_channel_info *src_info = &cfg->src_info; + struct stedma40_half_channel_info *dst_info = &cfg->dst_info; + unsigned long flags = 0; + int ret; + + if (desc->cyclic) + flags |= LLI_CYCLIC | LLI_TERM_INT; + + ret = d40_phy_sg_to_lli(sg_src, sg_len, src_dev_addr, + desc->lli_phy.src, + virt_to_phys(desc->lli_phy.src), + chan->src_def_cfg, + src_info, dst_info, flags); + + ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr, + desc->lli_phy.dst, + virt_to_phys(desc->lli_phy.dst), + chan->dst_def_cfg, + dst_info, src_info, flags); + + dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr, + desc->lli_pool.size, DMA_TO_DEVICE); + + return ret < 0 ? ret : 0; +} + +static struct d40_desc * +d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg, + unsigned int sg_len, unsigned long dma_flags) +{ + struct stedma40_chan_cfg *cfg; + struct d40_desc *desc; + int ret; + + desc = d40_desc_get(chan); + if (!desc) + return NULL; + + cfg = &chan->dma_cfg; + desc->lli_len = d40_sg_2_dmalen(sg, sg_len, cfg->src_info.data_width, + cfg->dst_info.data_width); + if (desc->lli_len < 0) { + chan_err(chan, "Unaligned size\n"); + goto free_desc; + } + + ret = d40_pool_lli_alloc(chan, desc, desc->lli_len); + if (ret < 0) { + chan_err(chan, "Could not allocate lli\n"); + goto free_desc; + } + + desc->lli_current = 0; + desc->txd.flags = dma_flags; + desc->txd.tx_submit = d40_tx_submit; + + dma_async_tx_descriptor_init(&desc->txd, &chan->chan); + + return desc; + free_desc: + d40_desc_free(chan, desc); + return NULL; +} + +static struct dma_async_tx_descriptor * +d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src, + struct scatterlist *sg_dst, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long dma_flags) +{ + struct d40_chan *chan = container_of(dchan, struct d40_chan, chan); + dma_addr_t src_dev_addr; + dma_addr_t dst_dev_addr; + struct d40_desc *desc; + unsigned long flags; + int ret; + + if (!chan->phy_chan) { + chan_err(chan, "Cannot prepare unallocated channel\n"); + return NULL; + } + + d40_set_runtime_config_write(dchan, &chan->slave_config, direction); + + spin_lock_irqsave(&chan->lock, flags); + + desc = d40_prep_desc(chan, sg_src, sg_len, dma_flags); + if (desc == NULL) + goto unlock; + + if (sg_next(&sg_src[sg_len - 1]) == sg_src) + desc->cyclic = true; + + src_dev_addr = 0; + dst_dev_addr = 0; + if (direction == DMA_DEV_TO_MEM) + src_dev_addr = chan->runtime_addr; + else if (direction == DMA_MEM_TO_DEV) + dst_dev_addr = chan->runtime_addr; + + if (chan_is_logical(chan)) + ret = d40_prep_sg_log(chan, desc, sg_src, sg_dst, + sg_len, src_dev_addr, dst_dev_addr); + else + ret = d40_prep_sg_phy(chan, desc, sg_src, sg_dst, + sg_len, src_dev_addr, dst_dev_addr); + + if (ret) { + chan_err(chan, "Failed to prepare %s sg job: %d\n", + chan_is_logical(chan) ? "log" : "phy", ret); + goto free_desc; + } + + /* + * add descriptor to the prepare queue in order to be able + * to free them later in terminate_all + */ + list_add_tail(&desc->node, &chan->prepare_queue); + + spin_unlock_irqrestore(&chan->lock, flags); + + return &desc->txd; + free_desc: + d40_desc_free(chan, desc); + unlock: + spin_unlock_irqrestore(&chan->lock, flags); + return NULL; +} + +static bool stedma40_filter(struct dma_chan *chan, void *data) +{ + struct stedma40_chan_cfg *info = data; + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int err; + + if (data) { + err = d40_validate_conf(d40c, info); + if (!err) + d40c->dma_cfg = *info; + } else + err = d40_config_memcpy(d40c); + + if (!err) + d40c->configured = true; + + return err == 0; +} + +static void __d40_set_prio_rt(struct d40_chan *d40c, int dev_type, bool src) +{ + bool realtime = d40c->dma_cfg.realtime; + bool highprio = d40c->dma_cfg.high_priority; + u32 rtreg; + u32 event = D40_TYPE_TO_EVENT(dev_type); + u32 group = D40_TYPE_TO_GROUP(dev_type); + u32 bit = BIT(event); + u32 prioreg; + struct d40_gen_dmac *dmac = &d40c->base->gen_dmac; + + rtreg = realtime ? dmac->realtime_en : dmac->realtime_clear; + /* + * Due to a hardware bug, in some cases a logical channel triggered by + * a high priority destination event line can generate extra packet + * transactions. + * + * The workaround is to not set the high priority level for the + * destination event lines that trigger logical channels. + */ + if (!src && chan_is_logical(d40c)) + highprio = false; + + prioreg = highprio ? dmac->high_prio_en : dmac->high_prio_clear; + + /* Destination event lines are stored in the upper halfword */ + if (!src) + bit <<= 16; + + writel(bit, d40c->base->virtbase + prioreg + group * 4); + writel(bit, d40c->base->virtbase + rtreg + group * 4); +} + +static void d40_set_prio_realtime(struct d40_chan *d40c) +{ + if (d40c->base->rev < 3) + return; + + if ((d40c->dma_cfg.dir == DMA_DEV_TO_MEM) || + (d40c->dma_cfg.dir == DMA_DEV_TO_DEV)) + __d40_set_prio_rt(d40c, d40c->dma_cfg.dev_type, true); + + if ((d40c->dma_cfg.dir == DMA_MEM_TO_DEV) || + (d40c->dma_cfg.dir == DMA_DEV_TO_DEV)) + __d40_set_prio_rt(d40c, d40c->dma_cfg.dev_type, false); +} + +#define D40_DT_FLAGS_MODE(flags) ((flags >> 0) & 0x1) +#define D40_DT_FLAGS_DIR(flags) ((flags >> 1) & 0x1) +#define D40_DT_FLAGS_BIG_ENDIAN(flags) ((flags >> 2) & 0x1) +#define D40_DT_FLAGS_FIXED_CHAN(flags) ((flags >> 3) & 0x1) +#define D40_DT_FLAGS_HIGH_PRIO(flags) ((flags >> 4) & 0x1) + +static struct dma_chan *d40_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct stedma40_chan_cfg cfg; + dma_cap_mask_t cap; + u32 flags; + + memset(&cfg, 0, sizeof(struct stedma40_chan_cfg)); + + dma_cap_zero(cap); + dma_cap_set(DMA_SLAVE, cap); + + cfg.dev_type = dma_spec->args[0]; + flags = dma_spec->args[2]; + + switch (D40_DT_FLAGS_MODE(flags)) { + case 0: cfg.mode = STEDMA40_MODE_LOGICAL; break; + case 1: cfg.mode = STEDMA40_MODE_PHYSICAL; break; + } + + switch (D40_DT_FLAGS_DIR(flags)) { + case 0: + cfg.dir = DMA_MEM_TO_DEV; + cfg.dst_info.big_endian = D40_DT_FLAGS_BIG_ENDIAN(flags); + break; + case 1: + cfg.dir = DMA_DEV_TO_MEM; + cfg.src_info.big_endian = D40_DT_FLAGS_BIG_ENDIAN(flags); + break; + } + + if (D40_DT_FLAGS_FIXED_CHAN(flags)) { + cfg.phy_channel = dma_spec->args[1]; + cfg.use_fixed_channel = true; + } + + if (D40_DT_FLAGS_HIGH_PRIO(flags)) + cfg.high_priority = true; + + return dma_request_channel(cap, stedma40_filter, &cfg); +} + +/* DMA ENGINE functions */ +static int d40_alloc_chan_resources(struct dma_chan *chan) +{ + int err; + unsigned long flags; + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + bool is_free_phy; + spin_lock_irqsave(&d40c->lock, flags); + + dma_cookie_init(chan); + + /* If no dma configuration is set use default configuration (memcpy) */ + if (!d40c->configured) { + err = d40_config_memcpy(d40c); + if (err) { + chan_err(d40c, "Failed to configure memcpy channel\n"); + goto mark_last_busy; + } + } + + err = d40_allocate_channel(d40c, &is_free_phy); + if (err) { + chan_err(d40c, "Failed to allocate channel\n"); + d40c->configured = false; + goto mark_last_busy; + } + + pm_runtime_get_sync(d40c->base->dev); + + d40_set_prio_realtime(d40c); + + if (chan_is_logical(d40c)) { + if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM) + d40c->lcpa = d40c->base->lcpa_base + + d40c->dma_cfg.dev_type * D40_LCPA_CHAN_SIZE; + else + d40c->lcpa = d40c->base->lcpa_base + + d40c->dma_cfg.dev_type * + D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA; + + /* Unmask the Global Interrupt Mask. */ + d40c->src_def_cfg |= BIT(D40_SREG_CFG_LOG_GIM_POS); + d40c->dst_def_cfg |= BIT(D40_SREG_CFG_LOG_GIM_POS); + } + + dev_dbg(chan2dev(d40c), "allocated %s channel (phy %d%s)\n", + chan_is_logical(d40c) ? "logical" : "physical", + d40c->phy_chan->num, + d40c->dma_cfg.use_fixed_channel ? ", fixed" : ""); + + + /* + * Only write channel configuration to the DMA if the physical + * resource is free. In case of multiple logical channels + * on the same physical resource, only the first write is necessary. + */ + if (is_free_phy) + d40_config_write(d40c); + mark_last_busy: + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + spin_unlock_irqrestore(&d40c->lock, flags); + return err; +} + +static void d40_free_chan_resources(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int err; + unsigned long flags; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Cannot free unallocated channel\n"); + return; + } + + spin_lock_irqsave(&d40c->lock, flags); + + err = d40_free_dma(d40c); + + if (err) + chan_err(d40c, "Failed to free channel\n"); + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, + dma_addr_t dst, + dma_addr_t src, + size_t size, + unsigned long dma_flags) +{ + struct scatterlist dst_sg; + struct scatterlist src_sg; + + sg_init_table(&dst_sg, 1); + sg_init_table(&src_sg, 1); + + sg_dma_address(&dst_sg) = dst; + sg_dma_address(&src_sg) = src; + + sg_dma_len(&dst_sg) = size; + sg_dma_len(&src_sg) = size; + + return d40_prep_sg(chan, &src_sg, &dst_sg, 1, + DMA_MEM_TO_MEM, dma_flags); +} + +static struct dma_async_tx_descriptor * +d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long dma_flags, void *context) +{ + if (!is_slave_direction(direction)) + return NULL; + + return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags); +} + +static struct dma_async_tx_descriptor * +dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction direction, unsigned long flags) +{ + unsigned int periods = buf_len / period_len; + struct dma_async_tx_descriptor *txd; + struct scatterlist *sg; + int i; + + sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT); + if (!sg) + return NULL; + + for (i = 0; i < periods; i++) { + sg_dma_address(&sg[i]) = dma_addr; + sg_dma_len(&sg[i]) = period_len; + dma_addr += period_len; + } + + sg_chain(sg, periods + 1, sg); + + txd = d40_prep_sg(chan, sg, sg, periods, direction, + DMA_PREP_INTERRUPT); + + kfree(sg); + + return txd; +} + +static enum dma_status d40_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + enum dma_status ret; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Cannot read status of unallocated channel\n"); + return -EINVAL; + } + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret != DMA_COMPLETE && txstate) + dma_set_residue(txstate, stedma40_residue(chan)); + + if (d40_is_paused(d40c)) + ret = DMA_PAUSED; + + return ret; +} + +static void d40_issue_pending(struct dma_chan *chan) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + unsigned long flags; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Channel is not allocated!\n"); + return; + } + + spin_lock_irqsave(&d40c->lock, flags); + + list_splice_tail_init(&d40c->pending_queue, &d40c->queue); + + /* Busy means that queued jobs are already being processed */ + if (!d40c->busy) + (void) d40_queue_start(d40c); + + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static int d40_terminate_all(struct dma_chan *chan) +{ + unsigned long flags; + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + int ret; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Channel is not allocated!\n"); + return -EINVAL; + } + + spin_lock_irqsave(&d40c->lock, flags); + + pm_runtime_get_sync(d40c->base->dev); + ret = d40_channel_execute_command(d40c, D40_DMA_STOP); + if (ret) + chan_err(d40c, "Failed to stop channel\n"); + + d40_term_all(d40c); + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + if (d40c->busy) { + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + } + d40c->busy = false; + + spin_unlock_irqrestore(&d40c->lock, flags); + return 0; +} + +static int +dma40_config_to_halfchannel(struct d40_chan *d40c, + struct stedma40_half_channel_info *info, + u32 maxburst) +{ + int psize; + + if (chan_is_logical(d40c)) { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_LOG_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_LOG_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_LOG_4; + else + psize = STEDMA40_PSIZE_LOG_1; + } else { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_PHY_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_PHY_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_PHY_4; + else + psize = STEDMA40_PSIZE_PHY_1; + } + + info->psize = psize; + info->flow_ctrl = STEDMA40_NO_FLOW_CTRL; + + return 0; +} + +static int d40_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + + memcpy(&d40c->slave_config, config, sizeof(*config)); + + return 0; +} + +/* Runtime reconfiguration extension */ +static int d40_set_runtime_config_write(struct dma_chan *chan, + struct dma_slave_config *config, + enum dma_transfer_direction direction) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + struct stedma40_chan_cfg *cfg = &d40c->dma_cfg; + enum dma_slave_buswidth src_addr_width, dst_addr_width; + dma_addr_t config_addr; + u32 src_maxburst, dst_maxburst; + int ret; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Channel is not allocated!\n"); + return -EINVAL; + } + + src_addr_width = config->src_addr_width; + src_maxburst = config->src_maxburst; + dst_addr_width = config->dst_addr_width; + dst_maxburst = config->dst_maxburst; + + if (direction == DMA_DEV_TO_MEM) { + config_addr = config->src_addr; + + if (cfg->dir != DMA_DEV_TO_MEM) + dev_dbg(d40c->base->dev, + "channel was not configured for peripheral " + "to memory transfer (%d) overriding\n", + cfg->dir); + cfg->dir = DMA_DEV_TO_MEM; + + /* Configure the memory side */ + if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + dst_addr_width = src_addr_width; + if (dst_maxburst == 0) + dst_maxburst = src_maxburst; + + } else if (direction == DMA_MEM_TO_DEV) { + config_addr = config->dst_addr; + + if (cfg->dir != DMA_MEM_TO_DEV) + dev_dbg(d40c->base->dev, + "channel was not configured for memory " + "to peripheral transfer (%d) overriding\n", + cfg->dir); + cfg->dir = DMA_MEM_TO_DEV; + + /* Configure the memory side */ + if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + src_addr_width = dst_addr_width; + if (src_maxburst == 0) + src_maxburst = dst_maxburst; + } else { + dev_err(d40c->base->dev, + "unrecognized channel direction %d\n", + direction); + return -EINVAL; + } + + if (config_addr <= 0) { + dev_err(d40c->base->dev, "no address supplied\n"); + return -EINVAL; + } + + if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) { + dev_err(d40c->base->dev, + "src/dst width/maxburst mismatch: %d*%d != %d*%d\n", + src_maxburst, + src_addr_width, + dst_maxburst, + dst_addr_width); + return -EINVAL; + } + + if (src_maxburst > 16) { + src_maxburst = 16; + dst_maxburst = src_maxburst * src_addr_width / dst_addr_width; + } else if (dst_maxburst > 16) { + dst_maxburst = 16; + src_maxburst = dst_maxburst * dst_addr_width / src_addr_width; + } + + /* Only valid widths are; 1, 2, 4 and 8. */ + if (src_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED || + src_addr_width > DMA_SLAVE_BUSWIDTH_8_BYTES || + dst_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED || + dst_addr_width > DMA_SLAVE_BUSWIDTH_8_BYTES || + !is_power_of_2(src_addr_width) || + !is_power_of_2(dst_addr_width)) + return -EINVAL; + + cfg->src_info.data_width = src_addr_width; + cfg->dst_info.data_width = dst_addr_width; + + ret = dma40_config_to_halfchannel(d40c, &cfg->src_info, + src_maxburst); + if (ret) + return ret; + + ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info, + dst_maxburst); + if (ret) + return ret; + + /* Fill in register values */ + if (chan_is_logical(d40c)) + d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); + else + d40_phy_cfg(cfg, &d40c->src_def_cfg, &d40c->dst_def_cfg); + + /* These settings will take precedence later */ + d40c->runtime_addr = config_addr; + d40c->runtime_direction = direction; + dev_dbg(d40c->base->dev, + "configured channel %s for %s, data width %d/%d, " + "maxburst %d/%d elements, LE, no flow control\n", + dma_chan_name(chan), + (direction == DMA_DEV_TO_MEM) ? "RX" : "TX", + src_addr_width, dst_addr_width, + src_maxburst, dst_maxburst); + + return 0; +} + +/* Initialization functions */ + +static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, + struct d40_chan *chans, int offset, + int num_chans) +{ + int i = 0; + struct d40_chan *d40c; + + INIT_LIST_HEAD(&dma->channels); + + for (i = offset; i < offset + num_chans; i++) { + d40c = &chans[i]; + d40c->base = base; + d40c->chan.device = dma; + + spin_lock_init(&d40c->lock); + + d40c->log_num = D40_PHY_CHAN; + + INIT_LIST_HEAD(&d40c->done); + INIT_LIST_HEAD(&d40c->active); + INIT_LIST_HEAD(&d40c->queue); + INIT_LIST_HEAD(&d40c->pending_queue); + INIT_LIST_HEAD(&d40c->client); + INIT_LIST_HEAD(&d40c->prepare_queue); + + tasklet_setup(&d40c->tasklet, dma_tasklet); + + list_add_tail(&d40c->chan.device_node, + &dma->channels); + } +} + +static void d40_ops_init(struct d40_base *base, struct dma_device *dev) +{ + if (dma_has_cap(DMA_SLAVE, dev->cap_mask)) { + dev->device_prep_slave_sg = d40_prep_slave_sg; + dev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + } + + if (dma_has_cap(DMA_MEMCPY, dev->cap_mask)) { + dev->device_prep_dma_memcpy = d40_prep_memcpy; + dev->directions = BIT(DMA_MEM_TO_MEM); + /* + * This controller can only access address at even + * 32bit boundaries, i.e. 2^2 + */ + dev->copy_align = DMAENGINE_ALIGN_4_BYTES; + } + + if (dma_has_cap(DMA_CYCLIC, dev->cap_mask)) + dev->device_prep_dma_cyclic = dma40_prep_dma_cyclic; + + dev->device_alloc_chan_resources = d40_alloc_chan_resources; + dev->device_free_chan_resources = d40_free_chan_resources; + dev->device_issue_pending = d40_issue_pending; + dev->device_tx_status = d40_tx_status; + dev->device_config = d40_set_runtime_config; + dev->device_pause = d40_pause; + dev->device_resume = d40_resume; + dev->device_terminate_all = d40_terminate_all; + dev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + dev->dev = base->dev; +} + +static int __init d40_dmaengine_init(struct d40_base *base, + int num_reserved_chans) +{ + int err ; + + d40_chan_init(base, &base->dma_slave, base->log_chans, + 0, base->num_log_chans); + + dma_cap_zero(base->dma_slave.cap_mask); + dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask); + dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask); + + d40_ops_init(base, &base->dma_slave); + + err = dmaenginem_async_device_register(&base->dma_slave); + + if (err) { + d40_err(base->dev, "Failed to register slave channels\n"); + goto exit; + } + + d40_chan_init(base, &base->dma_memcpy, base->log_chans, + base->num_log_chans, base->num_memcpy_chans); + + dma_cap_zero(base->dma_memcpy.cap_mask); + dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask); + + d40_ops_init(base, &base->dma_memcpy); + + err = dmaenginem_async_device_register(&base->dma_memcpy); + + if (err) { + d40_err(base->dev, + "Failed to register memcpy only channels\n"); + goto exit; + } + + d40_chan_init(base, &base->dma_both, base->phy_chans, + 0, num_reserved_chans); + + dma_cap_zero(base->dma_both.cap_mask); + dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask); + dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask); + dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask); + + d40_ops_init(base, &base->dma_both); + err = dmaenginem_async_device_register(&base->dma_both); + + if (err) { + d40_err(base->dev, + "Failed to register logical and physical capable channels\n"); + goto exit; + } + return 0; + exit: + return err; +} + +/* Suspend resume functionality */ +#ifdef CONFIG_PM_SLEEP +static int dma40_suspend(struct device *dev) +{ + struct d40_base *base = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; + + if (base->lcpa_regulator) + ret = regulator_disable(base->lcpa_regulator); + return ret; +} + +static int dma40_resume(struct device *dev) +{ + struct d40_base *base = dev_get_drvdata(dev); + int ret = 0; + + if (base->lcpa_regulator) { + ret = regulator_enable(base->lcpa_regulator); + if (ret) + return ret; + } + + return pm_runtime_force_resume(dev); +} +#endif + +#ifdef CONFIG_PM +static void dma40_backup(void __iomem *baseaddr, u32 *backup, + u32 *regaddr, int num, bool save) +{ + int i; + + for (i = 0; i < num; i++) { + void __iomem *addr = baseaddr + regaddr[i]; + + if (save) + backup[i] = readl_relaxed(addr); + else + writel_relaxed(backup[i], addr); + } +} + +static void d40_save_restore_registers(struct d40_base *base, bool save) +{ + int i; + + /* Save/Restore channel specific registers */ + for (i = 0; i < base->num_phy_chans; i++) { + void __iomem *addr; + int idx; + + if (base->phy_res[i].reserved) + continue; + + addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA; + idx = i * ARRAY_SIZE(d40_backup_regs_chan); + + dma40_backup(addr, &base->reg_val_backup_chan[idx], + d40_backup_regs_chan, + ARRAY_SIZE(d40_backup_regs_chan), + save); + } + + /* Save/Restore global registers */ + dma40_backup(base->virtbase, base->reg_val_backup, + d40_backup_regs, ARRAY_SIZE(d40_backup_regs), + save); + + /* Save/Restore registers only existing on dma40 v3 and later */ + if (base->gen_dmac.backup) + dma40_backup(base->virtbase, base->reg_val_backup_v4, + base->gen_dmac.backup, + base->gen_dmac.backup_size, + save); +} + +static int dma40_runtime_suspend(struct device *dev) +{ + struct d40_base *base = dev_get_drvdata(dev); + + d40_save_restore_registers(base, true); + + /* Don't disable/enable clocks for v1 due to HW bugs */ + if (base->rev != 1) + writel_relaxed(base->gcc_pwr_off_mask, + base->virtbase + D40_DREG_GCC); + + return 0; +} + +static int dma40_runtime_resume(struct device *dev) +{ + struct d40_base *base = dev_get_drvdata(dev); + + d40_save_restore_registers(base, false); + + writel_relaxed(D40_DREG_GCC_ENABLE_ALL, + base->virtbase + D40_DREG_GCC); + return 0; +} +#endif + +static const struct dev_pm_ops dma40_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(dma40_suspend, dma40_resume) + SET_RUNTIME_PM_OPS(dma40_runtime_suspend, + dma40_runtime_resume, + NULL) +}; + +/* Initialization functions. */ + +static int __init d40_phy_res_init(struct d40_base *base) +{ + int i; + int num_phy_chans_avail = 0; + u32 val[2]; + int odd_even_bit = -2; + int gcc = D40_DREG_GCC_ENA; + + val[0] = readl(base->virtbase + D40_DREG_PRSME); + val[1] = readl(base->virtbase + D40_DREG_PRSMO); + + for (i = 0; i < base->num_phy_chans; i++) { + base->phy_res[i].num = i; + odd_even_bit += 2 * ((i % 2) == 0); + if (((val[i % 2] >> odd_even_bit) & 3) == 1) { + /* Mark security only channels as occupied */ + base->phy_res[i].allocated_src = D40_ALLOC_PHY; + base->phy_res[i].allocated_dst = D40_ALLOC_PHY; + base->phy_res[i].reserved = true; + gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i), + D40_DREG_GCC_SRC); + gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i), + D40_DREG_GCC_DST); + + + } else { + base->phy_res[i].allocated_src = D40_ALLOC_FREE; + base->phy_res[i].allocated_dst = D40_ALLOC_FREE; + base->phy_res[i].reserved = false; + num_phy_chans_avail++; + } + spin_lock_init(&base->phy_res[i].lock); + } + + /* Mark disabled channels as occupied */ + for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) { + int chan = base->plat_data->disabled_channels[i]; + + base->phy_res[chan].allocated_src = D40_ALLOC_PHY; + base->phy_res[chan].allocated_dst = D40_ALLOC_PHY; + base->phy_res[chan].reserved = true; + gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan), + D40_DREG_GCC_SRC); + gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan), + D40_DREG_GCC_DST); + num_phy_chans_avail--; + } + + /* Mark soft_lli channels */ + for (i = 0; i < base->plat_data->num_of_soft_lli_chans; i++) { + int chan = base->plat_data->soft_lli_chans[i]; + + base->phy_res[chan].use_soft_lli = true; + } + + dev_info(base->dev, "%d of %d physical DMA channels available\n", + num_phy_chans_avail, base->num_phy_chans); + + /* Verify settings extended vs standard */ + val[0] = readl(base->virtbase + D40_DREG_PRTYP); + + for (i = 0; i < base->num_phy_chans; i++) { + + if (base->phy_res[i].allocated_src == D40_ALLOC_FREE && + (val[0] & 0x3) != 1) + dev_info(base->dev, + "[%s] INFO: channel %d is misconfigured (%d)\n", + __func__, i, val[0] & 0x3); + + val[0] = val[0] >> 2; + } + + /* + * To keep things simple, Enable all clocks initially. + * The clocks will get managed later post channel allocation. + * The clocks for the event lines on which reserved channels exists + * are not managed here. + */ + writel(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC); + base->gcc_pwr_off_mask = gcc; + + return num_phy_chans_avail; +} + +/* Called from the registered devm action */ +static void d40_drop_kmem_cache_action(void *d) +{ + struct kmem_cache *desc_slab = d; + + kmem_cache_destroy(desc_slab); +} + +static int __init d40_hw_detect_init(struct platform_device *pdev, + struct d40_base **retbase) +{ + struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev); + struct device *dev = &pdev->dev; + struct clk *clk; + void __iomem *virtbase; + struct d40_base *base; + int num_log_chans; + int num_phy_chans; + int num_memcpy_chans; + int i; + u32 pid; + u32 cid; + u8 rev; + int ret; + + clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + /* Get IO for DMAC base address */ + virtbase = devm_platform_ioremap_resource_byname(pdev, "base"); + if (IS_ERR(virtbase)) + return PTR_ERR(virtbase); + + /* This is just a regular AMBA PrimeCell ID actually */ + for (pid = 0, i = 0; i < 4; i++) + pid |= (readl(virtbase + SZ_4K - 0x20 + 4 * i) + & 255) << (i * 8); + for (cid = 0, i = 0; i < 4; i++) + cid |= (readl(virtbase + SZ_4K - 0x10 + 4 * i) + & 255) << (i * 8); + + if (cid != AMBA_CID) { + d40_err(dev, "Unknown hardware! No PrimeCell ID\n"); + return -EINVAL; + } + if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) { + d40_err(dev, "Unknown designer! Got %x wanted %x\n", + AMBA_MANF_BITS(pid), + AMBA_VENDOR_ST); + return -EINVAL; + } + /* + * HW revision: + * DB8500ed has revision 0 + * ? has revision 1 + * DB8500v1 has revision 2 + * DB8500v2 has revision 3 + * AP9540v1 has revision 4 + * DB8540v1 has revision 4 + */ + rev = AMBA_REV_BITS(pid); + if (rev < 2) { + d40_err(dev, "hardware revision: %d is not supported", rev); + return -EINVAL; + } + + /* The number of physical channels on this HW */ + if (plat_data->num_of_phy_chans) + num_phy_chans = plat_data->num_of_phy_chans; + else + num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; + + /* The number of channels used for memcpy */ + if (plat_data->num_of_memcpy_chans) + num_memcpy_chans = plat_data->num_of_memcpy_chans; + else + num_memcpy_chans = ARRAY_SIZE(dma40_memcpy_channels); + + num_log_chans = num_phy_chans * D40_MAX_LOG_CHAN_PER_PHY; + + dev_info(dev, + "hardware rev: %d with %d physical and %d logical channels\n", + rev, num_phy_chans, num_log_chans); + + base = devm_kzalloc(dev, + ALIGN(sizeof(struct d40_base), 4) + + (num_phy_chans + num_log_chans + num_memcpy_chans) * + sizeof(struct d40_chan), GFP_KERNEL); + + if (!base) + return -ENOMEM; + + base->rev = rev; + base->clk = clk; + base->num_memcpy_chans = num_memcpy_chans; + base->num_phy_chans = num_phy_chans; + base->num_log_chans = num_log_chans; + base->virtbase = virtbase; + base->plat_data = plat_data; + base->dev = dev; + base->phy_chans = ((void *)base) + ALIGN(sizeof(struct d40_base), 4); + base->log_chans = &base->phy_chans[num_phy_chans]; + + if (base->plat_data->num_of_phy_chans == 14) { + base->gen_dmac.backup = d40_backup_regs_v4b; + base->gen_dmac.backup_size = BACKUP_REGS_SZ_V4B; + base->gen_dmac.interrupt_en = D40_DREG_CPCMIS; + base->gen_dmac.interrupt_clear = D40_DREG_CPCICR; + base->gen_dmac.realtime_en = D40_DREG_CRSEG1; + base->gen_dmac.realtime_clear = D40_DREG_CRCEG1; + base->gen_dmac.high_prio_en = D40_DREG_CPSEG1; + base->gen_dmac.high_prio_clear = D40_DREG_CPCEG1; + base->gen_dmac.il = il_v4b; + base->gen_dmac.il_size = ARRAY_SIZE(il_v4b); + base->gen_dmac.init_reg = dma_init_reg_v4b; + base->gen_dmac.init_reg_size = ARRAY_SIZE(dma_init_reg_v4b); + } else { + if (base->rev >= 3) { + base->gen_dmac.backup = d40_backup_regs_v4a; + base->gen_dmac.backup_size = BACKUP_REGS_SZ_V4A; + } + base->gen_dmac.interrupt_en = D40_DREG_PCMIS; + base->gen_dmac.interrupt_clear = D40_DREG_PCICR; + base->gen_dmac.realtime_en = D40_DREG_RSEG1; + base->gen_dmac.realtime_clear = D40_DREG_RCEG1; + base->gen_dmac.high_prio_en = D40_DREG_PSEG1; + base->gen_dmac.high_prio_clear = D40_DREG_PCEG1; + base->gen_dmac.il = il_v4a; + base->gen_dmac.il_size = ARRAY_SIZE(il_v4a); + base->gen_dmac.init_reg = dma_init_reg_v4a; + base->gen_dmac.init_reg_size = ARRAY_SIZE(dma_init_reg_v4a); + } + + base->phy_res = devm_kcalloc(dev, num_phy_chans, + sizeof(*base->phy_res), + GFP_KERNEL); + if (!base->phy_res) + return -ENOMEM; + + base->lookup_phy_chans = devm_kcalloc(dev, num_phy_chans, + sizeof(*base->lookup_phy_chans), + GFP_KERNEL); + if (!base->lookup_phy_chans) + return -ENOMEM; + + base->lookup_log_chans = devm_kcalloc(dev, num_log_chans, + sizeof(*base->lookup_log_chans), + GFP_KERNEL); + if (!base->lookup_log_chans) + return -ENOMEM; + + base->reg_val_backup_chan = devm_kmalloc_array(dev, base->num_phy_chans, + sizeof(d40_backup_regs_chan), + GFP_KERNEL); + if (!base->reg_val_backup_chan) + return -ENOMEM; + + base->lcla_pool.alloc_map = devm_kcalloc(dev, num_phy_chans + * D40_LCLA_LINK_PER_EVENT_GRP, + sizeof(*base->lcla_pool.alloc_map), + GFP_KERNEL); + if (!base->lcla_pool.alloc_map) + return -ENOMEM; + + base->regs_interrupt = devm_kmalloc_array(dev, base->gen_dmac.il_size, + sizeof(*base->regs_interrupt), + GFP_KERNEL); + if (!base->regs_interrupt) + return -ENOMEM; + + base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (!base->desc_slab) + return -ENOMEM; + + ret = devm_add_action_or_reset(dev, d40_drop_kmem_cache_action, + base->desc_slab); + if (ret) + return ret; + + *retbase = base; + + return 0; +} + +static void __init d40_hw_init(struct d40_base *base) +{ + + int i; + u32 prmseo[2] = {0, 0}; + u32 activeo[2] = {0xFFFFFFFF, 0xFFFFFFFF}; + u32 pcmis = 0; + u32 pcicr = 0; + struct d40_reg_val *dma_init_reg = base->gen_dmac.init_reg; + u32 reg_size = base->gen_dmac.init_reg_size; + + for (i = 0; i < reg_size; i++) + writel(dma_init_reg[i].val, + base->virtbase + dma_init_reg[i].reg); + + /* Configure all our dma channels to default settings */ + for (i = 0; i < base->num_phy_chans; i++) { + + activeo[i % 2] = activeo[i % 2] << 2; + + if (base->phy_res[base->num_phy_chans - i - 1].allocated_src + == D40_ALLOC_PHY) { + activeo[i % 2] |= 3; + continue; + } + + /* Enable interrupt # */ + pcmis = (pcmis << 1) | 1; + + /* Clear interrupt # */ + pcicr = (pcicr << 1) | 1; + + /* Set channel to physical mode */ + prmseo[i % 2] = prmseo[i % 2] << 2; + prmseo[i % 2] |= 1; + + } + + writel(prmseo[1], base->virtbase + D40_DREG_PRMSE); + writel(prmseo[0], base->virtbase + D40_DREG_PRMSO); + writel(activeo[1], base->virtbase + D40_DREG_ACTIVE); + writel(activeo[0], base->virtbase + D40_DREG_ACTIVO); + + /* Write which interrupt to enable */ + writel(pcmis, base->virtbase + base->gen_dmac.interrupt_en); + + /* Write which interrupt to clear */ + writel(pcicr, base->virtbase + base->gen_dmac.interrupt_clear); + + /* These are __initdata and cannot be accessed after init */ + base->gen_dmac.init_reg = NULL; + base->gen_dmac.init_reg_size = 0; +} + +static int __init d40_lcla_allocate(struct d40_base *base) +{ + struct d40_lcla_pool *pool = &base->lcla_pool; + unsigned long *page_list; + int i, j; + int ret; + + /* + * This is somewhat ugly. We need 8192 bytes that are 18 bit aligned, + * To full fill this hardware requirement without wasting 256 kb + * we allocate pages until we get an aligned one. + */ + page_list = kmalloc_array(MAX_LCLA_ALLOC_ATTEMPTS, + sizeof(*page_list), + GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + /* Calculating how many pages that are required */ + base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE; + + for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) { + page_list[i] = __get_free_pages(GFP_KERNEL, + base->lcla_pool.pages); + if (!page_list[i]) { + + d40_err(base->dev, "Failed to allocate %d pages.\n", + base->lcla_pool.pages); + ret = -ENOMEM; + + for (j = 0; j < i; j++) + free_pages(page_list[j], base->lcla_pool.pages); + goto free_page_list; + } + + if ((virt_to_phys((void *)page_list[i]) & + (LCLA_ALIGNMENT - 1)) == 0) + break; + } + + for (j = 0; j < i; j++) + free_pages(page_list[j], base->lcla_pool.pages); + + if (i < MAX_LCLA_ALLOC_ATTEMPTS) { + base->lcla_pool.base = (void *)page_list[i]; + } else { + /* + * After many attempts and no succees with finding the correct + * alignment, try with allocating a big buffer. + */ + dev_warn(base->dev, + "[%s] Failed to get %d pages @ 18 bit align.\n", + __func__, base->lcla_pool.pages); + base->lcla_pool.base_unaligned = kmalloc(SZ_1K * + base->num_phy_chans + + LCLA_ALIGNMENT, + GFP_KERNEL); + if (!base->lcla_pool.base_unaligned) { + ret = -ENOMEM; + goto free_page_list; + } + + base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned, + LCLA_ALIGNMENT); + } + + pool->dma_addr = dma_map_single(base->dev, pool->base, + SZ_1K * base->num_phy_chans, + DMA_TO_DEVICE); + if (dma_mapping_error(base->dev, pool->dma_addr)) { + pool->dma_addr = 0; + ret = -ENOMEM; + goto free_page_list; + } + + writel(virt_to_phys(base->lcla_pool.base), + base->virtbase + D40_DREG_LCLA); + ret = 0; + free_page_list: + kfree(page_list); + return ret; +} + +static int __init d40_of_probe(struct device *dev, + struct device_node *np) +{ + struct stedma40_platform_data *pdata; + int num_phy = 0, num_memcpy = 0, num_disabled = 0; + const __be32 *list; + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + /* If absent this value will be obtained from h/w. */ + of_property_read_u32(np, "dma-channels", &num_phy); + if (num_phy > 0) + pdata->num_of_phy_chans = num_phy; + + list = of_get_property(np, "memcpy-channels", &num_memcpy); + num_memcpy /= sizeof(*list); + + if (num_memcpy > D40_MEMCPY_MAX_CHANS || num_memcpy <= 0) { + d40_err(dev, + "Invalid number of memcpy channels specified (%d)\n", + num_memcpy); + return -EINVAL; + } + pdata->num_of_memcpy_chans = num_memcpy; + + of_property_read_u32_array(np, "memcpy-channels", + dma40_memcpy_channels, + num_memcpy); + + list = of_get_property(np, "disabled-channels", &num_disabled); + num_disabled /= sizeof(*list); + + if (num_disabled >= STEDMA40_MAX_PHYS || num_disabled < 0) { + d40_err(dev, + "Invalid number of disabled channels specified (%d)\n", + num_disabled); + return -EINVAL; + } + + of_property_read_u32_array(np, "disabled-channels", + pdata->disabled_channels, + num_disabled); + pdata->disabled_channels[num_disabled] = -1; + + dev->platform_data = pdata; + + return 0; +} + +static int __init d40_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = pdev->dev.of_node; + struct device_node *np_lcpa; + struct d40_base *base; + struct resource *res; + struct resource res_lcpa; + int num_reserved_chans; + u32 val; + int ret; + + if (d40_of_probe(dev, np)) { + ret = -ENOMEM; + goto report_failure; + } + + ret = d40_hw_detect_init(pdev, &base); + if (ret) + goto report_failure; + + num_reserved_chans = d40_phy_res_init(base); + + platform_set_drvdata(pdev, base); + + spin_lock_init(&base->interrupt_lock); + spin_lock_init(&base->execmd_lock); + + /* Get IO for logical channel parameter address (LCPA) */ + np_lcpa = of_parse_phandle(np, "sram", 0); + if (!np_lcpa) { + dev_err(dev, "no LCPA SRAM node\n"); + ret = -EINVAL; + goto report_failure; + } + /* This is no device so read the address directly from the node */ + ret = of_address_to_resource(np_lcpa, 0, &res_lcpa); + if (ret) { + dev_err(dev, "no LCPA SRAM resource\n"); + goto report_failure; + } + base->lcpa_size = resource_size(&res_lcpa); + base->phy_lcpa = res_lcpa.start; + dev_info(dev, "found LCPA SRAM at %pad, size %pa\n", + &base->phy_lcpa, &base->lcpa_size); + + /* We make use of ESRAM memory for this. */ + val = readl(base->virtbase + D40_DREG_LCPA); + if (base->phy_lcpa != val && val != 0) { + dev_warn(dev, + "[%s] Mismatch LCPA dma 0x%x, def %08x\n", + __func__, val, (u32)base->phy_lcpa); + } else + writel(base->phy_lcpa, base->virtbase + D40_DREG_LCPA); + + base->lcpa_base = devm_ioremap(dev, base->phy_lcpa, base->lcpa_size); + if (!base->lcpa_base) { + ret = -ENOMEM; + d40_err(dev, "Failed to ioremap LCPA region\n"); + goto report_failure; + } + /* If lcla has to be located in ESRAM we don't need to allocate */ + if (base->plat_data->use_esram_lcla) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "lcla_esram"); + if (!res) { + ret = -ENOENT; + d40_err(dev, + "No \"lcla_esram\" memory resource\n"); + goto report_failure; + } + base->lcla_pool.base = devm_ioremap(dev, res->start, + resource_size(res)); + if (!base->lcla_pool.base) { + ret = -ENOMEM; + d40_err(dev, "Failed to ioremap LCLA region\n"); + goto report_failure; + } + writel(res->start, base->virtbase + D40_DREG_LCLA); + + } else { + ret = d40_lcla_allocate(base); + if (ret) { + d40_err(dev, "Failed to allocate LCLA area\n"); + goto destroy_cache; + } + } + + spin_lock_init(&base->lcla_pool.lock); + + base->irq = platform_get_irq(pdev, 0); + if (base->irq < 0) { + ret = base->irq; + goto destroy_cache; + } + + ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base); + if (ret) { + d40_err(dev, "No IRQ defined\n"); + goto destroy_cache; + } + + if (base->plat_data->use_esram_lcla) { + + base->lcpa_regulator = regulator_get(base->dev, "lcla_esram"); + if (IS_ERR(base->lcpa_regulator)) { + d40_err(dev, "Failed to get lcpa_regulator\n"); + ret = PTR_ERR(base->lcpa_regulator); + base->lcpa_regulator = NULL; + goto destroy_cache; + } + + ret = regulator_enable(base->lcpa_regulator); + if (ret) { + d40_err(dev, + "Failed to enable lcpa_regulator\n"); + regulator_put(base->lcpa_regulator); + base->lcpa_regulator = NULL; + goto destroy_cache; + } + } + + writel_relaxed(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC); + + pm_runtime_irq_safe(base->dev); + pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(base->dev); + pm_runtime_mark_last_busy(base->dev); + pm_runtime_set_active(base->dev); + pm_runtime_enable(base->dev); + + ret = d40_dmaengine_init(base, num_reserved_chans); + if (ret) + goto destroy_cache; + + ret = dma_set_max_seg_size(base->dev, STEDMA40_MAX_SEG_SIZE); + if (ret) { + d40_err(dev, "Failed to set dma max seg size\n"); + goto destroy_cache; + } + + d40_hw_init(base); + + ret = of_dma_controller_register(np, d40_xlate, NULL); + if (ret) { + dev_err(dev, + "could not register of_dma_controller\n"); + goto destroy_cache; + } + + dev_info(base->dev, "initialized\n"); + return 0; + + destroy_cache: + if (base->lcla_pool.dma_addr) + dma_unmap_single(base->dev, base->lcla_pool.dma_addr, + SZ_1K * base->num_phy_chans, + DMA_TO_DEVICE); + + if (!base->lcla_pool.base_unaligned && base->lcla_pool.base) + free_pages((unsigned long)base->lcla_pool.base, + base->lcla_pool.pages); + + kfree(base->lcla_pool.base_unaligned); + + if (base->lcpa_regulator) { + regulator_disable(base->lcpa_regulator); + regulator_put(base->lcpa_regulator); + } + pm_runtime_disable(base->dev); + + report_failure: + d40_err(dev, "probe failed\n"); + return ret; +} + +static const struct of_device_id d40_match[] = { + { .compatible = "stericsson,dma40", }, + {} +}; + +static struct platform_driver d40_driver = { + .driver = { + .name = D40_NAME, + .pm = &dma40_pm_ops, + .of_match_table = d40_match, + }, +}; + +static int __init stedma40_init(void) +{ + return platform_driver_probe(&d40_driver, d40_probe); +} +subsys_initcall(stedma40_init); diff --git a/drivers/dma/ste_dma40.h b/drivers/dma/ste_dma40.h new file mode 100644 index 0000000000..c697bfe16a --- /dev/null +++ b/drivers/dma/ste_dma40.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef STE_DMA40_H +#define STE_DMA40_H + +/* + * Maxium size for a single dma descriptor + * Size is limited to 16 bits. + * Size is in the units of addr-widths (1,2,4,8 bytes) + * Larger transfers will be split up to multiple linked desc + */ +#define STEDMA40_MAX_SEG_SIZE 0xFFFF + +/* dev types for memcpy */ +#define STEDMA40_DEV_DST_MEMORY (-1) +#define STEDMA40_DEV_SRC_MEMORY (-1) + +enum stedma40_mode { + STEDMA40_MODE_LOGICAL = 0, + STEDMA40_MODE_PHYSICAL, + STEDMA40_MODE_OPERATION, +}; + +enum stedma40_mode_opt { + STEDMA40_PCHAN_BASIC_MODE = 0, + STEDMA40_LCHAN_SRC_LOG_DST_LOG = 0, + STEDMA40_PCHAN_MODULO_MODE, + STEDMA40_PCHAN_DOUBLE_DST_MODE, + STEDMA40_LCHAN_SRC_PHY_DST_LOG, + STEDMA40_LCHAN_SRC_LOG_DST_PHY, +}; + +#define STEDMA40_ESIZE_8_BIT 0x0 +#define STEDMA40_ESIZE_16_BIT 0x1 +#define STEDMA40_ESIZE_32_BIT 0x2 +#define STEDMA40_ESIZE_64_BIT 0x3 + +/* The value 4 indicates that PEN-reg shall be set to 0 */ +#define STEDMA40_PSIZE_PHY_1 0x4 +#define STEDMA40_PSIZE_PHY_2 0x0 +#define STEDMA40_PSIZE_PHY_4 0x1 +#define STEDMA40_PSIZE_PHY_8 0x2 +#define STEDMA40_PSIZE_PHY_16 0x3 + +/* + * The number of elements differ in logical and + * physical mode + */ +#define STEDMA40_PSIZE_LOG_1 STEDMA40_PSIZE_PHY_2 +#define STEDMA40_PSIZE_LOG_4 STEDMA40_PSIZE_PHY_4 +#define STEDMA40_PSIZE_LOG_8 STEDMA40_PSIZE_PHY_8 +#define STEDMA40_PSIZE_LOG_16 STEDMA40_PSIZE_PHY_16 + +/* Maximum number of possible physical channels */ +#define STEDMA40_MAX_PHYS 32 + +enum stedma40_flow_ctrl { + STEDMA40_NO_FLOW_CTRL, + STEDMA40_FLOW_CTRL, +}; + +/** + * struct stedma40_half_channel_info - dst/src channel configuration + * + * @big_endian: true if the src/dst should be read as big endian + * @data_width: Data width of the src/dst hardware + * @p_size: Burst size + * @flow_ctrl: Flow control on/off. + */ +struct stedma40_half_channel_info { + bool big_endian; + enum dma_slave_buswidth data_width; + int psize; + enum stedma40_flow_ctrl flow_ctrl; +}; + +/** + * struct stedma40_chan_cfg - Structure to be filled by client drivers. + * + * @dir: MEM 2 MEM, PERIPH 2 MEM , MEM 2 PERIPH, PERIPH 2 PERIPH + * @high_priority: true if high-priority + * @realtime: true if realtime mode is to be enabled. Only available on DMA40 + * version 3+, i.e DB8500v2+ + * @mode: channel mode: physical, logical, or operation + * @mode_opt: options for the chosen channel mode + * @dev_type: src/dst device type (driver uses dir to figure out which) + * @src_info: Parameters for dst half channel + * @dst_info: Parameters for dst half channel + * @use_fixed_channel: if true, use physical channel specified by phy_channel + * @phy_channel: physical channel to use, only if use_fixed_channel is true + * + * This structure has to be filled by the client drivers. + * It is recommended to do all dma configurations for clients in the machine. + * + */ +struct stedma40_chan_cfg { + enum dma_transfer_direction dir; + bool high_priority; + bool realtime; + enum stedma40_mode mode; + enum stedma40_mode_opt mode_opt; + int dev_type; + struct stedma40_half_channel_info src_info; + struct stedma40_half_channel_info dst_info; + + bool use_fixed_channel; + int phy_channel; +}; + +#endif /* STE_DMA40_H */ diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c new file mode 100644 index 0000000000..4c489b126c --- /dev/null +++ b/drivers/dma/ste_dma40_ll.c @@ -0,0 +1,449 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) ST-Ericsson SA 2007-2010 + * Author: Per Forlin for ST-Ericsson + * Author: Jonas Aaberg for ST-Ericsson + */ + +#include +#include + +#include "ste_dma40.h" +#include "ste_dma40_ll.h" + +static u8 d40_width_to_bits(enum dma_slave_buswidth width) +{ + if (width == DMA_SLAVE_BUSWIDTH_1_BYTE) + return STEDMA40_ESIZE_8_BIT; + else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + return STEDMA40_ESIZE_16_BIT; + else if (width == DMA_SLAVE_BUSWIDTH_8_BYTES) + return STEDMA40_ESIZE_64_BIT; + else + return STEDMA40_ESIZE_32_BIT; +} + +/* Sets up proper LCSP1 and LCSP3 register for a logical channel */ +void d40_log_cfg(struct stedma40_chan_cfg *cfg, + u32 *lcsp1, u32 *lcsp3) +{ + u32 l3 = 0; /* dst */ + u32 l1 = 0; /* src */ + + /* src is mem? -> increase address pos */ + if (cfg->dir == DMA_MEM_TO_DEV || + cfg->dir == DMA_MEM_TO_MEM) + l1 |= BIT(D40_MEM_LCSP1_SCFG_INCR_POS); + + /* dst is mem? -> increase address pos */ + if (cfg->dir == DMA_DEV_TO_MEM || + cfg->dir == DMA_MEM_TO_MEM) + l3 |= BIT(D40_MEM_LCSP3_DCFG_INCR_POS); + + /* src is hw? -> master port 1 */ + if (cfg->dir == DMA_DEV_TO_MEM || + cfg->dir == DMA_DEV_TO_DEV) + l1 |= BIT(D40_MEM_LCSP1_SCFG_MST_POS); + + /* dst is hw? -> master port 1 */ + if (cfg->dir == DMA_MEM_TO_DEV || + cfg->dir == DMA_DEV_TO_DEV) + l3 |= BIT(D40_MEM_LCSP3_DCFG_MST_POS); + + l3 |= BIT(D40_MEM_LCSP3_DCFG_EIM_POS); + l3 |= cfg->dst_info.psize << D40_MEM_LCSP3_DCFG_PSIZE_POS; + l3 |= d40_width_to_bits(cfg->dst_info.data_width) + << D40_MEM_LCSP3_DCFG_ESIZE_POS; + + l1 |= BIT(D40_MEM_LCSP1_SCFG_EIM_POS); + l1 |= cfg->src_info.psize << D40_MEM_LCSP1_SCFG_PSIZE_POS; + l1 |= d40_width_to_bits(cfg->src_info.data_width) + << D40_MEM_LCSP1_SCFG_ESIZE_POS; + + *lcsp1 = l1; + *lcsp3 = l3; + +} + +void d40_phy_cfg(struct stedma40_chan_cfg *cfg, u32 *src_cfg, u32 *dst_cfg) +{ + u32 src = 0; + u32 dst = 0; + + if ((cfg->dir == DMA_DEV_TO_MEM) || + (cfg->dir == DMA_DEV_TO_DEV)) { + /* Set master port to 1 */ + src |= BIT(D40_SREG_CFG_MST_POS); + src |= D40_TYPE_TO_EVENT(cfg->dev_type); + + if (cfg->src_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL) + src |= BIT(D40_SREG_CFG_PHY_TM_POS); + else + src |= 3 << D40_SREG_CFG_PHY_TM_POS; + } + if ((cfg->dir == DMA_MEM_TO_DEV) || + (cfg->dir == DMA_DEV_TO_DEV)) { + /* Set master port to 1 */ + dst |= BIT(D40_SREG_CFG_MST_POS); + dst |= D40_TYPE_TO_EVENT(cfg->dev_type); + + if (cfg->dst_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL) + dst |= BIT(D40_SREG_CFG_PHY_TM_POS); + else + dst |= 3 << D40_SREG_CFG_PHY_TM_POS; + } + /* Interrupt on end of transfer for destination */ + dst |= BIT(D40_SREG_CFG_TIM_POS); + + /* Generate interrupt on error */ + src |= BIT(D40_SREG_CFG_EIM_POS); + dst |= BIT(D40_SREG_CFG_EIM_POS); + + /* PSIZE */ + if (cfg->src_info.psize != STEDMA40_PSIZE_PHY_1) { + src |= BIT(D40_SREG_CFG_PHY_PEN_POS); + src |= cfg->src_info.psize << D40_SREG_CFG_PSIZE_POS; + } + if (cfg->dst_info.psize != STEDMA40_PSIZE_PHY_1) { + dst |= BIT(D40_SREG_CFG_PHY_PEN_POS); + dst |= cfg->dst_info.psize << D40_SREG_CFG_PSIZE_POS; + } + + /* Element size */ + src |= d40_width_to_bits(cfg->src_info.data_width) + << D40_SREG_CFG_ESIZE_POS; + dst |= d40_width_to_bits(cfg->dst_info.data_width) + << D40_SREG_CFG_ESIZE_POS; + + /* Set the priority bit to high for the physical channel */ + if (cfg->high_priority) { + src |= BIT(D40_SREG_CFG_PRI_POS); + dst |= BIT(D40_SREG_CFG_PRI_POS); + } + + if (cfg->src_info.big_endian) + src |= BIT(D40_SREG_CFG_LBE_POS); + if (cfg->dst_info.big_endian) + dst |= BIT(D40_SREG_CFG_LBE_POS); + + *src_cfg = src; + *dst_cfg = dst; +} + +static int d40_phy_fill_lli(struct d40_phy_lli *lli, + dma_addr_t data, + u32 data_size, + dma_addr_t next_lli, + u32 reg_cfg, + struct stedma40_half_channel_info *info, + unsigned int flags) +{ + bool addr_inc = flags & LLI_ADDR_INC; + bool term_int = flags & LLI_TERM_INT; + unsigned int data_width = info->data_width; + int psize = info->psize; + int num_elems; + + if (psize == STEDMA40_PSIZE_PHY_1) + num_elems = 1; + else + num_elems = 2 << psize; + + /* Must be aligned */ + if (!IS_ALIGNED(data, data_width)) + return -EINVAL; + + /* Transfer size can't be smaller than (num_elms * elem_size) */ + if (data_size < num_elems * data_width) + return -EINVAL; + + /* The number of elements. IE now many chunks */ + lli->reg_elt = (data_size / data_width) << D40_SREG_ELEM_PHY_ECNT_POS; + + /* + * Distance to next element sized entry. + * Usually the size of the element unless you want gaps. + */ + if (addr_inc) + lli->reg_elt |= data_width << D40_SREG_ELEM_PHY_EIDX_POS; + + /* Where the data is */ + lli->reg_ptr = data; + lli->reg_cfg = reg_cfg; + + /* If this scatter list entry is the last one, no next link */ + if (next_lli == 0) + lli->reg_lnk = BIT(D40_SREG_LNK_PHY_TCP_POS); + else + lli->reg_lnk = next_lli; + + /* Set/clear interrupt generation on this link item.*/ + if (term_int) + lli->reg_cfg |= BIT(D40_SREG_CFG_TIM_POS); + else + lli->reg_cfg &= ~BIT(D40_SREG_CFG_TIM_POS); + + /* + * Post link - D40_SREG_LNK_PHY_PRE_POS = 0 + * Relink happens after transfer completion. + */ + + return 0; +} + +static int d40_seg_size(int size, int data_width1, int data_width2) +{ + u32 max_w = max(data_width1, data_width2); + u32 min_w = min(data_width1, data_width2); + u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE * min_w, max_w); + + if (seg_max > STEDMA40_MAX_SEG_SIZE) + seg_max -= max_w; + + if (size <= seg_max) + return size; + + if (size <= 2 * seg_max) + return ALIGN(size / 2, max_w); + + return seg_max; +} + +static struct d40_phy_lli * +d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size, + dma_addr_t lli_phys, dma_addr_t first_phys, u32 reg_cfg, + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo, + unsigned long flags) +{ + bool lastlink = flags & LLI_LAST_LINK; + bool addr_inc = flags & LLI_ADDR_INC; + bool term_int = flags & LLI_TERM_INT; + bool cyclic = flags & LLI_CYCLIC; + int err; + dma_addr_t next = lli_phys; + int size_rest = size; + int size_seg = 0; + + /* + * This piece may be split up based on d40_seg_size(); we only want the + * term int on the last part. + */ + if (term_int) + flags &= ~LLI_TERM_INT; + + do { + size_seg = d40_seg_size(size_rest, info->data_width, + otherinfo->data_width); + size_rest -= size_seg; + + if (size_rest == 0 && term_int) + flags |= LLI_TERM_INT; + + if (size_rest == 0 && lastlink) + next = cyclic ? first_phys : 0; + else + next = ALIGN(next + sizeof(struct d40_phy_lli), + D40_LLI_ALIGN); + + err = d40_phy_fill_lli(lli, addr, size_seg, next, + reg_cfg, info, flags); + + if (err) + goto err; + + lli++; + if (addr_inc) + addr += size_seg; + } while (size_rest); + + return lli; + +err: + return NULL; +} + +int d40_phy_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t target, + struct d40_phy_lli *lli_sg, + dma_addr_t lli_phys, + u32 reg_cfg, + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo, + unsigned long flags) +{ + int total_size = 0; + int i; + struct scatterlist *current_sg = sg; + struct d40_phy_lli *lli = lli_sg; + dma_addr_t l_phys = lli_phys; + + if (!target) + flags |= LLI_ADDR_INC; + + for_each_sg(sg, current_sg, sg_len, i) { + dma_addr_t sg_addr = sg_dma_address(current_sg); + unsigned int len = sg_dma_len(current_sg); + dma_addr_t dst = target ?: sg_addr; + + total_size += sg_dma_len(current_sg); + + if (i == sg_len - 1) + flags |= LLI_TERM_INT | LLI_LAST_LINK; + + l_phys = ALIGN(lli_phys + (lli - lli_sg) * + sizeof(struct d40_phy_lli), D40_LLI_ALIGN); + + lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, lli_phys, + reg_cfg, info, otherinfo, flags); + + if (lli == NULL) + return -EINVAL; + } + + return total_size; +} + + +/* DMA logical lli operations */ + +static void d40_log_lli_link(struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags) +{ + bool interrupt = flags & LLI_TERM_INT; + u32 slos = 0; + u32 dlos = 0; + + if (next != -EINVAL) { + slos = next * 2; + dlos = next * 2 + 1; + } + + if (interrupt) { + lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK; + lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK; + } + + lli_src->lcsp13 = (lli_src->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) | + (slos << D40_MEM_LCSP1_SLOS_POS); + + lli_dst->lcsp13 = (lli_dst->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) | + (dlos << D40_MEM_LCSP1_SLOS_POS); +} + +void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags) +{ + d40_log_lli_link(lli_dst, lli_src, next, flags); + + writel_relaxed(lli_src->lcsp02, &lcpa[0].lcsp0); + writel_relaxed(lli_src->lcsp13, &lcpa[0].lcsp1); + writel_relaxed(lli_dst->lcsp02, &lcpa[0].lcsp2); + writel_relaxed(lli_dst->lcsp13, &lcpa[0].lcsp3); +} + +void d40_log_lli_lcla_write(struct d40_log_lli *lcla, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags) +{ + d40_log_lli_link(lli_dst, lli_src, next, flags); + + writel_relaxed(lli_src->lcsp02, &lcla[0].lcsp02); + writel_relaxed(lli_src->lcsp13, &lcla[0].lcsp13); + writel_relaxed(lli_dst->lcsp02, &lcla[1].lcsp02); + writel_relaxed(lli_dst->lcsp13, &lcla[1].lcsp13); +} + +static void d40_log_fill_lli(struct d40_log_lli *lli, + dma_addr_t data, u32 data_size, + u32 reg_cfg, + u32 data_width, + unsigned int flags) +{ + bool addr_inc = flags & LLI_ADDR_INC; + + lli->lcsp13 = reg_cfg; + + /* The number of elements to transfer */ + lli->lcsp02 = ((data_size / data_width) << + D40_MEM_LCSP0_ECNT_POS) & D40_MEM_LCSP0_ECNT_MASK; + + BUG_ON((data_size / data_width) > STEDMA40_MAX_SEG_SIZE); + + /* 16 LSBs address of the current element */ + lli->lcsp02 |= data & D40_MEM_LCSP0_SPTR_MASK; + /* 16 MSBs address of the current element */ + lli->lcsp13 |= data & D40_MEM_LCSP1_SPTR_MASK; + + if (addr_inc) + lli->lcsp13 |= D40_MEM_LCSP1_SCFG_INCR_MASK; + +} + +static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, + dma_addr_t addr, + int size, + u32 lcsp13, /* src or dst*/ + u32 data_width1, + u32 data_width2, + unsigned int flags) +{ + bool addr_inc = flags & LLI_ADDR_INC; + struct d40_log_lli *lli = lli_sg; + int size_rest = size; + int size_seg = 0; + + do { + size_seg = d40_seg_size(size_rest, data_width1, data_width2); + size_rest -= size_seg; + + d40_log_fill_lli(lli, + addr, + size_seg, + lcsp13, data_width1, + flags); + if (addr_inc) + addr += size_seg; + lli++; + } while (size_rest); + + return lli; +} + +int d40_log_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t dev_addr, + struct d40_log_lli *lli_sg, + u32 lcsp13, /* src or dst*/ + u32 data_width1, u32 data_width2) +{ + int total_size = 0; + struct scatterlist *current_sg = sg; + int i; + struct d40_log_lli *lli = lli_sg; + unsigned long flags = 0; + + if (!dev_addr) + flags |= LLI_ADDR_INC; + + for_each_sg(sg, current_sg, sg_len, i) { + dma_addr_t sg_addr = sg_dma_address(current_sg); + unsigned int len = sg_dma_len(current_sg); + dma_addr_t addr = dev_addr ?: sg_addr; + + total_size += sg_dma_len(current_sg); + + lli = d40_log_buf_to_lli(lli, addr, len, + lcsp13, + data_width1, + data_width2, + flags); + } + + return total_size; +} diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h new file mode 100644 index 0000000000..c504e855eb --- /dev/null +++ b/drivers/dma/ste_dma40_ll.h @@ -0,0 +1,470 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) ST-Ericsson SA 2007-2010 + * Author: Per Friden for ST-Ericsson SA + * Author: Jonas Aaberg for ST-Ericsson SA + */ +#ifndef STE_DMA40_LL_H +#define STE_DMA40_LL_H + +#define D40_DREG_PCBASE 0x400 +#define D40_DREG_PCDELTA (8 * 4) +#define D40_LLI_ALIGN 16 /* LLI alignment must be 16 bytes. */ + +#define D40_LCPA_CHAN_SIZE 32 +#define D40_LCPA_CHAN_DST_DELTA 16 + +#define D40_TYPE_TO_GROUP(type) (type / 16) +#define D40_TYPE_TO_EVENT(type) (type % 16) +#define D40_GROUP_SIZE 8 +#define D40_PHYS_TO_GROUP(phys) ((phys & (D40_GROUP_SIZE - 1)) / 2) + +/* Most bits of the CFG register are the same in log as in phy mode */ +#define D40_SREG_CFG_MST_POS 15 +#define D40_SREG_CFG_TIM_POS 14 +#define D40_SREG_CFG_EIM_POS 13 +#define D40_SREG_CFG_LOG_INCR_POS 12 +#define D40_SREG_CFG_PHY_PEN_POS 12 +#define D40_SREG_CFG_PSIZE_POS 10 +#define D40_SREG_CFG_ESIZE_POS 8 +#define D40_SREG_CFG_PRI_POS 7 +#define D40_SREG_CFG_LBE_POS 6 +#define D40_SREG_CFG_LOG_GIM_POS 5 +#define D40_SREG_CFG_LOG_MFU_POS 4 +#define D40_SREG_CFG_PHY_TM_POS 4 +#define D40_SREG_CFG_PHY_EVTL_POS 0 + + +/* Standard channel parameters - basic mode (element register) */ +#define D40_SREG_ELEM_PHY_ECNT_POS 16 +#define D40_SREG_ELEM_PHY_EIDX_POS 0 + +#define D40_SREG_ELEM_PHY_ECNT_MASK (0xFFFF << D40_SREG_ELEM_PHY_ECNT_POS) + +/* Standard channel parameters - basic mode (Link register) */ +#define D40_SREG_LNK_PHY_TCP_POS 0 +#define D40_SREG_LNK_PHY_LMP_POS 1 +#define D40_SREG_LNK_PHY_PRE_POS 2 +/* + * Source destination link address. Contains the + * 29-bit byte word aligned address of the reload area. + */ +#define D40_SREG_LNK_PHYS_LNK_MASK 0xFFFFFFF8UL + +/* Standard basic channel logical mode */ + +/* Element register */ +#define D40_SREG_ELEM_LOG_ECNT_POS 16 +#define D40_SREG_ELEM_LOG_LIDX_POS 8 +#define D40_SREG_ELEM_LOG_LOS_POS 1 +#define D40_SREG_ELEM_LOG_TCP_POS 0 + +#define D40_SREG_ELEM_LOG_LIDX_MASK (0xFF << D40_SREG_ELEM_LOG_LIDX_POS) + +/* Link register */ +#define D40_EVENTLINE_POS(i) (2 * i) +#define D40_EVENTLINE_MASK(i) (0x3 << D40_EVENTLINE_POS(i)) + +/* Standard basic channel logical params in memory */ + +/* LCSP0 */ +#define D40_MEM_LCSP0_ECNT_POS 16 +#define D40_MEM_LCSP0_SPTR_POS 0 + +#define D40_MEM_LCSP0_ECNT_MASK (0xFFFF << D40_MEM_LCSP0_ECNT_POS) +#define D40_MEM_LCSP0_SPTR_MASK (0xFFFF << D40_MEM_LCSP0_SPTR_POS) + +/* LCSP1 */ +#define D40_MEM_LCSP1_SPTR_POS 16 +#define D40_MEM_LCSP1_SCFG_MST_POS 15 +#define D40_MEM_LCSP1_SCFG_TIM_POS 14 +#define D40_MEM_LCSP1_SCFG_EIM_POS 13 +#define D40_MEM_LCSP1_SCFG_INCR_POS 12 +#define D40_MEM_LCSP1_SCFG_PSIZE_POS 10 +#define D40_MEM_LCSP1_SCFG_ESIZE_POS 8 +#define D40_MEM_LCSP1_SLOS_POS 1 +#define D40_MEM_LCSP1_STCP_POS 0 + +#define D40_MEM_LCSP1_SPTR_MASK (0xFFFF << D40_MEM_LCSP1_SPTR_POS) +#define D40_MEM_LCSP1_SCFG_TIM_MASK (0x1 << D40_MEM_LCSP1_SCFG_TIM_POS) +#define D40_MEM_LCSP1_SCFG_INCR_MASK (0x1 << D40_MEM_LCSP1_SCFG_INCR_POS) +#define D40_MEM_LCSP1_SCFG_PSIZE_MASK (0x3 << D40_MEM_LCSP1_SCFG_PSIZE_POS) +#define D40_MEM_LCSP1_SLOS_MASK (0x7F << D40_MEM_LCSP1_SLOS_POS) +#define D40_MEM_LCSP1_STCP_MASK (0x1 << D40_MEM_LCSP1_STCP_POS) + +/* LCSP2 */ +#define D40_MEM_LCSP2_ECNT_POS 16 + +#define D40_MEM_LCSP2_ECNT_MASK (0xFFFF << D40_MEM_LCSP2_ECNT_POS) + +/* LCSP3 */ +#define D40_MEM_LCSP3_DCFG_MST_POS 15 +#define D40_MEM_LCSP3_DCFG_TIM_POS 14 +#define D40_MEM_LCSP3_DCFG_EIM_POS 13 +#define D40_MEM_LCSP3_DCFG_INCR_POS 12 +#define D40_MEM_LCSP3_DCFG_PSIZE_POS 10 +#define D40_MEM_LCSP3_DCFG_ESIZE_POS 8 +#define D40_MEM_LCSP3_DLOS_POS 1 +#define D40_MEM_LCSP3_DTCP_POS 0 + +#define D40_MEM_LCSP3_DLOS_MASK (0x7F << D40_MEM_LCSP3_DLOS_POS) +#define D40_MEM_LCSP3_DTCP_MASK (0x1 << D40_MEM_LCSP3_DTCP_POS) + + +/* Standard channel parameter register offsets */ +#define D40_CHAN_REG_SSCFG 0x00 +#define D40_CHAN_REG_SSELT 0x04 +#define D40_CHAN_REG_SSPTR 0x08 +#define D40_CHAN_REG_SSLNK 0x0C +#define D40_CHAN_REG_SDCFG 0x10 +#define D40_CHAN_REG_SDELT 0x14 +#define D40_CHAN_REG_SDPTR 0x18 +#define D40_CHAN_REG_SDLNK 0x1C + +/* DMA Register Offsets */ +#define D40_DREG_GCC 0x000 +#define D40_DREG_GCC_ENA 0x1 +/* This assumes that there are only 4 event groups */ +#define D40_DREG_GCC_ENABLE_ALL 0x3ff01 +#define D40_DREG_GCC_EVTGRP_POS 8 +#define D40_DREG_GCC_SRC 0 +#define D40_DREG_GCC_DST 1 +#define D40_DREG_GCC_EVTGRP_ENA(x, y) \ + (1 << (D40_DREG_GCC_EVTGRP_POS + 2 * x + y)) + +#define D40_DREG_PRTYP 0x004 +#define D40_DREG_PRSME 0x008 +#define D40_DREG_PRSMO 0x00C +#define D40_DREG_PRMSE 0x010 +#define D40_DREG_PRMSO 0x014 +#define D40_DREG_PRMOE 0x018 +#define D40_DREG_PRMOO 0x01C +#define D40_DREG_PRMO_PCHAN_BASIC 0x1 +#define D40_DREG_PRMO_PCHAN_MODULO 0x2 +#define D40_DREG_PRMO_PCHAN_DOUBLE_DST 0x3 +#define D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG 0x1 +#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY 0x2 +#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG 0x3 + +#define D40_DREG_LCPA 0x020 +#define D40_DREG_LCLA 0x024 + +#define D40_DREG_SSEG1 0x030 +#define D40_DREG_SSEG2 0x034 +#define D40_DREG_SSEG3 0x038 +#define D40_DREG_SSEG4 0x03C + +#define D40_DREG_SCEG1 0x040 +#define D40_DREG_SCEG2 0x044 +#define D40_DREG_SCEG3 0x048 +#define D40_DREG_SCEG4 0x04C + +#define D40_DREG_ACTIVE 0x050 +#define D40_DREG_ACTIVO 0x054 +#define D40_DREG_CIDMOD 0x058 +#define D40_DREG_TCIDV 0x05C +#define D40_DREG_PCMIS 0x060 +#define D40_DREG_PCICR 0x064 +#define D40_DREG_PCTIS 0x068 +#define D40_DREG_PCEIS 0x06C + +#define D40_DREG_SPCMIS 0x070 +#define D40_DREG_SPCICR 0x074 +#define D40_DREG_SPCTIS 0x078 +#define D40_DREG_SPCEIS 0x07C + +#define D40_DREG_LCMIS0 0x080 +#define D40_DREG_LCMIS1 0x084 +#define D40_DREG_LCMIS2 0x088 +#define D40_DREG_LCMIS3 0x08C +#define D40_DREG_LCICR0 0x090 +#define D40_DREG_LCICR1 0x094 +#define D40_DREG_LCICR2 0x098 +#define D40_DREG_LCICR3 0x09C +#define D40_DREG_LCTIS0 0x0A0 +#define D40_DREG_LCTIS1 0x0A4 +#define D40_DREG_LCTIS2 0x0A8 +#define D40_DREG_LCTIS3 0x0AC +#define D40_DREG_LCEIS0 0x0B0 +#define D40_DREG_LCEIS1 0x0B4 +#define D40_DREG_LCEIS2 0x0B8 +#define D40_DREG_LCEIS3 0x0BC + +#define D40_DREG_SLCMIS1 0x0C0 +#define D40_DREG_SLCMIS2 0x0C4 +#define D40_DREG_SLCMIS3 0x0C8 +#define D40_DREG_SLCMIS4 0x0CC + +#define D40_DREG_SLCICR1 0x0D0 +#define D40_DREG_SLCICR2 0x0D4 +#define D40_DREG_SLCICR3 0x0D8 +#define D40_DREG_SLCICR4 0x0DC + +#define D40_DREG_SLCTIS1 0x0E0 +#define D40_DREG_SLCTIS2 0x0E4 +#define D40_DREG_SLCTIS3 0x0E8 +#define D40_DREG_SLCTIS4 0x0EC + +#define D40_DREG_SLCEIS1 0x0F0 +#define D40_DREG_SLCEIS2 0x0F4 +#define D40_DREG_SLCEIS3 0x0F8 +#define D40_DREG_SLCEIS4 0x0FC + +#define D40_DREG_FSESS1 0x100 +#define D40_DREG_FSESS2 0x104 + +#define D40_DREG_FSEBS1 0x108 +#define D40_DREG_FSEBS2 0x10C + +#define D40_DREG_PSEG1 0x110 +#define D40_DREG_PSEG2 0x114 +#define D40_DREG_PSEG3 0x118 +#define D40_DREG_PSEG4 0x11C +#define D40_DREG_PCEG1 0x120 +#define D40_DREG_PCEG2 0x124 +#define D40_DREG_PCEG3 0x128 +#define D40_DREG_PCEG4 0x12C +#define D40_DREG_RSEG1 0x130 +#define D40_DREG_RSEG2 0x134 +#define D40_DREG_RSEG3 0x138 +#define D40_DREG_RSEG4 0x13C +#define D40_DREG_RCEG1 0x140 +#define D40_DREG_RCEG2 0x144 +#define D40_DREG_RCEG3 0x148 +#define D40_DREG_RCEG4 0x14C + +#define D40_DREG_PREFOT 0x15C +#define D40_DREG_EXTCFG 0x160 + +#define D40_DREG_CPSEG1 0x200 +#define D40_DREG_CPSEG2 0x204 +#define D40_DREG_CPSEG3 0x208 +#define D40_DREG_CPSEG4 0x20C +#define D40_DREG_CPSEG5 0x210 + +#define D40_DREG_CPCEG1 0x220 +#define D40_DREG_CPCEG2 0x224 +#define D40_DREG_CPCEG3 0x228 +#define D40_DREG_CPCEG4 0x22C +#define D40_DREG_CPCEG5 0x230 + +#define D40_DREG_CRSEG1 0x240 +#define D40_DREG_CRSEG2 0x244 +#define D40_DREG_CRSEG3 0x248 +#define D40_DREG_CRSEG4 0x24C +#define D40_DREG_CRSEG5 0x250 + +#define D40_DREG_CRCEG1 0x260 +#define D40_DREG_CRCEG2 0x264 +#define D40_DREG_CRCEG3 0x268 +#define D40_DREG_CRCEG4 0x26C +#define D40_DREG_CRCEG5 0x270 + +#define D40_DREG_CFSESS1 0x280 +#define D40_DREG_CFSESS2 0x284 +#define D40_DREG_CFSESS3 0x288 + +#define D40_DREG_CFSEBS1 0x290 +#define D40_DREG_CFSEBS2 0x294 +#define D40_DREG_CFSEBS3 0x298 + +#define D40_DREG_CLCMIS1 0x300 +#define D40_DREG_CLCMIS2 0x304 +#define D40_DREG_CLCMIS3 0x308 +#define D40_DREG_CLCMIS4 0x30C +#define D40_DREG_CLCMIS5 0x310 + +#define D40_DREG_CLCICR1 0x320 +#define D40_DREG_CLCICR2 0x324 +#define D40_DREG_CLCICR3 0x328 +#define D40_DREG_CLCICR4 0x32C +#define D40_DREG_CLCICR5 0x330 + +#define D40_DREG_CLCTIS1 0x340 +#define D40_DREG_CLCTIS2 0x344 +#define D40_DREG_CLCTIS3 0x348 +#define D40_DREG_CLCTIS4 0x34C +#define D40_DREG_CLCTIS5 0x350 + +#define D40_DREG_CLCEIS1 0x360 +#define D40_DREG_CLCEIS2 0x364 +#define D40_DREG_CLCEIS3 0x368 +#define D40_DREG_CLCEIS4 0x36C +#define D40_DREG_CLCEIS5 0x370 + +#define D40_DREG_CPCMIS 0x380 +#define D40_DREG_CPCICR 0x384 +#define D40_DREG_CPCTIS 0x388 +#define D40_DREG_CPCEIS 0x38C + +#define D40_DREG_SCCIDA1 0xE80 +#define D40_DREG_SCCIDA2 0xE90 +#define D40_DREG_SCCIDA3 0xEA0 +#define D40_DREG_SCCIDA4 0xEB0 +#define D40_DREG_SCCIDA5 0xEC0 + +#define D40_DREG_SCCIDB1 0xE84 +#define D40_DREG_SCCIDB2 0xE94 +#define D40_DREG_SCCIDB3 0xEA4 +#define D40_DREG_SCCIDB4 0xEB4 +#define D40_DREG_SCCIDB5 0xEC4 + +#define D40_DREG_PRSCCIDA 0xF80 +#define D40_DREG_PRSCCIDB 0xF84 + +#define D40_DREG_STFU 0xFC8 +#define D40_DREG_ICFG 0xFCC +#define D40_DREG_PERIPHID0 0xFE0 +#define D40_DREG_PERIPHID1 0xFE4 +#define D40_DREG_PERIPHID2 0xFE8 +#define D40_DREG_PERIPHID3 0xFEC +#define D40_DREG_CELLID0 0xFF0 +#define D40_DREG_CELLID1 0xFF4 +#define D40_DREG_CELLID2 0xFF8 +#define D40_DREG_CELLID3 0xFFC + +/* LLI related structures */ + +/** + * struct d40_phy_lli - The basic configuration register for each physical + * channel. + * + * @reg_cfg: The configuration register. + * @reg_elt: The element register. + * @reg_ptr: The pointer register. + * @reg_lnk: The link register. + * + * These registers are set up for both physical and logical transfers + * Note that the bit in each register means differently in logical and + * physical(standard) mode. + * + * This struct must be 16 bytes aligned, and only contain physical registers + * since it will be directly accessed by the DMA. + */ +struct d40_phy_lli { + u32 reg_cfg; + u32 reg_elt; + u32 reg_ptr; + u32 reg_lnk; +}; + +/** + * struct d40_phy_lli_bidir - struct for a transfer. + * + * @src: Register settings for src channel. + * @dst: Register settings for dst channel. + * + * All DMA transfers have a source and a destination. + */ + +struct d40_phy_lli_bidir { + struct d40_phy_lli *src; + struct d40_phy_lli *dst; +}; + + +/** + * struct d40_log_lli - logical lli configuration + * + * @lcsp02: Either maps to register lcsp0 if src or lcsp2 if dst. + * @lcsp13: Either maps to register lcsp1 if src or lcsp3 if dst. + * + * This struct must be 8 bytes aligned since it will be accessed directy by + * the DMA. Never add any none hw mapped registers to this struct. + */ + +struct d40_log_lli { + u32 lcsp02; + u32 lcsp13; +}; + +/** + * struct d40_log_lli_bidir - For both src and dst + * + * @src: pointer to src lli configuration. + * @dst: pointer to dst lli configuration. + * + * You always have a src and a dst when doing DMA transfers. + */ + +struct d40_log_lli_bidir { + struct d40_log_lli *src; + struct d40_log_lli *dst; +}; + +/** + * struct d40_log_lli_full - LCPA layout + * + * @lcsp0: Logical Channel Standard Param 0 - Src. + * @lcsp1: Logical Channel Standard Param 1 - Src. + * @lcsp2: Logical Channel Standard Param 2 - Dst. + * @lcsp3: Logical Channel Standard Param 3 - Dst. + * + * This struct maps to LCPA physical memory layout. Must map to + * the hw. + */ +struct d40_log_lli_full { + u32 lcsp0; + u32 lcsp1; + u32 lcsp2; + u32 lcsp3; +}; + +/** + * struct d40_def_lcsp - Default LCSP1 and LCSP3 settings + * + * @lcsp3: The default configuration for dst. + * @lcsp1: The default configuration for src. + */ +struct d40_def_lcsp { + u32 lcsp3; + u32 lcsp1; +}; + +/* Physical channels */ + +enum d40_lli_flags { + LLI_ADDR_INC = 1 << 0, + LLI_TERM_INT = 1 << 1, + LLI_CYCLIC = 1 << 2, + LLI_LAST_LINK = 1 << 3, +}; + +void d40_phy_cfg(struct stedma40_chan_cfg *cfg, + u32 *src_cfg, + u32 *dst_cfg); + +void d40_log_cfg(struct stedma40_chan_cfg *cfg, + u32 *lcsp1, + u32 *lcsp2); + +int d40_phy_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t target, + struct d40_phy_lli *lli, + dma_addr_t lli_phys, + u32 reg_cfg, + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo, + unsigned long flags); + +/* Logical channels */ + +int d40_log_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t dev_addr, + struct d40_log_lli *lli_sg, + u32 lcsp13, /* src or dst*/ + u32 data_width1, u32 data_width2); + +void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags); + +void d40_log_lli_lcla_write(struct d40_log_lli *lcla, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags); + +#endif /* STE_DMA40_LLI_H */ diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c new file mode 100644 index 0000000000..9840594a6a --- /dev/null +++ b/drivers/dma/stm32-dma.c @@ -0,0 +1,1793 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for STM32 DMA controller + * + * Inspired by dma-jz4740.c and tegra20-apb-dma.c + * + * Copyright (C) M'boumba Cedric Madianga 2015 + * Author: M'boumba Cedric Madianga + * Pierre-Yves Mordret + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +#define STM32_DMA_LISR 0x0000 /* DMA Low Int Status Reg */ +#define STM32_DMA_HISR 0x0004 /* DMA High Int Status Reg */ +#define STM32_DMA_ISR(n) (((n) & 4) ? STM32_DMA_HISR : STM32_DMA_LISR) +#define STM32_DMA_LIFCR 0x0008 /* DMA Low Int Flag Clear Reg */ +#define STM32_DMA_HIFCR 0x000c /* DMA High Int Flag Clear Reg */ +#define STM32_DMA_IFCR(n) (((n) & 4) ? STM32_DMA_HIFCR : STM32_DMA_LIFCR) +#define STM32_DMA_TCI BIT(5) /* Transfer Complete Interrupt */ +#define STM32_DMA_HTI BIT(4) /* Half Transfer Interrupt */ +#define STM32_DMA_TEI BIT(3) /* Transfer Error Interrupt */ +#define STM32_DMA_DMEI BIT(2) /* Direct Mode Error Interrupt */ +#define STM32_DMA_FEI BIT(0) /* FIFO Error Interrupt */ +#define STM32_DMA_MASKI (STM32_DMA_TCI \ + | STM32_DMA_TEI \ + | STM32_DMA_DMEI \ + | STM32_DMA_FEI) +/* + * If (chan->id % 4) is 2 or 3, left shift the mask by 16 bits; + * if (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits. + */ +#define STM32_DMA_FLAGS_SHIFT(n) ({ typeof(n) (_n) = (n); \ + (((_n) & 2) << 3) | (((_n) & 1) * 6); }) + +/* DMA Stream x Configuration Register */ +#define STM32_DMA_SCR(x) (0x0010 + 0x18 * (x)) /* x = 0..7 */ +#define STM32_DMA_SCR_REQ_MASK GENMASK(27, 25) +#define STM32_DMA_SCR_MBURST_MASK GENMASK(24, 23) +#define STM32_DMA_SCR_PBURST_MASK GENMASK(22, 21) +#define STM32_DMA_SCR_PL_MASK GENMASK(17, 16) +#define STM32_DMA_SCR_MSIZE_MASK GENMASK(14, 13) +#define STM32_DMA_SCR_PSIZE_MASK GENMASK(12, 11) +#define STM32_DMA_SCR_DIR_MASK GENMASK(7, 6) +#define STM32_DMA_SCR_TRBUFF BIT(20) /* Bufferable transfer for USART/UART */ +#define STM32_DMA_SCR_CT BIT(19) /* Target in double buffer */ +#define STM32_DMA_SCR_DBM BIT(18) /* Double Buffer Mode */ +#define STM32_DMA_SCR_PINCOS BIT(15) /* Peripheral inc offset size */ +#define STM32_DMA_SCR_MINC BIT(10) /* Memory increment mode */ +#define STM32_DMA_SCR_PINC BIT(9) /* Peripheral increment mode */ +#define STM32_DMA_SCR_CIRC BIT(8) /* Circular mode */ +#define STM32_DMA_SCR_PFCTRL BIT(5) /* Peripheral Flow Controller */ +#define STM32_DMA_SCR_TCIE BIT(4) /* Transfer Complete Int Enable + */ +#define STM32_DMA_SCR_TEIE BIT(2) /* Transfer Error Int Enable */ +#define STM32_DMA_SCR_DMEIE BIT(1) /* Direct Mode Err Int Enable */ +#define STM32_DMA_SCR_EN BIT(0) /* Stream Enable */ +#define STM32_DMA_SCR_CFG_MASK (STM32_DMA_SCR_PINC \ + | STM32_DMA_SCR_MINC \ + | STM32_DMA_SCR_PINCOS \ + | STM32_DMA_SCR_PL_MASK) +#define STM32_DMA_SCR_IRQ_MASK (STM32_DMA_SCR_TCIE \ + | STM32_DMA_SCR_TEIE \ + | STM32_DMA_SCR_DMEIE) + +/* DMA Stream x number of data register */ +#define STM32_DMA_SNDTR(x) (0x0014 + 0x18 * (x)) + +/* DMA stream peripheral address register */ +#define STM32_DMA_SPAR(x) (0x0018 + 0x18 * (x)) + +/* DMA stream x memory 0 address register */ +#define STM32_DMA_SM0AR(x) (0x001c + 0x18 * (x)) + +/* DMA stream x memory 1 address register */ +#define STM32_DMA_SM1AR(x) (0x0020 + 0x18 * (x)) + +/* DMA stream x FIFO control register */ +#define STM32_DMA_SFCR(x) (0x0024 + 0x18 * (x)) +#define STM32_DMA_SFCR_FTH_MASK GENMASK(1, 0) +#define STM32_DMA_SFCR_FEIE BIT(7) /* FIFO error interrupt enable */ +#define STM32_DMA_SFCR_DMDIS BIT(2) /* Direct mode disable */ +#define STM32_DMA_SFCR_MASK (STM32_DMA_SFCR_FEIE \ + | STM32_DMA_SFCR_DMDIS) + +/* DMA direction */ +#define STM32_DMA_DEV_TO_MEM 0x00 +#define STM32_DMA_MEM_TO_DEV 0x01 +#define STM32_DMA_MEM_TO_MEM 0x02 + +/* DMA priority level */ +#define STM32_DMA_PRIORITY_LOW 0x00 +#define STM32_DMA_PRIORITY_MEDIUM 0x01 +#define STM32_DMA_PRIORITY_HIGH 0x02 +#define STM32_DMA_PRIORITY_VERY_HIGH 0x03 + +/* DMA FIFO threshold selection */ +#define STM32_DMA_FIFO_THRESHOLD_1QUARTERFULL 0x00 +#define STM32_DMA_FIFO_THRESHOLD_HALFFULL 0x01 +#define STM32_DMA_FIFO_THRESHOLD_3QUARTERSFULL 0x02 +#define STM32_DMA_FIFO_THRESHOLD_FULL 0x03 +#define STM32_DMA_FIFO_THRESHOLD_NONE 0x04 + +#define STM32_DMA_MAX_DATA_ITEMS 0xffff +/* + * Valid transfer starts from @0 to @0xFFFE leading to unaligned scatter + * gather at boundary. Thus it's safer to round down this value on FIFO + * size (16 Bytes) + */ +#define STM32_DMA_ALIGNED_MAX_DATA_ITEMS \ + ALIGN_DOWN(STM32_DMA_MAX_DATA_ITEMS, 16) +#define STM32_DMA_MAX_CHANNELS 0x08 +#define STM32_DMA_MAX_REQUEST_ID 0x08 +#define STM32_DMA_MAX_DATA_PARAM 0x03 +#define STM32_DMA_FIFO_SIZE 16 /* FIFO is 16 bytes */ +#define STM32_DMA_MIN_BURST 4 +#define STM32_DMA_MAX_BURST 16 + +/* DMA Features */ +#define STM32_DMA_THRESHOLD_FTR_MASK GENMASK(1, 0) +#define STM32_DMA_DIRECT_MODE_MASK BIT(2) +#define STM32_DMA_ALT_ACK_MODE_MASK BIT(4) +#define STM32_DMA_MDMA_STREAM_ID_MASK GENMASK(19, 16) + +enum stm32_dma_width { + STM32_DMA_BYTE, + STM32_DMA_HALF_WORD, + STM32_DMA_WORD, +}; + +enum stm32_dma_burst_size { + STM32_DMA_BURST_SINGLE, + STM32_DMA_BURST_INCR4, + STM32_DMA_BURST_INCR8, + STM32_DMA_BURST_INCR16, +}; + +/** + * struct stm32_dma_cfg - STM32 DMA custom configuration + * @channel_id: channel ID + * @request_line: DMA request + * @stream_config: 32bit mask specifying the DMA channel configuration + * @features: 32bit mask specifying the DMA Feature list + */ +struct stm32_dma_cfg { + u32 channel_id; + u32 request_line; + u32 stream_config; + u32 features; +}; + +struct stm32_dma_chan_reg { + u32 dma_lisr; + u32 dma_hisr; + u32 dma_lifcr; + u32 dma_hifcr; + u32 dma_scr; + u32 dma_sndtr; + u32 dma_spar; + u32 dma_sm0ar; + u32 dma_sm1ar; + u32 dma_sfcr; +}; + +struct stm32_dma_sg_req { + u32 len; + struct stm32_dma_chan_reg chan_reg; +}; + +struct stm32_dma_desc { + struct virt_dma_desc vdesc; + bool cyclic; + u32 num_sgs; + struct stm32_dma_sg_req sg_req[]; +}; + +/** + * struct stm32_dma_mdma_config - STM32 DMA MDMA configuration + * @stream_id: DMA request to trigger STM32 MDMA transfer + * @ifcr: DMA interrupt flag clear register address, + * used by STM32 MDMA to clear DMA Transfer Complete flag + * @tcf: DMA Transfer Complete flag + */ +struct stm32_dma_mdma_config { + u32 stream_id; + u32 ifcr; + u32 tcf; +}; + +struct stm32_dma_chan { + struct virt_dma_chan vchan; + bool config_init; + bool busy; + u32 id; + u32 irq; + struct stm32_dma_desc *desc; + u32 next_sg; + struct dma_slave_config dma_sconfig; + struct stm32_dma_chan_reg chan_reg; + u32 threshold; + u32 mem_burst; + u32 mem_width; + enum dma_status status; + bool trig_mdma; + struct stm32_dma_mdma_config mdma_config; +}; + +struct stm32_dma_device { + struct dma_device ddev; + void __iomem *base; + struct clk *clk; + bool mem2mem; + struct stm32_dma_chan chan[STM32_DMA_MAX_CHANNELS]; +}; + +static struct stm32_dma_device *stm32_dma_get_dev(struct stm32_dma_chan *chan) +{ + return container_of(chan->vchan.chan.device, struct stm32_dma_device, + ddev); +} + +static struct stm32_dma_chan *to_stm32_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct stm32_dma_chan, vchan.chan); +} + +static struct stm32_dma_desc *to_stm32_dma_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct stm32_dma_desc, vdesc); +} + +static struct device *chan2dev(struct stm32_dma_chan *chan) +{ + return &chan->vchan.chan.dev->device; +} + +static u32 stm32_dma_read(struct stm32_dma_device *dmadev, u32 reg) +{ + return readl_relaxed(dmadev->base + reg); +} + +static void stm32_dma_write(struct stm32_dma_device *dmadev, u32 reg, u32 val) +{ + writel_relaxed(val, dmadev->base + reg); +} + +static int stm32_dma_get_width(struct stm32_dma_chan *chan, + enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return STM32_DMA_BYTE; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return STM32_DMA_HALF_WORD; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return STM32_DMA_WORD; + default: + dev_err(chan2dev(chan), "Dma bus width not supported\n"); + return -EINVAL; + } +} + +static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len, + dma_addr_t buf_addr, + u32 threshold) +{ + enum dma_slave_buswidth max_width; + + if (threshold == STM32_DMA_FIFO_THRESHOLD_FULL) + max_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + else + max_width = DMA_SLAVE_BUSWIDTH_2_BYTES; + + while ((buf_len < max_width || buf_len % max_width) && + max_width > DMA_SLAVE_BUSWIDTH_1_BYTE) + max_width = max_width >> 1; + + if (buf_addr & (max_width - 1)) + max_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + + return max_width; +} + +static bool stm32_dma_fifo_threshold_is_allowed(u32 burst, u32 threshold, + enum dma_slave_buswidth width) +{ + u32 remaining; + + if (threshold == STM32_DMA_FIFO_THRESHOLD_NONE) + return false; + + if (width != DMA_SLAVE_BUSWIDTH_UNDEFINED) { + if (burst != 0) { + /* + * If number of beats fit in several whole bursts + * this configuration is allowed. + */ + remaining = ((STM32_DMA_FIFO_SIZE / width) * + (threshold + 1) / 4) % burst; + + if (remaining == 0) + return true; + } else { + return true; + } + } + + return false; +} + +static bool stm32_dma_is_burst_possible(u32 buf_len, u32 threshold) +{ + /* If FIFO direct mode, burst is not possible */ + if (threshold == STM32_DMA_FIFO_THRESHOLD_NONE) + return false; + + /* + * Buffer or period length has to be aligned on FIFO depth. + * Otherwise bytes may be stuck within FIFO at buffer or period + * length. + */ + return ((buf_len % ((threshold + 1) * 4)) == 0); +} + +static u32 stm32_dma_get_best_burst(u32 buf_len, u32 max_burst, u32 threshold, + enum dma_slave_buswidth width) +{ + u32 best_burst = max_burst; + + if (best_burst == 1 || !stm32_dma_is_burst_possible(buf_len, threshold)) + return 0; + + while ((buf_len < best_burst * width && best_burst > 1) || + !stm32_dma_fifo_threshold_is_allowed(best_burst, threshold, + width)) { + if (best_burst > STM32_DMA_MIN_BURST) + best_burst = best_burst >> 1; + else + best_burst = 0; + } + + return best_burst; +} + +static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst) +{ + switch (maxburst) { + case 0: + case 1: + return STM32_DMA_BURST_SINGLE; + case 4: + return STM32_DMA_BURST_INCR4; + case 8: + return STM32_DMA_BURST_INCR8; + case 16: + return STM32_DMA_BURST_INCR16; + default: + dev_err(chan2dev(chan), "Dma burst size not supported\n"); + return -EINVAL; + } +} + +static void stm32_dma_set_fifo_config(struct stm32_dma_chan *chan, + u32 src_burst, u32 dst_burst) +{ + chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_MASK; + chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_DMEIE; + + if (!src_burst && !dst_burst) { + /* Using direct mode */ + chan->chan_reg.dma_scr |= STM32_DMA_SCR_DMEIE; + } else { + /* Using FIFO mode */ + chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK; + } +} + +static int stm32_dma_slave_config(struct dma_chan *c, + struct dma_slave_config *config) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + + memcpy(&chan->dma_sconfig, config, sizeof(*config)); + + /* Check if user is requesting DMA to trigger STM32 MDMA */ + if (config->peripheral_size) { + config->peripheral_config = &chan->mdma_config; + config->peripheral_size = sizeof(chan->mdma_config); + chan->trig_mdma = true; + } + + chan->config_init = true; + + return 0; +} + +static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + u32 flags, dma_isr; + + /* + * Read "flags" from DMA_xISR register corresponding to the selected + * DMA channel at the correct bit offset inside that register. + */ + + dma_isr = stm32_dma_read(dmadev, STM32_DMA_ISR(chan->id)); + flags = dma_isr >> STM32_DMA_FLAGS_SHIFT(chan->id); + + return flags & STM32_DMA_MASKI; +} + +static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + u32 dma_ifcr; + + /* + * Write "flags" to the DMA_xIFCR register corresponding to the selected + * DMA channel at the correct bit offset inside that register. + */ + flags &= STM32_DMA_MASKI; + dma_ifcr = flags << STM32_DMA_FLAGS_SHIFT(chan->id); + + stm32_dma_write(dmadev, STM32_DMA_IFCR(chan->id), dma_ifcr); +} + +static int stm32_dma_disable_chan(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + u32 dma_scr, id, reg; + + id = chan->id; + reg = STM32_DMA_SCR(id); + dma_scr = stm32_dma_read(dmadev, reg); + + if (dma_scr & STM32_DMA_SCR_EN) { + dma_scr &= ~STM32_DMA_SCR_EN; + stm32_dma_write(dmadev, reg, dma_scr); + + return readl_relaxed_poll_timeout_atomic(dmadev->base + reg, + dma_scr, !(dma_scr & STM32_DMA_SCR_EN), + 10, 1000000); + } + + return 0; +} + +static void stm32_dma_stop(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + u32 dma_scr, dma_sfcr, status; + int ret; + + /* Disable interrupts */ + dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); + dma_scr &= ~STM32_DMA_SCR_IRQ_MASK; + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr); + dma_sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id)); + dma_sfcr &= ~STM32_DMA_SFCR_FEIE; + stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), dma_sfcr); + + /* Disable DMA */ + ret = stm32_dma_disable_chan(chan); + if (ret < 0) + return; + + /* Clear interrupt status if it is there */ + status = stm32_dma_irq_status(chan); + if (status) { + dev_dbg(chan2dev(chan), "%s(): clearing interrupt: 0x%08x\n", + __func__, status); + stm32_dma_irq_clear(chan, status); + } + + chan->busy = false; + chan->status = DMA_COMPLETE; +} + +static int stm32_dma_terminate_all(struct dma_chan *c) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + + if (chan->desc) { + dma_cookie_complete(&chan->desc->vdesc.tx); + vchan_terminate_vdesc(&chan->desc->vdesc); + if (chan->busy) + stm32_dma_stop(chan); + chan->desc = NULL; + } + + vchan_get_all_descriptors(&chan->vchan, &head); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + vchan_dma_desc_free_list(&chan->vchan, &head); + + return 0; +} + +static void stm32_dma_synchronize(struct dma_chan *c) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + + vchan_synchronize(&chan->vchan); +} + +static void stm32_dma_dump_reg(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + u32 scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); + u32 ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id)); + u32 spar = stm32_dma_read(dmadev, STM32_DMA_SPAR(chan->id)); + u32 sm0ar = stm32_dma_read(dmadev, STM32_DMA_SM0AR(chan->id)); + u32 sm1ar = stm32_dma_read(dmadev, STM32_DMA_SM1AR(chan->id)); + u32 sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id)); + + dev_dbg(chan2dev(chan), "SCR: 0x%08x\n", scr); + dev_dbg(chan2dev(chan), "NDTR: 0x%08x\n", ndtr); + dev_dbg(chan2dev(chan), "SPAR: 0x%08x\n", spar); + dev_dbg(chan2dev(chan), "SM0AR: 0x%08x\n", sm0ar); + dev_dbg(chan2dev(chan), "SM1AR: 0x%08x\n", sm1ar); + dev_dbg(chan2dev(chan), "SFCR: 0x%08x\n", sfcr); +} + +static void stm32_dma_sg_inc(struct stm32_dma_chan *chan) +{ + chan->next_sg++; + if (chan->desc->cyclic && (chan->next_sg == chan->desc->num_sgs)) + chan->next_sg = 0; +} + +static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan); + +static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct virt_dma_desc *vdesc; + struct stm32_dma_sg_req *sg_req; + struct stm32_dma_chan_reg *reg; + u32 status; + int ret; + + ret = stm32_dma_disable_chan(chan); + if (ret < 0) + return; + + if (!chan->desc) { + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) + return; + + list_del(&vdesc->node); + + chan->desc = to_stm32_dma_desc(vdesc); + chan->next_sg = 0; + } + + if (chan->next_sg == chan->desc->num_sgs) + chan->next_sg = 0; + + sg_req = &chan->desc->sg_req[chan->next_sg]; + reg = &sg_req->chan_reg; + + /* When DMA triggers STM32 MDMA, DMA Transfer Complete is managed by STM32 MDMA */ + if (chan->trig_mdma && chan->dma_sconfig.direction != DMA_MEM_TO_DEV) + reg->dma_scr &= ~STM32_DMA_SCR_TCIE; + + reg->dma_scr &= ~STM32_DMA_SCR_EN; + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr); + stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg->dma_spar); + stm32_dma_write(dmadev, STM32_DMA_SM0AR(chan->id), reg->dma_sm0ar); + stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), reg->dma_sfcr); + stm32_dma_write(dmadev, STM32_DMA_SM1AR(chan->id), reg->dma_sm1ar); + stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), reg->dma_sndtr); + + stm32_dma_sg_inc(chan); + + /* Clear interrupt status if it is there */ + status = stm32_dma_irq_status(chan); + if (status) + stm32_dma_irq_clear(chan, status); + + if (chan->desc->cyclic) + stm32_dma_configure_next_sg(chan); + + stm32_dma_dump_reg(chan); + + /* Start DMA */ + chan->busy = true; + chan->status = DMA_IN_PROGRESS; + reg->dma_scr |= STM32_DMA_SCR_EN; + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr); + + dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan); +} + +static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct stm32_dma_sg_req *sg_req; + u32 dma_scr, dma_sm0ar, dma_sm1ar, id; + + id = chan->id; + dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id)); + + sg_req = &chan->desc->sg_req[chan->next_sg]; + + if (dma_scr & STM32_DMA_SCR_CT) { + dma_sm0ar = sg_req->chan_reg.dma_sm0ar; + stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), dma_sm0ar); + dev_dbg(chan2dev(chan), "CT=1 <=> SM0AR: 0x%08x\n", + stm32_dma_read(dmadev, STM32_DMA_SM0AR(id))); + } else { + dma_sm1ar = sg_req->chan_reg.dma_sm1ar; + stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), dma_sm1ar); + dev_dbg(chan2dev(chan), "CT=0 <=> SM1AR: 0x%08x\n", + stm32_dma_read(dmadev, STM32_DMA_SM1AR(id))); + } +} + +static void stm32_dma_handle_chan_paused(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + u32 dma_scr; + + /* + * Read and store current remaining data items and peripheral/memory addresses to be + * updated on resume + */ + dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); + /* + * Transfer can be paused while between a previous resume and reconfiguration on transfer + * complete. If transfer is cyclic and CIRC and DBM have been deactivated for resume, need + * to set it here in SCR backup to ensure a good reconfiguration on transfer complete. + */ + if (chan->desc && chan->desc->cyclic) { + if (chan->desc->num_sgs == 1) + dma_scr |= STM32_DMA_SCR_CIRC; + else + dma_scr |= STM32_DMA_SCR_DBM; + } + chan->chan_reg.dma_scr = dma_scr; + + /* + * Need to temporarily deactivate CIRC/DBM until next Transfer Complete interrupt, otherwise + * on resume NDTR autoreload value will be wrong (lower than the initial period length) + */ + if (chan->desc && chan->desc->cyclic) { + dma_scr &= ~(STM32_DMA_SCR_DBM | STM32_DMA_SCR_CIRC); + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr); + } + + chan->chan_reg.dma_sndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id)); + + chan->status = DMA_PAUSED; + + dev_dbg(chan2dev(chan), "vchan %pK: paused\n", &chan->vchan); +} + +static void stm32_dma_post_resume_reconfigure(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct stm32_dma_sg_req *sg_req; + u32 dma_scr, status, id; + + id = chan->id; + dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id)); + + /* Clear interrupt status if it is there */ + status = stm32_dma_irq_status(chan); + if (status) + stm32_dma_irq_clear(chan, status); + + if (!chan->next_sg) + sg_req = &chan->desc->sg_req[chan->desc->num_sgs - 1]; + else + sg_req = &chan->desc->sg_req[chan->next_sg - 1]; + + /* Reconfigure NDTR with the initial value */ + stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), sg_req->chan_reg.dma_sndtr); + + /* Restore SPAR */ + stm32_dma_write(dmadev, STM32_DMA_SPAR(id), sg_req->chan_reg.dma_spar); + + /* Restore SM0AR/SM1AR whatever DBM/CT as they may have been modified */ + stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), sg_req->chan_reg.dma_sm0ar); + stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), sg_req->chan_reg.dma_sm1ar); + + /* Reactivate CIRC/DBM if needed */ + if (chan->chan_reg.dma_scr & STM32_DMA_SCR_DBM) { + dma_scr |= STM32_DMA_SCR_DBM; + /* Restore CT */ + if (chan->chan_reg.dma_scr & STM32_DMA_SCR_CT) + dma_scr &= ~STM32_DMA_SCR_CT; + else + dma_scr |= STM32_DMA_SCR_CT; + } else if (chan->chan_reg.dma_scr & STM32_DMA_SCR_CIRC) { + dma_scr |= STM32_DMA_SCR_CIRC; + } + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr); + + stm32_dma_configure_next_sg(chan); + + stm32_dma_dump_reg(chan); + + dma_scr |= STM32_DMA_SCR_EN; + stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr); + + dev_dbg(chan2dev(chan), "vchan %pK: reconfigured after pause/resume\n", &chan->vchan); +} + +static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan, u32 scr) +{ + if (!chan->desc) + return; + + if (chan->desc->cyclic) { + vchan_cyclic_callback(&chan->desc->vdesc); + if (chan->trig_mdma) + return; + stm32_dma_sg_inc(chan); + /* cyclic while CIRC/DBM disable => post resume reconfiguration needed */ + if (!(scr & (STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM))) + stm32_dma_post_resume_reconfigure(chan); + else if (scr & STM32_DMA_SCR_DBM) + stm32_dma_configure_next_sg(chan); + } else { + chan->busy = false; + chan->status = DMA_COMPLETE; + if (chan->next_sg == chan->desc->num_sgs) { + vchan_cookie_complete(&chan->desc->vdesc); + chan->desc = NULL; + } + stm32_dma_start_transfer(chan); + } +} + +static irqreturn_t stm32_dma_chan_irq(int irq, void *devid) +{ + struct stm32_dma_chan *chan = devid; + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + u32 status, scr, sfcr; + + spin_lock(&chan->vchan.lock); + + status = stm32_dma_irq_status(chan); + scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); + sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id)); + + if (status & STM32_DMA_FEI) { + stm32_dma_irq_clear(chan, STM32_DMA_FEI); + status &= ~STM32_DMA_FEI; + if (sfcr & STM32_DMA_SFCR_FEIE) { + if (!(scr & STM32_DMA_SCR_EN) && + !(status & STM32_DMA_TCI)) + dev_err(chan2dev(chan), "FIFO Error\n"); + else + dev_dbg(chan2dev(chan), "FIFO over/underrun\n"); + } + } + if (status & STM32_DMA_DMEI) { + stm32_dma_irq_clear(chan, STM32_DMA_DMEI); + status &= ~STM32_DMA_DMEI; + if (sfcr & STM32_DMA_SCR_DMEIE) + dev_dbg(chan2dev(chan), "Direct mode overrun\n"); + } + + if (status & STM32_DMA_TCI) { + stm32_dma_irq_clear(chan, STM32_DMA_TCI); + if (scr & STM32_DMA_SCR_TCIE) { + if (chan->status != DMA_PAUSED) + stm32_dma_handle_chan_done(chan, scr); + } + status &= ~STM32_DMA_TCI; + } + + if (status & STM32_DMA_HTI) { + stm32_dma_irq_clear(chan, STM32_DMA_HTI); + status &= ~STM32_DMA_HTI; + } + + if (status) { + stm32_dma_irq_clear(chan, status); + dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status); + if (!(scr & STM32_DMA_SCR_EN)) + dev_err(chan2dev(chan), "chan disabled by HW\n"); + } + + spin_unlock(&chan->vchan.lock); + + return IRQ_HANDLED; +} + +static void stm32_dma_issue_pending(struct dma_chan *c) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + unsigned long flags; + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) { + dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan); + stm32_dma_start_transfer(chan); + + } + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static int stm32_dma_pause(struct dma_chan *c) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + unsigned long flags; + int ret; + + if (chan->status != DMA_IN_PROGRESS) + return -EPERM; + + spin_lock_irqsave(&chan->vchan.lock, flags); + + ret = stm32_dma_disable_chan(chan); + if (!ret) + stm32_dma_handle_chan_paused(chan); + + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + return ret; +} + +static int stm32_dma_resume(struct dma_chan *c) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct stm32_dma_chan_reg chan_reg = chan->chan_reg; + u32 id = chan->id, scr, ndtr, offset, spar, sm0ar, sm1ar; + struct stm32_dma_sg_req *sg_req; + unsigned long flags; + + if (chan->status != DMA_PAUSED) + return -EPERM; + + scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id)); + if (WARN_ON(scr & STM32_DMA_SCR_EN)) + return -EPERM; + + spin_lock_irqsave(&chan->vchan.lock, flags); + + /* sg_reg[prev_sg] contains original ndtr, sm0ar and sm1ar before pausing the transfer */ + if (!chan->next_sg) + sg_req = &chan->desc->sg_req[chan->desc->num_sgs - 1]; + else + sg_req = &chan->desc->sg_req[chan->next_sg - 1]; + + ndtr = sg_req->chan_reg.dma_sndtr; + offset = (ndtr - chan_reg.dma_sndtr); + offset <<= FIELD_GET(STM32_DMA_SCR_PSIZE_MASK, chan_reg.dma_scr); + spar = sg_req->chan_reg.dma_spar; + sm0ar = sg_req->chan_reg.dma_sm0ar; + sm1ar = sg_req->chan_reg.dma_sm1ar; + + /* + * The peripheral and/or memory addresses have to be updated in order to adjust the + * address pointers. Need to check increment. + */ + if (chan_reg.dma_scr & STM32_DMA_SCR_PINC) + stm32_dma_write(dmadev, STM32_DMA_SPAR(id), spar + offset); + else + stm32_dma_write(dmadev, STM32_DMA_SPAR(id), spar); + + if (!(chan_reg.dma_scr & STM32_DMA_SCR_MINC)) + offset = 0; + + /* + * In case of DBM, the current target could be SM1AR. + * Need to temporarily deactivate CIRC/DBM to finish the current transfer, so + * SM0AR becomes the current target and must be updated with SM1AR + offset if CT=1. + */ + if ((chan_reg.dma_scr & STM32_DMA_SCR_DBM) && (chan_reg.dma_scr & STM32_DMA_SCR_CT)) + stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), sm1ar + offset); + else + stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), sm0ar + offset); + + /* NDTR must be restored otherwise internal HW counter won't be correctly reset */ + stm32_dma_write(dmadev, STM32_DMA_SNDTR(id), chan_reg.dma_sndtr); + + /* + * Need to temporarily deactivate CIRC/DBM until next Transfer Complete interrupt, + * otherwise NDTR autoreload value will be wrong (lower than the initial period length) + */ + if (chan_reg.dma_scr & (STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM)) + chan_reg.dma_scr &= ~(STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM); + + if (chan_reg.dma_scr & STM32_DMA_SCR_DBM) + stm32_dma_configure_next_sg(chan); + + stm32_dma_dump_reg(chan); + + /* The stream may then be re-enabled to restart transfer from the point it was stopped */ + chan->status = DMA_IN_PROGRESS; + chan_reg.dma_scr |= STM32_DMA_SCR_EN; + stm32_dma_write(dmadev, STM32_DMA_SCR(id), chan_reg.dma_scr); + + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + dev_dbg(chan2dev(chan), "vchan %pK: resumed\n", &chan->vchan); + + return 0; +} + +static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, + enum dma_transfer_direction direction, + enum dma_slave_buswidth *buswidth, + u32 buf_len, dma_addr_t buf_addr) +{ + enum dma_slave_buswidth src_addr_width, dst_addr_width; + int src_bus_width, dst_bus_width; + int src_burst_size, dst_burst_size; + u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst; + u32 dma_scr, fifoth; + + src_addr_width = chan->dma_sconfig.src_addr_width; + dst_addr_width = chan->dma_sconfig.dst_addr_width; + src_maxburst = chan->dma_sconfig.src_maxburst; + dst_maxburst = chan->dma_sconfig.dst_maxburst; + fifoth = chan->threshold; + + switch (direction) { + case DMA_MEM_TO_DEV: + /* Set device data size */ + dst_bus_width = stm32_dma_get_width(chan, dst_addr_width); + if (dst_bus_width < 0) + return dst_bus_width; + + /* Set device burst size */ + dst_best_burst = stm32_dma_get_best_burst(buf_len, + dst_maxburst, + fifoth, + dst_addr_width); + + dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst); + if (dst_burst_size < 0) + return dst_burst_size; + + /* Set memory data size */ + src_addr_width = stm32_dma_get_max_width(buf_len, buf_addr, + fifoth); + chan->mem_width = src_addr_width; + src_bus_width = stm32_dma_get_width(chan, src_addr_width); + if (src_bus_width < 0) + return src_bus_width; + + /* + * Set memory burst size - burst not possible if address is not aligned on + * the address boundary equal to the size of the transfer + */ + if (buf_addr & (buf_len - 1)) + src_maxburst = 1; + else + src_maxburst = STM32_DMA_MAX_BURST; + src_best_burst = stm32_dma_get_best_burst(buf_len, + src_maxburst, + fifoth, + src_addr_width); + src_burst_size = stm32_dma_get_burst(chan, src_best_burst); + if (src_burst_size < 0) + return src_burst_size; + + dma_scr = FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_MEM_TO_DEV) | + FIELD_PREP(STM32_DMA_SCR_PSIZE_MASK, dst_bus_width) | + FIELD_PREP(STM32_DMA_SCR_MSIZE_MASK, src_bus_width) | + FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dst_burst_size) | + FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, src_burst_size); + + /* Set FIFO threshold */ + chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK; + if (fifoth != STM32_DMA_FIFO_THRESHOLD_NONE) + chan->chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, fifoth); + + /* Set peripheral address */ + chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr; + *buswidth = dst_addr_width; + break; + + case DMA_DEV_TO_MEM: + /* Set device data size */ + src_bus_width = stm32_dma_get_width(chan, src_addr_width); + if (src_bus_width < 0) + return src_bus_width; + + /* Set device burst size */ + src_best_burst = stm32_dma_get_best_burst(buf_len, + src_maxburst, + fifoth, + src_addr_width); + chan->mem_burst = src_best_burst; + src_burst_size = stm32_dma_get_burst(chan, src_best_burst); + if (src_burst_size < 0) + return src_burst_size; + + /* Set memory data size */ + dst_addr_width = stm32_dma_get_max_width(buf_len, buf_addr, + fifoth); + chan->mem_width = dst_addr_width; + dst_bus_width = stm32_dma_get_width(chan, dst_addr_width); + if (dst_bus_width < 0) + return dst_bus_width; + + /* + * Set memory burst size - burst not possible if address is not aligned on + * the address boundary equal to the size of the transfer + */ + if (buf_addr & (buf_len - 1)) + dst_maxburst = 1; + else + dst_maxburst = STM32_DMA_MAX_BURST; + dst_best_burst = stm32_dma_get_best_burst(buf_len, + dst_maxburst, + fifoth, + dst_addr_width); + chan->mem_burst = dst_best_burst; + dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst); + if (dst_burst_size < 0) + return dst_burst_size; + + dma_scr = FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_DEV_TO_MEM) | + FIELD_PREP(STM32_DMA_SCR_PSIZE_MASK, src_bus_width) | + FIELD_PREP(STM32_DMA_SCR_MSIZE_MASK, dst_bus_width) | + FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, src_burst_size) | + FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, dst_burst_size); + + /* Set FIFO threshold */ + chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK; + if (fifoth != STM32_DMA_FIFO_THRESHOLD_NONE) + chan->chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, fifoth); + + /* Set peripheral address */ + chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr; + *buswidth = chan->dma_sconfig.src_addr_width; + break; + + default: + dev_err(chan2dev(chan), "Dma direction is not supported\n"); + return -EINVAL; + } + + stm32_dma_set_fifo_config(chan, src_best_burst, dst_best_burst); + + /* Set DMA control register */ + chan->chan_reg.dma_scr &= ~(STM32_DMA_SCR_DIR_MASK | + STM32_DMA_SCR_PSIZE_MASK | STM32_DMA_SCR_MSIZE_MASK | + STM32_DMA_SCR_PBURST_MASK | STM32_DMA_SCR_MBURST_MASK); + chan->chan_reg.dma_scr |= dma_scr; + + return 0; +} + +static void stm32_dma_clear_reg(struct stm32_dma_chan_reg *regs) +{ + memset(regs, 0, sizeof(struct stm32_dma_chan_reg)); +} + +static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( + struct dma_chan *c, struct scatterlist *sgl, + u32 sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_desc *desc; + struct scatterlist *sg; + enum dma_slave_buswidth buswidth; + u32 nb_data_items; + int i, ret; + + if (!chan->config_init) { + dev_err(chan2dev(chan), "dma channel is not configured\n"); + return NULL; + } + + if (sg_len < 1) { + dev_err(chan2dev(chan), "Invalid segment length %d\n", sg_len); + return NULL; + } + + desc = kzalloc(struct_size(desc, sg_req, sg_len), GFP_NOWAIT); + if (!desc) + return NULL; + + /* Set peripheral flow controller */ + if (chan->dma_sconfig.device_fc) + chan->chan_reg.dma_scr |= STM32_DMA_SCR_PFCTRL; + else + chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL; + + /* Activate Double Buffer Mode if DMA triggers STM32 MDMA and more than 1 sg */ + if (chan->trig_mdma && sg_len > 1) { + chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM; + chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_CT; + } + + for_each_sg(sgl, sg, sg_len, i) { + ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, + sg_dma_len(sg), + sg_dma_address(sg)); + if (ret < 0) + goto err; + + desc->sg_req[i].len = sg_dma_len(sg); + + nb_data_items = desc->sg_req[i].len / buswidth; + if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) { + dev_err(chan2dev(chan), "nb items not supported\n"); + goto err; + } + + stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); + desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr; + desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr; + desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar; + desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg); + desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg); + if (chan->trig_mdma) + desc->sg_req[i].chan_reg.dma_sm1ar += sg_dma_len(sg); + desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items; + } + + desc->num_sgs = sg_len; + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); + +err: + kfree(desc); + return NULL; +} + +static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic( + struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_desc *desc; + enum dma_slave_buswidth buswidth; + u32 num_periods, nb_data_items; + int i, ret; + + if (!buf_len || !period_len) { + dev_err(chan2dev(chan), "Invalid buffer/period len\n"); + return NULL; + } + + if (!chan->config_init) { + dev_err(chan2dev(chan), "dma channel is not configured\n"); + return NULL; + } + + if (buf_len % period_len) { + dev_err(chan2dev(chan), "buf_len not multiple of period_len\n"); + return NULL; + } + + /* + * We allow to take more number of requests till DMA is + * not started. The driver will loop over all requests. + * Once DMA is started then new requests can be queued only after + * terminating the DMA. + */ + if (chan->busy) { + dev_err(chan2dev(chan), "Request not allowed when dma busy\n"); + return NULL; + } + + ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len, + buf_addr); + if (ret < 0) + return NULL; + + nb_data_items = period_len / buswidth; + if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) { + dev_err(chan2dev(chan), "number of items not supported\n"); + return NULL; + } + + /* Enable Circular mode or double buffer mode */ + if (buf_len == period_len) { + chan->chan_reg.dma_scr |= STM32_DMA_SCR_CIRC; + } else { + chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM; + chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_CT; + } + + /* Clear periph ctrl if client set it */ + chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL; + + num_periods = buf_len / period_len; + + desc = kzalloc(struct_size(desc, sg_req, num_periods), GFP_NOWAIT); + if (!desc) + return NULL; + + for (i = 0; i < num_periods; i++) { + desc->sg_req[i].len = period_len; + + stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); + desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr; + desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr; + desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar; + desc->sg_req[i].chan_reg.dma_sm0ar = buf_addr; + desc->sg_req[i].chan_reg.dma_sm1ar = buf_addr; + if (chan->trig_mdma) + desc->sg_req[i].chan_reg.dma_sm1ar += period_len; + desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items; + if (!chan->trig_mdma) + buf_addr += period_len; + } + + desc->num_sgs = num_periods; + desc->cyclic = true; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( + struct dma_chan *c, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + enum dma_slave_buswidth max_width; + struct stm32_dma_desc *desc; + size_t xfer_count, offset; + u32 num_sgs, best_burst, threshold; + int dma_burst, i; + + num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS); + desc = kzalloc(struct_size(desc, sg_req, num_sgs), GFP_NOWAIT); + if (!desc) + return NULL; + + threshold = chan->threshold; + + for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) { + xfer_count = min_t(size_t, len - offset, + STM32_DMA_ALIGNED_MAX_DATA_ITEMS); + + /* Compute best burst size */ + max_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST, + threshold, max_width); + dma_burst = stm32_dma_get_burst(chan, best_burst); + if (dma_burst < 0) { + kfree(desc); + return NULL; + } + + stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); + desc->sg_req[i].chan_reg.dma_scr = + FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_MEM_TO_MEM) | + FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dma_burst) | + FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, dma_burst) | + STM32_DMA_SCR_MINC | + STM32_DMA_SCR_PINC | + STM32_DMA_SCR_TCIE | + STM32_DMA_SCR_TEIE; + desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK; + desc->sg_req[i].chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, threshold); + desc->sg_req[i].chan_reg.dma_spar = src + offset; + desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset; + desc->sg_req[i].chan_reg.dma_sndtr = xfer_count; + desc->sg_req[i].len = xfer_count; + } + + desc->num_sgs = num_sgs; + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan) +{ + u32 dma_scr, width, ndtr; + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + + dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); + width = FIELD_GET(STM32_DMA_SCR_PSIZE_MASK, dma_scr); + ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id)); + + return ndtr << width; +} + +/** + * stm32_dma_is_current_sg - check that expected sg_req is currently transferred + * @chan: dma channel + * + * This function called when IRQ are disable, checks that the hardware has not + * switched on the next transfer in double buffer mode. The test is done by + * comparing the next_sg memory address with the hardware related register + * (based on CT bit value). + * + * Returns true if expected current transfer is still running or double + * buffer mode is not activated. + */ +static bool stm32_dma_is_current_sg(struct stm32_dma_chan *chan) +{ + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + struct stm32_dma_sg_req *sg_req; + u32 dma_scr, dma_smar, id, period_len; + + id = chan->id; + dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id)); + + /* In cyclic CIRC but not DBM, CT is not used */ + if (!(dma_scr & STM32_DMA_SCR_DBM)) + return true; + + sg_req = &chan->desc->sg_req[chan->next_sg]; + period_len = sg_req->len; + + /* DBM - take care of a previous pause/resume not yet post reconfigured */ + if (dma_scr & STM32_DMA_SCR_CT) { + dma_smar = stm32_dma_read(dmadev, STM32_DMA_SM0AR(id)); + /* + * If transfer has been pause/resumed, + * SM0AR is in the range of [SM0AR:SM0AR+period_len] + */ + return (dma_smar >= sg_req->chan_reg.dma_sm0ar && + dma_smar < sg_req->chan_reg.dma_sm0ar + period_len); + } + + dma_smar = stm32_dma_read(dmadev, STM32_DMA_SM1AR(id)); + /* + * If transfer has been pause/resumed, + * SM1AR is in the range of [SM1AR:SM1AR+period_len] + */ + return (dma_smar >= sg_req->chan_reg.dma_sm1ar && + dma_smar < sg_req->chan_reg.dma_sm1ar + period_len); +} + +static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, + struct stm32_dma_desc *desc, + u32 next_sg) +{ + u32 modulo, burst_size; + u32 residue; + u32 n_sg = next_sg; + struct stm32_dma_sg_req *sg_req = &chan->desc->sg_req[chan->next_sg]; + int i; + + /* + * Calculate the residue means compute the descriptors + * information: + * - the sg_req currently transferred + * - the Hardware remaining position in this sg (NDTR bits field). + * + * A race condition may occur if DMA is running in cyclic or double + * buffer mode, since the DMA register are automatically reloaded at end + * of period transfer. The hardware may have switched to the next + * transfer (CT bit updated) just before the position (SxNDTR reg) is + * read. + * In this case the SxNDTR reg could (or not) correspond to the new + * transfer position, and not the expected one. + * The strategy implemented in the stm32 driver is to: + * - read the SxNDTR register + * - crosscheck that hardware is still in current transfer. + * In case of switch, we can assume that the DMA is at the beginning of + * the next transfer. So we approximate the residue in consequence, by + * pointing on the beginning of next transfer. + * + * This race condition doesn't apply for none cyclic mode, as double + * buffer is not used. In such situation registers are updated by the + * software. + */ + + residue = stm32_dma_get_remaining_bytes(chan); + + if ((chan->desc->cyclic || chan->trig_mdma) && !stm32_dma_is_current_sg(chan)) { + n_sg++; + if (n_sg == chan->desc->num_sgs) + n_sg = 0; + if (!chan->trig_mdma) + residue = sg_req->len; + } + + /* + * In cyclic mode, for the last period, residue = remaining bytes + * from NDTR, + * else for all other periods in cyclic mode, and in sg mode, + * residue = remaining bytes from NDTR + remaining + * periods/sg to be transferred + */ + if ((!chan->desc->cyclic && !chan->trig_mdma) || n_sg != 0) + for (i = n_sg; i < desc->num_sgs; i++) + residue += desc->sg_req[i].len; + + if (!chan->mem_burst) + return residue; + + burst_size = chan->mem_burst * chan->mem_width; + modulo = residue % burst_size; + if (modulo) + residue = residue - modulo + burst_size; + + return residue; +} + +static enum dma_status stm32_dma_tx_status(struct dma_chan *c, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct virt_dma_desc *vdesc; + enum dma_status status; + unsigned long flags; + u32 residue = 0; + + status = dma_cookie_status(c, cookie, state); + if (status == DMA_COMPLETE) + return status; + + status = chan->status; + + if (!state) + return status; + + spin_lock_irqsave(&chan->vchan.lock, flags); + vdesc = vchan_find_desc(&chan->vchan, cookie); + if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) + residue = stm32_dma_desc_residue(chan, chan->desc, + chan->next_sg); + else if (vdesc) + residue = stm32_dma_desc_residue(chan, + to_stm32_dma_desc(vdesc), 0); + dma_set_residue(state, residue); + + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + return status; +} + +static int stm32_dma_alloc_chan_resources(struct dma_chan *c) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + int ret; + + chan->config_init = false; + + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); + if (ret < 0) + return ret; + + ret = stm32_dma_disable_chan(chan); + if (ret < 0) + pm_runtime_put(dmadev->ddev.dev); + + return ret; +} + +static void stm32_dma_free_chan_resources(struct dma_chan *c) +{ + struct stm32_dma_chan *chan = to_stm32_dma_chan(c); + struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); + unsigned long flags; + + dev_dbg(chan2dev(chan), "Freeing channel %d\n", chan->id); + + if (chan->busy) { + spin_lock_irqsave(&chan->vchan.lock, flags); + stm32_dma_stop(chan); + chan->desc = NULL; + spin_unlock_irqrestore(&chan->vchan.lock, flags); + } + + pm_runtime_put(dmadev->ddev.dev); + + vchan_free_chan_resources(to_virt_chan(c)); + stm32_dma_clear_reg(&chan->chan_reg); + chan->threshold = 0; +} + +static void stm32_dma_desc_free(struct virt_dma_desc *vdesc) +{ + kfree(container_of(vdesc, struct stm32_dma_desc, vdesc)); +} + +static void stm32_dma_set_config(struct stm32_dma_chan *chan, + struct stm32_dma_cfg *cfg) +{ + stm32_dma_clear_reg(&chan->chan_reg); + + chan->chan_reg.dma_scr = cfg->stream_config & STM32_DMA_SCR_CFG_MASK; + chan->chan_reg.dma_scr |= FIELD_PREP(STM32_DMA_SCR_REQ_MASK, cfg->request_line); + + /* Enable Interrupts */ + chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE; + + chan->threshold = FIELD_GET(STM32_DMA_THRESHOLD_FTR_MASK, cfg->features); + if (FIELD_GET(STM32_DMA_DIRECT_MODE_MASK, cfg->features)) + chan->threshold = STM32_DMA_FIFO_THRESHOLD_NONE; + if (FIELD_GET(STM32_DMA_ALT_ACK_MODE_MASK, cfg->features)) + chan->chan_reg.dma_scr |= STM32_DMA_SCR_TRBUFF; + chan->mdma_config.stream_id = FIELD_GET(STM32_DMA_MDMA_STREAM_ID_MASK, cfg->features); +} + +static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct stm32_dma_device *dmadev = ofdma->of_dma_data; + struct device *dev = dmadev->ddev.dev; + struct stm32_dma_cfg cfg; + struct stm32_dma_chan *chan; + struct dma_chan *c; + + if (dma_spec->args_count < 4) { + dev_err(dev, "Bad number of cells\n"); + return NULL; + } + + cfg.channel_id = dma_spec->args[0]; + cfg.request_line = dma_spec->args[1]; + cfg.stream_config = dma_spec->args[2]; + cfg.features = dma_spec->args[3]; + + if (cfg.channel_id >= STM32_DMA_MAX_CHANNELS || + cfg.request_line >= STM32_DMA_MAX_REQUEST_ID) { + dev_err(dev, "Bad channel and/or request id\n"); + return NULL; + } + + chan = &dmadev->chan[cfg.channel_id]; + + c = dma_get_slave_channel(&chan->vchan.chan); + if (!c) { + dev_err(dev, "No more channels available\n"); + return NULL; + } + + stm32_dma_set_config(chan, &cfg); + + return c; +} + +static const struct of_device_id stm32_dma_of_match[] = { + { .compatible = "st,stm32-dma", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, stm32_dma_of_match); + +static int stm32_dma_probe(struct platform_device *pdev) +{ + struct stm32_dma_chan *chan; + struct stm32_dma_device *dmadev; + struct dma_device *dd; + const struct of_device_id *match; + struct resource *res; + struct reset_control *rst; + int i, ret; + + match = of_match_device(stm32_dma_of_match, &pdev->dev); + if (!match) { + dev_err(&pdev->dev, "Error: No device match found\n"); + return -ENODEV; + } + + dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL); + if (!dmadev) + return -ENOMEM; + + dd = &dmadev->ddev; + + dmadev->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(dmadev->base)) + return PTR_ERR(dmadev->base); + + dmadev->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dmadev->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(dmadev->clk), "Can't get clock\n"); + + ret = clk_prepare_enable(dmadev->clk); + if (ret < 0) { + dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret); + return ret; + } + + dmadev->mem2mem = of_property_read_bool(pdev->dev.of_node, + "st,mem2mem"); + + rst = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(rst)) { + ret = PTR_ERR(rst); + if (ret == -EPROBE_DEFER) + goto clk_free; + } else { + reset_control_assert(rst); + udelay(2); + reset_control_deassert(rst); + } + + dma_set_max_seg_size(&pdev->dev, STM32_DMA_ALIGNED_MAX_DATA_ITEMS); + + dma_cap_set(DMA_SLAVE, dd->cap_mask); + dma_cap_set(DMA_PRIVATE, dd->cap_mask); + dma_cap_set(DMA_CYCLIC, dd->cap_mask); + dd->device_alloc_chan_resources = stm32_dma_alloc_chan_resources; + dd->device_free_chan_resources = stm32_dma_free_chan_resources; + dd->device_tx_status = stm32_dma_tx_status; + dd->device_issue_pending = stm32_dma_issue_pending; + dd->device_prep_slave_sg = stm32_dma_prep_slave_sg; + dd->device_prep_dma_cyclic = stm32_dma_prep_dma_cyclic; + dd->device_config = stm32_dma_slave_config; + dd->device_pause = stm32_dma_pause; + dd->device_resume = stm32_dma_resume; + dd->device_terminate_all = stm32_dma_terminate_all; + dd->device_synchronize = stm32_dma_synchronize; + dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + dd->copy_align = DMAENGINE_ALIGN_32_BYTES; + dd->max_burst = STM32_DMA_MAX_BURST; + dd->max_sg_burst = STM32_DMA_ALIGNED_MAX_DATA_ITEMS; + dd->descriptor_reuse = true; + dd->dev = &pdev->dev; + INIT_LIST_HEAD(&dd->channels); + + if (dmadev->mem2mem) { + dma_cap_set(DMA_MEMCPY, dd->cap_mask); + dd->device_prep_dma_memcpy = stm32_dma_prep_dma_memcpy; + dd->directions |= BIT(DMA_MEM_TO_MEM); + } + + for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) { + chan = &dmadev->chan[i]; + chan->id = i; + chan->vchan.desc_free = stm32_dma_desc_free; + vchan_init(&chan->vchan, dd); + + chan->mdma_config.ifcr = res->start; + chan->mdma_config.ifcr += STM32_DMA_IFCR(chan->id); + + chan->mdma_config.tcf = STM32_DMA_TCI; + chan->mdma_config.tcf <<= STM32_DMA_FLAGS_SHIFT(chan->id); + } + + ret = dma_async_device_register(dd); + if (ret) + goto clk_free; + + for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) { + chan = &dmadev->chan[i]; + ret = platform_get_irq(pdev, i); + if (ret < 0) + goto err_unregister; + chan->irq = ret; + + ret = devm_request_irq(&pdev->dev, chan->irq, + stm32_dma_chan_irq, 0, + dev_name(chan2dev(chan)), chan); + if (ret) { + dev_err(&pdev->dev, + "request_irq failed with err %d channel %d\n", + ret, i); + goto err_unregister; + } + } + + ret = of_dma_controller_register(pdev->dev.of_node, + stm32_dma_of_xlate, dmadev); + if (ret < 0) { + dev_err(&pdev->dev, + "STM32 DMA DMA OF registration failed %d\n", ret); + goto err_unregister; + } + + platform_set_drvdata(pdev, dmadev); + + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_put(&pdev->dev); + + dev_info(&pdev->dev, "STM32 DMA driver registered\n"); + + return 0; + +err_unregister: + dma_async_device_unregister(dd); +clk_free: + clk_disable_unprepare(dmadev->clk); + + return ret; +} + +#ifdef CONFIG_PM +static int stm32_dma_runtime_suspend(struct device *dev) +{ + struct stm32_dma_device *dmadev = dev_get_drvdata(dev); + + clk_disable_unprepare(dmadev->clk); + + return 0; +} + +static int stm32_dma_runtime_resume(struct device *dev) +{ + struct stm32_dma_device *dmadev = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(dmadev->clk); + if (ret) { + dev_err(dev, "failed to prepare_enable clock\n"); + return ret; + } + + return 0; +} +#endif + +#ifdef CONFIG_PM_SLEEP +static int stm32_dma_pm_suspend(struct device *dev) +{ + struct stm32_dma_device *dmadev = dev_get_drvdata(dev); + int id, ret, scr; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + for (id = 0; id < STM32_DMA_MAX_CHANNELS; id++) { + scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id)); + if (scr & STM32_DMA_SCR_EN) { + dev_warn(dev, "Suspend is prevented by Chan %i\n", id); + return -EBUSY; + } + } + + pm_runtime_put_sync(dev); + + pm_runtime_force_suspend(dev); + + return 0; +} + +static int stm32_dma_pm_resume(struct device *dev) +{ + return pm_runtime_force_resume(dev); +} +#endif + +static const struct dev_pm_ops stm32_dma_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(stm32_dma_pm_suspend, stm32_dma_pm_resume) + SET_RUNTIME_PM_OPS(stm32_dma_runtime_suspend, + stm32_dma_runtime_resume, NULL) +}; + +static struct platform_driver stm32_dma_driver = { + .driver = { + .name = "stm32-dma", + .of_match_table = stm32_dma_of_match, + .pm = &stm32_dma_pm_ops, + }, + .probe = stm32_dma_probe, +}; + +static int __init stm32_dma_init(void) +{ + return platform_driver_register(&stm32_dma_driver); +} +subsys_initcall(stm32_dma_init); diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c new file mode 100644 index 0000000000..8d77e2a793 --- /dev/null +++ b/drivers/dma/stm32-dmamux.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * + * Copyright (C) STMicroelectronics SA 2017 + * Author(s): M'boumba Cedric Madianga + * Pierre-Yves Mordret + * + * DMA Router driver for STM32 DMA MUX + * + * Based on TI DMA Crossbar driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define STM32_DMAMUX_CCR(x) (0x4 * (x)) +#define STM32_DMAMUX_MAX_DMA_REQUESTS 32 +#define STM32_DMAMUX_MAX_REQUESTS 255 + +struct stm32_dmamux { + u32 master; + u32 request; + u32 chan_id; +}; + +struct stm32_dmamux_data { + struct dma_router dmarouter; + struct clk *clk; + void __iomem *iomem; + u32 dma_requests; /* Number of DMA requests connected to DMAMUX */ + u32 dmamux_requests; /* Number of DMA requests routed toward DMAs */ + spinlock_t lock; /* Protects register access */ + DECLARE_BITMAP(dma_inuse, STM32_DMAMUX_MAX_DMA_REQUESTS); /* Used DMA channel */ + u32 ccr[STM32_DMAMUX_MAX_DMA_REQUESTS]; /* Used to backup CCR register + * in suspend + */ + u32 dma_reqs[]; /* Number of DMA Request per DMA masters. + * [0] holds number of DMA Masters. + * To be kept at very end of this structure + */ +}; + +static inline u32 stm32_dmamux_read(void __iomem *iomem, u32 reg) +{ + return readl_relaxed(iomem + reg); +} + +static inline void stm32_dmamux_write(void __iomem *iomem, u32 reg, u32 val) +{ + writel_relaxed(val, iomem + reg); +} + +static void stm32_dmamux_free(struct device *dev, void *route_data) +{ + struct stm32_dmamux_data *dmamux = dev_get_drvdata(dev); + struct stm32_dmamux *mux = route_data; + unsigned long flags; + + /* Clear dma request */ + spin_lock_irqsave(&dmamux->lock, flags); + + stm32_dmamux_write(dmamux->iomem, STM32_DMAMUX_CCR(mux->chan_id), 0); + clear_bit(mux->chan_id, dmamux->dma_inuse); + + pm_runtime_put_sync(dev); + + spin_unlock_irqrestore(&dmamux->lock, flags); + + dev_dbg(dev, "Unmapping DMAMUX(%u) to DMA%u(%u)\n", + mux->request, mux->master, mux->chan_id); + + kfree(mux); +} + +static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct stm32_dmamux_data *dmamux = platform_get_drvdata(pdev); + struct stm32_dmamux *mux; + u32 i, min, max; + int ret; + unsigned long flags; + + if (dma_spec->args_count != 3) { + dev_err(&pdev->dev, "invalid number of dma mux args\n"); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[0] > dmamux->dmamux_requests) { + dev_err(&pdev->dev, "invalid mux request number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + mux = kzalloc(sizeof(*mux), GFP_KERNEL); + if (!mux) + return ERR_PTR(-ENOMEM); + + spin_lock_irqsave(&dmamux->lock, flags); + mux->chan_id = find_first_zero_bit(dmamux->dma_inuse, + dmamux->dma_requests); + + if (mux->chan_id == dmamux->dma_requests) { + spin_unlock_irqrestore(&dmamux->lock, flags); + dev_err(&pdev->dev, "Run out of free DMA requests\n"); + ret = -ENOMEM; + goto error_chan_id; + } + set_bit(mux->chan_id, dmamux->dma_inuse); + spin_unlock_irqrestore(&dmamux->lock, flags); + + /* Look for DMA Master */ + for (i = 1, min = 0, max = dmamux->dma_reqs[i]; + i <= dmamux->dma_reqs[0]; + min += dmamux->dma_reqs[i], max += dmamux->dma_reqs[++i]) + if (mux->chan_id < max) + break; + mux->master = i - 1; + + /* The of_node_put() will be done in of_dma_router_xlate function */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", i - 1); + if (!dma_spec->np) { + dev_err(&pdev->dev, "can't get dma master\n"); + ret = -EINVAL; + goto error; + } + + /* Set dma request */ + spin_lock_irqsave(&dmamux->lock, flags); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { + spin_unlock_irqrestore(&dmamux->lock, flags); + goto error; + } + spin_unlock_irqrestore(&dmamux->lock, flags); + + mux->request = dma_spec->args[0]; + + /* craft DMA spec */ + dma_spec->args[3] = dma_spec->args[2] | mux->chan_id << 16; + dma_spec->args[2] = dma_spec->args[1]; + dma_spec->args[1] = 0; + dma_spec->args[0] = mux->chan_id - min; + dma_spec->args_count = 4; + + stm32_dmamux_write(dmamux->iomem, STM32_DMAMUX_CCR(mux->chan_id), + mux->request); + dev_dbg(&pdev->dev, "Mapping DMAMUX(%u) to DMA%u(%u)\n", + mux->request, mux->master, mux->chan_id); + + return mux; + +error: + clear_bit(mux->chan_id, dmamux->dma_inuse); + +error_chan_id: + kfree(mux); + return ERR_PTR(ret); +} + +static const struct of_device_id stm32_stm32dma_master_match[] __maybe_unused = { + { .compatible = "st,stm32-dma", }, + {}, +}; + +static int stm32_dmamux_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; + struct device_node *dma_node; + struct stm32_dmamux_data *stm32_dmamux; + void __iomem *iomem; + struct reset_control *rst; + int i, count, ret; + u32 dma_req; + + if (!node) + return -ENODEV; + + count = device_property_count_u32(&pdev->dev, "dma-masters"); + if (count < 0) { + dev_err(&pdev->dev, "Can't get DMA master(s) node\n"); + return -ENODEV; + } + + stm32_dmamux = devm_kzalloc(&pdev->dev, sizeof(*stm32_dmamux) + + sizeof(u32) * (count + 1), GFP_KERNEL); + if (!stm32_dmamux) + return -ENOMEM; + + dma_req = 0; + for (i = 1; i <= count; i++) { + dma_node = of_parse_phandle(node, "dma-masters", i - 1); + + match = of_match_node(stm32_stm32dma_master_match, dma_node); + if (!match) { + dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); + return -EINVAL; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &stm32_dmamux->dma_reqs[i])) { + dev_info(&pdev->dev, + "Missing MUX output information, using %u.\n", + STM32_DMAMUX_MAX_DMA_REQUESTS); + stm32_dmamux->dma_reqs[i] = + STM32_DMAMUX_MAX_DMA_REQUESTS; + } + dma_req += stm32_dmamux->dma_reqs[i]; + of_node_put(dma_node); + } + + if (dma_req > STM32_DMAMUX_MAX_DMA_REQUESTS) { + dev_err(&pdev->dev, "Too many DMA Master Requests to manage\n"); + return -ENODEV; + } + + stm32_dmamux->dma_requests = dma_req; + stm32_dmamux->dma_reqs[0] = count; + + if (device_property_read_u32(&pdev->dev, "dma-requests", + &stm32_dmamux->dmamux_requests)) { + stm32_dmamux->dmamux_requests = STM32_DMAMUX_MAX_REQUESTS; + dev_warn(&pdev->dev, "DMAMUX defaulting on %u requests\n", + stm32_dmamux->dmamux_requests); + } + pm_runtime_get_noresume(&pdev->dev); + + iomem = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + spin_lock_init(&stm32_dmamux->lock); + + stm32_dmamux->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(stm32_dmamux->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(stm32_dmamux->clk), + "Missing clock controller\n"); + + ret = clk_prepare_enable(stm32_dmamux->clk); + if (ret < 0) { + dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret); + return ret; + } + + rst = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(rst)) { + ret = PTR_ERR(rst); + if (ret == -EPROBE_DEFER) + goto err_clk; + } else if (count > 1) { /* Don't reset if there is only one dma-master */ + reset_control_assert(rst); + udelay(2); + reset_control_deassert(rst); + } + + stm32_dmamux->iomem = iomem; + stm32_dmamux->dmarouter.dev = &pdev->dev; + stm32_dmamux->dmarouter.route_free = stm32_dmamux_free; + + platform_set_drvdata(pdev, stm32_dmamux); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + pm_runtime_get_noresume(&pdev->dev); + + /* Reset the dmamux */ + for (i = 0; i < stm32_dmamux->dma_requests; i++) + stm32_dmamux_write(stm32_dmamux->iomem, STM32_DMAMUX_CCR(i), 0); + + pm_runtime_put(&pdev->dev); + + ret = of_dma_router_register(node, stm32_dmamux_route_allocate, + &stm32_dmamux->dmarouter); + if (ret) + goto pm_disable; + + return 0; + +pm_disable: + pm_runtime_disable(&pdev->dev); +err_clk: + clk_disable_unprepare(stm32_dmamux->clk); + + return ret; +} + +#ifdef CONFIG_PM +static int stm32_dmamux_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev); + + clk_disable_unprepare(stm32_dmamux->clk); + + return 0; +} + +static int stm32_dmamux_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev); + int ret; + + ret = clk_prepare_enable(stm32_dmamux->clk); + if (ret) { + dev_err(&pdev->dev, "failed to prepare_enable clock\n"); + return ret; + } + + return 0; +} +#endif + +#ifdef CONFIG_PM_SLEEP +static int stm32_dmamux_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev); + int i, ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + for (i = 0; i < stm32_dmamux->dma_requests; i++) + stm32_dmamux->ccr[i] = stm32_dmamux_read(stm32_dmamux->iomem, + STM32_DMAMUX_CCR(i)); + + pm_runtime_put_sync(dev); + + pm_runtime_force_suspend(dev); + + return 0; +} + +static int stm32_dmamux_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev); + int i, ret; + + ret = pm_runtime_force_resume(dev); + if (ret < 0) + return ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + for (i = 0; i < stm32_dmamux->dma_requests; i++) + stm32_dmamux_write(stm32_dmamux->iomem, STM32_DMAMUX_CCR(i), + stm32_dmamux->ccr[i]); + + pm_runtime_put_sync(dev); + + return 0; +} +#endif + +static const struct dev_pm_ops stm32_dmamux_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(stm32_dmamux_suspend, stm32_dmamux_resume) + SET_RUNTIME_PM_OPS(stm32_dmamux_runtime_suspend, + stm32_dmamux_runtime_resume, NULL) +}; + +static const struct of_device_id stm32_dmamux_match[] = { + { .compatible = "st,stm32h7-dmamux" }, + {}, +}; + +static struct platform_driver stm32_dmamux_driver = { + .probe = stm32_dmamux_probe, + .driver = { + .name = "stm32-dmamux", + .of_match_table = stm32_dmamux_match, + .pm = &stm32_dmamux_pm_ops, + }, +}; + +static int __init stm32_dmamux_init(void) +{ + return platform_driver_register(&stm32_dmamux_driver); +} +arch_initcall(stm32_dmamux_init); + +MODULE_DESCRIPTION("DMA Router driver for STM32 DMA MUX"); +MODULE_AUTHOR("M'boumba Cedric Madianga "); +MODULE_AUTHOR("Pierre-Yves Mordret "); diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c new file mode 100644 index 0000000000..f414efdbd8 --- /dev/null +++ b/drivers/dma/stm32-mdma.c @@ -0,0 +1,1830 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * + * Copyright (C) STMicroelectronics SA 2017 + * Author(s): M'boumba Cedric Madianga + * Pierre-Yves Mordret + * + * Driver for STM32 MDMA controller + * + * Inspired by stm32-dma.c and dma-jz4780.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +#define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */ + +/* MDMA Channel x interrupt/status register */ +#define STM32_MDMA_CISR(x) (0x40 + 0x40 * (x)) /* x = 0..62 */ +#define STM32_MDMA_CISR_CRQA BIT(16) +#define STM32_MDMA_CISR_TCIF BIT(4) +#define STM32_MDMA_CISR_BTIF BIT(3) +#define STM32_MDMA_CISR_BRTIF BIT(2) +#define STM32_MDMA_CISR_CTCIF BIT(1) +#define STM32_MDMA_CISR_TEIF BIT(0) + +/* MDMA Channel x interrupt flag clear register */ +#define STM32_MDMA_CIFCR(x) (0x44 + 0x40 * (x)) +#define STM32_MDMA_CIFCR_CLTCIF BIT(4) +#define STM32_MDMA_CIFCR_CBTIF BIT(3) +#define STM32_MDMA_CIFCR_CBRTIF BIT(2) +#define STM32_MDMA_CIFCR_CCTCIF BIT(1) +#define STM32_MDMA_CIFCR_CTEIF BIT(0) +#define STM32_MDMA_CIFCR_CLEAR_ALL (STM32_MDMA_CIFCR_CLTCIF \ + | STM32_MDMA_CIFCR_CBTIF \ + | STM32_MDMA_CIFCR_CBRTIF \ + | STM32_MDMA_CIFCR_CCTCIF \ + | STM32_MDMA_CIFCR_CTEIF) + +/* MDMA Channel x error status register */ +#define STM32_MDMA_CESR(x) (0x48 + 0x40 * (x)) +#define STM32_MDMA_CESR_BSE BIT(11) +#define STM32_MDMA_CESR_ASR BIT(10) +#define STM32_MDMA_CESR_TEMD BIT(9) +#define STM32_MDMA_CESR_TELD BIT(8) +#define STM32_MDMA_CESR_TED BIT(7) +#define STM32_MDMA_CESR_TEA_MASK GENMASK(6, 0) + +/* MDMA Channel x control register */ +#define STM32_MDMA_CCR(x) (0x4C + 0x40 * (x)) +#define STM32_MDMA_CCR_SWRQ BIT(16) +#define STM32_MDMA_CCR_WEX BIT(14) +#define STM32_MDMA_CCR_HEX BIT(13) +#define STM32_MDMA_CCR_BEX BIT(12) +#define STM32_MDMA_CCR_SM BIT(8) +#define STM32_MDMA_CCR_PL_MASK GENMASK(7, 6) +#define STM32_MDMA_CCR_PL(n) FIELD_PREP(STM32_MDMA_CCR_PL_MASK, (n)) +#define STM32_MDMA_CCR_TCIE BIT(5) +#define STM32_MDMA_CCR_BTIE BIT(4) +#define STM32_MDMA_CCR_BRTIE BIT(3) +#define STM32_MDMA_CCR_CTCIE BIT(2) +#define STM32_MDMA_CCR_TEIE BIT(1) +#define STM32_MDMA_CCR_EN BIT(0) +#define STM32_MDMA_CCR_IRQ_MASK (STM32_MDMA_CCR_TCIE \ + | STM32_MDMA_CCR_BTIE \ + | STM32_MDMA_CCR_BRTIE \ + | STM32_MDMA_CCR_CTCIE \ + | STM32_MDMA_CCR_TEIE) + +/* MDMA Channel x transfer configuration register */ +#define STM32_MDMA_CTCR(x) (0x50 + 0x40 * (x)) +#define STM32_MDMA_CTCR_BWM BIT(31) +#define STM32_MDMA_CTCR_SWRM BIT(30) +#define STM32_MDMA_CTCR_TRGM_MSK GENMASK(29, 28) +#define STM32_MDMA_CTCR_TRGM(n) FIELD_PREP(STM32_MDMA_CTCR_TRGM_MSK, (n)) +#define STM32_MDMA_CTCR_TRGM_GET(n) FIELD_GET(STM32_MDMA_CTCR_TRGM_MSK, (n)) +#define STM32_MDMA_CTCR_PAM_MASK GENMASK(27, 26) +#define STM32_MDMA_CTCR_PAM(n) FIELD_PREP(STM32_MDMA_CTCR_PAM_MASK, (n)) +#define STM32_MDMA_CTCR_PKE BIT(25) +#define STM32_MDMA_CTCR_TLEN_MSK GENMASK(24, 18) +#define STM32_MDMA_CTCR_TLEN(n) FIELD_PREP(STM32_MDMA_CTCR_TLEN_MSK, (n)) +#define STM32_MDMA_CTCR_TLEN_GET(n) FIELD_GET(STM32_MDMA_CTCR_TLEN_MSK, (n)) +#define STM32_MDMA_CTCR_LEN2_MSK GENMASK(25, 18) +#define STM32_MDMA_CTCR_LEN2(n) FIELD_PREP(STM32_MDMA_CTCR_LEN2_MSK, (n)) +#define STM32_MDMA_CTCR_LEN2_GET(n) FIELD_GET(STM32_MDMA_CTCR_LEN2_MSK, (n)) +#define STM32_MDMA_CTCR_DBURST_MASK GENMASK(17, 15) +#define STM32_MDMA_CTCR_DBURST(n) FIELD_PREP(STM32_MDMA_CTCR_DBURST_MASK, (n)) +#define STM32_MDMA_CTCR_SBURST_MASK GENMASK(14, 12) +#define STM32_MDMA_CTCR_SBURST(n) FIELD_PREP(STM32_MDMA_CTCR_SBURST_MASK, (n)) +#define STM32_MDMA_CTCR_DINCOS_MASK GENMASK(11, 10) +#define STM32_MDMA_CTCR_DINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_DINCOS_MASK, (n)) +#define STM32_MDMA_CTCR_SINCOS_MASK GENMASK(9, 8) +#define STM32_MDMA_CTCR_SINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_SINCOS_MASK, (n)) +#define STM32_MDMA_CTCR_DSIZE_MASK GENMASK(7, 6) +#define STM32_MDMA_CTCR_DSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_DSIZE_MASK, (n)) +#define STM32_MDMA_CTCR_SSIZE_MASK GENMASK(5, 4) +#define STM32_MDMA_CTCR_SSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_SSIZE_MASK, (n)) +#define STM32_MDMA_CTCR_DINC_MASK GENMASK(3, 2) +#define STM32_MDMA_CTCR_DINC(n) FIELD_PREP(STM32_MDMA_CTCR_DINC_MASK, (n)) +#define STM32_MDMA_CTCR_SINC_MASK GENMASK(1, 0) +#define STM32_MDMA_CTCR_SINC(n) FIELD_PREP(STM32_MDMA_CTCR_SINC_MASK, (n)) +#define STM32_MDMA_CTCR_CFG_MASK (STM32_MDMA_CTCR_SINC_MASK \ + | STM32_MDMA_CTCR_DINC_MASK \ + | STM32_MDMA_CTCR_SINCOS_MASK \ + | STM32_MDMA_CTCR_DINCOS_MASK \ + | STM32_MDMA_CTCR_LEN2_MSK \ + | STM32_MDMA_CTCR_TRGM_MSK) + +/* MDMA Channel x block number of data register */ +#define STM32_MDMA_CBNDTR(x) (0x54 + 0x40 * (x)) +#define STM32_MDMA_CBNDTR_BRC_MK GENMASK(31, 20) +#define STM32_MDMA_CBNDTR_BRC(n) FIELD_PREP(STM32_MDMA_CBNDTR_BRC_MK, (n)) +#define STM32_MDMA_CBNDTR_BRC_GET(n) FIELD_GET(STM32_MDMA_CBNDTR_BRC_MK, (n)) + +#define STM32_MDMA_CBNDTR_BRDUM BIT(19) +#define STM32_MDMA_CBNDTR_BRSUM BIT(18) +#define STM32_MDMA_CBNDTR_BNDT_MASK GENMASK(16, 0) +#define STM32_MDMA_CBNDTR_BNDT(n) FIELD_PREP(STM32_MDMA_CBNDTR_BNDT_MASK, (n)) + +/* MDMA Channel x source address register */ +#define STM32_MDMA_CSAR(x) (0x58 + 0x40 * (x)) + +/* MDMA Channel x destination address register */ +#define STM32_MDMA_CDAR(x) (0x5C + 0x40 * (x)) + +/* MDMA Channel x block repeat address update register */ +#define STM32_MDMA_CBRUR(x) (0x60 + 0x40 * (x)) +#define STM32_MDMA_CBRUR_DUV_MASK GENMASK(31, 16) +#define STM32_MDMA_CBRUR_DUV(n) FIELD_PREP(STM32_MDMA_CBRUR_DUV_MASK, (n)) +#define STM32_MDMA_CBRUR_SUV_MASK GENMASK(15, 0) +#define STM32_MDMA_CBRUR_SUV(n) FIELD_PREP(STM32_MDMA_CBRUR_SUV_MASK, (n)) + +/* MDMA Channel x link address register */ +#define STM32_MDMA_CLAR(x) (0x64 + 0x40 * (x)) + +/* MDMA Channel x trigger and bus selection register */ +#define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x)) +#define STM32_MDMA_CTBR_DBUS BIT(17) +#define STM32_MDMA_CTBR_SBUS BIT(16) +#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0) +#define STM32_MDMA_CTBR_TSEL(n) FIELD_PREP(STM32_MDMA_CTBR_TSEL_MASK, (n)) + +/* MDMA Channel x mask address register */ +#define STM32_MDMA_CMAR(x) (0x70 + 0x40 * (x)) + +/* MDMA Channel x mask data register */ +#define STM32_MDMA_CMDR(x) (0x74 + 0x40 * (x)) + +#define STM32_MDMA_MAX_BUF_LEN 128 +#define STM32_MDMA_MAX_BLOCK_LEN 65536 +#define STM32_MDMA_MAX_CHANNELS 32 +#define STM32_MDMA_MAX_REQUESTS 256 +#define STM32_MDMA_MAX_BURST 128 +#define STM32_MDMA_VERY_HIGH_PRIORITY 0x3 + +enum stm32_mdma_trigger_mode { + STM32_MDMA_BUFFER, + STM32_MDMA_BLOCK, + STM32_MDMA_BLOCK_REP, + STM32_MDMA_LINKED_LIST, +}; + +enum stm32_mdma_width { + STM32_MDMA_BYTE, + STM32_MDMA_HALF_WORD, + STM32_MDMA_WORD, + STM32_MDMA_DOUBLE_WORD, +}; + +enum stm32_mdma_inc_mode { + STM32_MDMA_FIXED = 0, + STM32_MDMA_INC = 2, + STM32_MDMA_DEC = 3, +}; + +struct stm32_mdma_chan_config { + u32 request; + u32 priority_level; + u32 transfer_config; + u32 mask_addr; + u32 mask_data; + bool m2m_hw; /* True when MDMA is triggered by STM32 DMA */ +}; + +struct stm32_mdma_hwdesc { + u32 ctcr; + u32 cbndtr; + u32 csar; + u32 cdar; + u32 cbrur; + u32 clar; + u32 ctbr; + u32 dummy; + u32 cmar; + u32 cmdr; +} __aligned(64); + +struct stm32_mdma_desc_node { + struct stm32_mdma_hwdesc *hwdesc; + dma_addr_t hwdesc_phys; +}; + +struct stm32_mdma_desc { + struct virt_dma_desc vdesc; + u32 ccr; + bool cyclic; + u32 count; + struct stm32_mdma_desc_node node[]; +}; + +struct stm32_mdma_dma_config { + u32 request; /* STM32 DMA channel stream id, triggering MDMA */ + u32 cmar; /* STM32 DMA interrupt flag clear register address */ + u32 cmdr; /* STM32 DMA Transfer Complete flag */ +}; + +struct stm32_mdma_chan { + struct virt_dma_chan vchan; + struct dma_pool *desc_pool; + u32 id; + struct stm32_mdma_desc *desc; + u32 curr_hwdesc; + struct dma_slave_config dma_config; + struct stm32_mdma_chan_config chan_config; + bool busy; + u32 mem_burst; + u32 mem_width; +}; + +struct stm32_mdma_device { + struct dma_device ddev; + void __iomem *base; + struct clk *clk; + int irq; + u32 nr_channels; + u32 nr_requests; + u32 nr_ahb_addr_masks; + u32 chan_reserved; + struct stm32_mdma_chan chan[STM32_MDMA_MAX_CHANNELS]; + u32 ahb_addr_masks[]; +}; + +static struct stm32_mdma_device *stm32_mdma_get_dev( + struct stm32_mdma_chan *chan) +{ + return container_of(chan->vchan.chan.device, struct stm32_mdma_device, + ddev); +} + +static struct stm32_mdma_chan *to_stm32_mdma_chan(struct dma_chan *c) +{ + return container_of(c, struct stm32_mdma_chan, vchan.chan); +} + +static struct stm32_mdma_desc *to_stm32_mdma_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct stm32_mdma_desc, vdesc); +} + +static struct device *chan2dev(struct stm32_mdma_chan *chan) +{ + return &chan->vchan.chan.dev->device; +} + +static struct device *mdma2dev(struct stm32_mdma_device *mdma_dev) +{ + return mdma_dev->ddev.dev; +} + +static u32 stm32_mdma_read(struct stm32_mdma_device *dmadev, u32 reg) +{ + return readl_relaxed(dmadev->base + reg); +} + +static void stm32_mdma_write(struct stm32_mdma_device *dmadev, u32 reg, u32 val) +{ + writel_relaxed(val, dmadev->base + reg); +} + +static void stm32_mdma_set_bits(struct stm32_mdma_device *dmadev, u32 reg, + u32 mask) +{ + void __iomem *addr = dmadev->base + reg; + + writel_relaxed(readl_relaxed(addr) | mask, addr); +} + +static void stm32_mdma_clr_bits(struct stm32_mdma_device *dmadev, u32 reg, + u32 mask) +{ + void __iomem *addr = dmadev->base + reg; + + writel_relaxed(readl_relaxed(addr) & ~mask, addr); +} + +static struct stm32_mdma_desc *stm32_mdma_alloc_desc( + struct stm32_mdma_chan *chan, u32 count) +{ + struct stm32_mdma_desc *desc; + int i; + + desc = kzalloc(struct_size(desc, node, count), GFP_NOWAIT); + if (!desc) + return NULL; + + for (i = 0; i < count; i++) { + desc->node[i].hwdesc = + dma_pool_alloc(chan->desc_pool, GFP_NOWAIT, + &desc->node[i].hwdesc_phys); + if (!desc->node[i].hwdesc) + goto err; + } + + desc->count = count; + + return desc; + +err: + dev_err(chan2dev(chan), "Failed to allocate descriptor\n"); + while (--i >= 0) + dma_pool_free(chan->desc_pool, desc->node[i].hwdesc, + desc->node[i].hwdesc_phys); + kfree(desc); + return NULL; +} + +static void stm32_mdma_desc_free(struct virt_dma_desc *vdesc) +{ + struct stm32_mdma_desc *desc = to_stm32_mdma_desc(vdesc); + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(vdesc->tx.chan); + int i; + + for (i = 0; i < desc->count; i++) + dma_pool_free(chan->desc_pool, desc->node[i].hwdesc, + desc->node[i].hwdesc_phys); + kfree(desc); +} + +static int stm32_mdma_get_width(struct stm32_mdma_chan *chan, + enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + case DMA_SLAVE_BUSWIDTH_2_BYTES: + case DMA_SLAVE_BUSWIDTH_4_BYTES: + case DMA_SLAVE_BUSWIDTH_8_BYTES: + return ffs(width) - 1; + default: + dev_err(chan2dev(chan), "Dma bus width %i not supported\n", + width); + return -EINVAL; + } +} + +static enum dma_slave_buswidth stm32_mdma_get_max_width(dma_addr_t addr, + u32 buf_len, u32 tlen) +{ + enum dma_slave_buswidth max_width = DMA_SLAVE_BUSWIDTH_8_BYTES; + + for (max_width = DMA_SLAVE_BUSWIDTH_8_BYTES; + max_width > DMA_SLAVE_BUSWIDTH_1_BYTE; + max_width >>= 1) { + /* + * Address and buffer length both have to be aligned on + * bus width + */ + if ((((buf_len | addr) & (max_width - 1)) == 0) && + tlen >= max_width) + break; + } + + return max_width; +} + +static u32 stm32_mdma_get_best_burst(u32 buf_len, u32 tlen, u32 max_burst, + enum dma_slave_buswidth width) +{ + u32 best_burst; + + best_burst = min((u32)1 << __ffs(tlen | buf_len), + max_burst * width) / width; + + return (best_burst > 0) ? best_burst : 1; +} + +static int stm32_mdma_disable_chan(struct stm32_mdma_chan *chan) +{ + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + u32 ccr, cisr, id, reg; + int ret; + + id = chan->id; + reg = STM32_MDMA_CCR(id); + + /* Disable interrupts */ + stm32_mdma_clr_bits(dmadev, reg, STM32_MDMA_CCR_IRQ_MASK); + + ccr = stm32_mdma_read(dmadev, reg); + if (ccr & STM32_MDMA_CCR_EN) { + stm32_mdma_clr_bits(dmadev, reg, STM32_MDMA_CCR_EN); + + /* Ensure that any ongoing transfer has been completed */ + ret = readl_relaxed_poll_timeout_atomic( + dmadev->base + STM32_MDMA_CISR(id), cisr, + (cisr & STM32_MDMA_CISR_CTCIF), 10, 1000); + if (ret) { + dev_err(chan2dev(chan), "%s: timeout!\n", __func__); + return -EBUSY; + } + } + + return 0; +} + +static void stm32_mdma_stop(struct stm32_mdma_chan *chan) +{ + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + u32 status; + int ret; + + /* Disable DMA */ + ret = stm32_mdma_disable_chan(chan); + if (ret < 0) + return; + + /* Clear interrupt status if it is there */ + status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id)); + if (status) { + dev_dbg(chan2dev(chan), "%s(): clearing interrupt: 0x%08x\n", + __func__, status); + stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(chan->id), status); + } + + chan->busy = false; +} + +static void stm32_mdma_set_bus(struct stm32_mdma_device *dmadev, u32 *ctbr, + u32 ctbr_mask, u32 src_addr) +{ + u32 mask; + int i; + + /* Check if memory device is on AHB or AXI */ + *ctbr &= ~ctbr_mask; + mask = src_addr & 0xF0000000; + for (i = 0; i < dmadev->nr_ahb_addr_masks; i++) { + if (mask == dmadev->ahb_addr_masks[i]) { + *ctbr |= ctbr_mask; + break; + } + } +} + +static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan, + enum dma_transfer_direction direction, + u32 *mdma_ccr, u32 *mdma_ctcr, + u32 *mdma_ctbr, dma_addr_t addr, + u32 buf_len) +{ + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; + enum dma_slave_buswidth src_addr_width, dst_addr_width; + phys_addr_t src_addr, dst_addr; + int src_bus_width, dst_bus_width; + u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst; + u32 ccr, ctcr, ctbr, tlen; + + src_addr_width = chan->dma_config.src_addr_width; + dst_addr_width = chan->dma_config.dst_addr_width; + src_maxburst = chan->dma_config.src_maxburst; + dst_maxburst = chan->dma_config.dst_maxburst; + + ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & ~STM32_MDMA_CCR_EN; + ctcr = stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id)); + ctbr = stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id)); + + /* Enable HW request mode */ + ctcr &= ~STM32_MDMA_CTCR_SWRM; + + /* Set DINC, SINC, DINCOS, SINCOS, TRGM and TLEN retrieve from DT */ + ctcr &= ~STM32_MDMA_CTCR_CFG_MASK; + ctcr |= chan_config->transfer_config & STM32_MDMA_CTCR_CFG_MASK; + + /* + * For buffer transfer length (TLEN) we have to set + * the number of bytes - 1 in CTCR register + */ + tlen = STM32_MDMA_CTCR_LEN2_GET(ctcr); + ctcr &= ~STM32_MDMA_CTCR_LEN2_MSK; + ctcr |= STM32_MDMA_CTCR_TLEN((tlen - 1)); + + /* Disable Pack Enable */ + ctcr &= ~STM32_MDMA_CTCR_PKE; + + /* Check burst size constraints */ + if (src_maxburst * src_addr_width > STM32_MDMA_MAX_BURST || + dst_maxburst * dst_addr_width > STM32_MDMA_MAX_BURST) { + dev_err(chan2dev(chan), + "burst size * bus width higher than %d bytes\n", + STM32_MDMA_MAX_BURST); + return -EINVAL; + } + + if ((!is_power_of_2(src_maxburst) && src_maxburst > 0) || + (!is_power_of_2(dst_maxburst) && dst_maxburst > 0)) { + dev_err(chan2dev(chan), "burst size must be a power of 2\n"); + return -EINVAL; + } + + /* + * Configure channel control: + * - Clear SW request as in this case this is a HW one + * - Clear WEX, HEX and BEX bits + * - Set priority level + */ + ccr &= ~(STM32_MDMA_CCR_SWRQ | STM32_MDMA_CCR_WEX | STM32_MDMA_CCR_HEX | + STM32_MDMA_CCR_BEX | STM32_MDMA_CCR_PL_MASK); + ccr |= STM32_MDMA_CCR_PL(chan_config->priority_level); + + /* Configure Trigger selection */ + ctbr &= ~STM32_MDMA_CTBR_TSEL_MASK; + ctbr |= STM32_MDMA_CTBR_TSEL(chan_config->request); + + switch (direction) { + case DMA_MEM_TO_DEV: + dst_addr = chan->dma_config.dst_addr; + + /* Set device data size */ + if (chan_config->m2m_hw) + dst_addr_width = stm32_mdma_get_max_width(dst_addr, buf_len, + STM32_MDMA_MAX_BUF_LEN); + dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width); + if (dst_bus_width < 0) + return dst_bus_width; + ctcr &= ~STM32_MDMA_CTCR_DSIZE_MASK; + ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width); + if (chan_config->m2m_hw) { + ctcr &= ~STM32_MDMA_CTCR_DINCOS_MASK; + ctcr |= STM32_MDMA_CTCR_DINCOS(dst_bus_width); + } + + /* Set device burst value */ + if (chan_config->m2m_hw) + dst_maxburst = STM32_MDMA_MAX_BUF_LEN / dst_addr_width; + + dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen, + dst_maxburst, + dst_addr_width); + chan->mem_burst = dst_best_burst; + ctcr &= ~STM32_MDMA_CTCR_DBURST_MASK; + ctcr |= STM32_MDMA_CTCR_DBURST((ilog2(dst_best_burst))); + + /* Set memory data size */ + src_addr_width = stm32_mdma_get_max_width(addr, buf_len, tlen); + chan->mem_width = src_addr_width; + src_bus_width = stm32_mdma_get_width(chan, src_addr_width); + if (src_bus_width < 0) + return src_bus_width; + ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK | + STM32_MDMA_CTCR_SINCOS_MASK; + ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width) | + STM32_MDMA_CTCR_SINCOS(src_bus_width); + + /* Set memory burst value */ + src_maxburst = STM32_MDMA_MAX_BUF_LEN / src_addr_width; + src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen, + src_maxburst, + src_addr_width); + chan->mem_burst = src_best_burst; + ctcr &= ~STM32_MDMA_CTCR_SBURST_MASK; + ctcr |= STM32_MDMA_CTCR_SBURST((ilog2(src_best_burst))); + + /* Select bus */ + stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS, + dst_addr); + + if (dst_bus_width != src_bus_width) + ctcr |= STM32_MDMA_CTCR_PKE; + + /* Set destination address */ + stm32_mdma_write(dmadev, STM32_MDMA_CDAR(chan->id), dst_addr); + break; + + case DMA_DEV_TO_MEM: + src_addr = chan->dma_config.src_addr; + + /* Set device data size */ + if (chan_config->m2m_hw) + src_addr_width = stm32_mdma_get_max_width(src_addr, buf_len, + STM32_MDMA_MAX_BUF_LEN); + + src_bus_width = stm32_mdma_get_width(chan, src_addr_width); + if (src_bus_width < 0) + return src_bus_width; + ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK; + ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width); + if (chan_config->m2m_hw) { + ctcr &= ~STM32_MDMA_CTCR_SINCOS_MASK; + ctcr |= STM32_MDMA_CTCR_SINCOS(src_bus_width); + } + + /* Set device burst value */ + if (chan_config->m2m_hw) + src_maxburst = STM32_MDMA_MAX_BUF_LEN / src_addr_width; + + src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen, + src_maxburst, + src_addr_width); + ctcr &= ~STM32_MDMA_CTCR_SBURST_MASK; + ctcr |= STM32_MDMA_CTCR_SBURST((ilog2(src_best_burst))); + + /* Set memory data size */ + dst_addr_width = stm32_mdma_get_max_width(addr, buf_len, tlen); + chan->mem_width = dst_addr_width; + dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width); + if (dst_bus_width < 0) + return dst_bus_width; + ctcr &= ~(STM32_MDMA_CTCR_DSIZE_MASK | + STM32_MDMA_CTCR_DINCOS_MASK); + ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width) | + STM32_MDMA_CTCR_DINCOS(dst_bus_width); + + /* Set memory burst value */ + dst_maxburst = STM32_MDMA_MAX_BUF_LEN / dst_addr_width; + dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen, + dst_maxburst, + dst_addr_width); + ctcr &= ~STM32_MDMA_CTCR_DBURST_MASK; + ctcr |= STM32_MDMA_CTCR_DBURST((ilog2(dst_best_burst))); + + /* Select bus */ + stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS, + src_addr); + + if (dst_bus_width != src_bus_width) + ctcr |= STM32_MDMA_CTCR_PKE; + + /* Set source address */ + stm32_mdma_write(dmadev, STM32_MDMA_CSAR(chan->id), src_addr); + break; + + default: + dev_err(chan2dev(chan), "Dma direction is not supported\n"); + return -EINVAL; + } + + *mdma_ccr = ccr; + *mdma_ctcr = ctcr; + *mdma_ctbr = ctbr; + + return 0; +} + +static void stm32_mdma_dump_hwdesc(struct stm32_mdma_chan *chan, + struct stm32_mdma_desc_node *node) +{ + dev_dbg(chan2dev(chan), "hwdesc: %pad\n", &node->hwdesc_phys); + dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", node->hwdesc->ctcr); + dev_dbg(chan2dev(chan), "CBNDTR: 0x%08x\n", node->hwdesc->cbndtr); + dev_dbg(chan2dev(chan), "CSAR: 0x%08x\n", node->hwdesc->csar); + dev_dbg(chan2dev(chan), "CDAR: 0x%08x\n", node->hwdesc->cdar); + dev_dbg(chan2dev(chan), "CBRUR: 0x%08x\n", node->hwdesc->cbrur); + dev_dbg(chan2dev(chan), "CLAR: 0x%08x\n", node->hwdesc->clar); + dev_dbg(chan2dev(chan), "CTBR: 0x%08x\n", node->hwdesc->ctbr); + dev_dbg(chan2dev(chan), "CMAR: 0x%08x\n", node->hwdesc->cmar); + dev_dbg(chan2dev(chan), "CMDR: 0x%08x\n\n", node->hwdesc->cmdr); +} + +static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan, + struct stm32_mdma_desc *desc, + enum dma_transfer_direction dir, u32 count, + dma_addr_t src_addr, dma_addr_t dst_addr, + u32 len, u32 ctcr, u32 ctbr, bool is_last, + bool is_first, bool is_cyclic) +{ + struct stm32_mdma_chan_config *config = &chan->chan_config; + struct stm32_mdma_hwdesc *hwdesc; + u32 next = count + 1; + + hwdesc = desc->node[count].hwdesc; + hwdesc->ctcr = ctcr; + hwdesc->cbndtr &= ~(STM32_MDMA_CBNDTR_BRC_MK | + STM32_MDMA_CBNDTR_BRDUM | + STM32_MDMA_CBNDTR_BRSUM | + STM32_MDMA_CBNDTR_BNDT_MASK); + hwdesc->cbndtr |= STM32_MDMA_CBNDTR_BNDT(len); + hwdesc->csar = src_addr; + hwdesc->cdar = dst_addr; + hwdesc->cbrur = 0; + hwdesc->ctbr = ctbr; + hwdesc->cmar = config->mask_addr; + hwdesc->cmdr = config->mask_data; + + if (is_last) { + if (is_cyclic) + hwdesc->clar = desc->node[0].hwdesc_phys; + else + hwdesc->clar = 0; + } else { + hwdesc->clar = desc->node[next].hwdesc_phys; + } + + stm32_mdma_dump_hwdesc(chan, &desc->node[count]); +} + +static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan, + struct stm32_mdma_desc *desc, + struct scatterlist *sgl, u32 sg_len, + enum dma_transfer_direction direction) +{ + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + struct dma_slave_config *dma_config = &chan->dma_config; + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; + struct scatterlist *sg; + dma_addr_t src_addr, dst_addr; + u32 m2m_hw_period, ccr, ctcr, ctbr; + int i, ret = 0; + + if (chan_config->m2m_hw) + m2m_hw_period = sg_dma_len(sgl); + + for_each_sg(sgl, sg, sg_len, i) { + if (sg_dma_len(sg) > STM32_MDMA_MAX_BLOCK_LEN) { + dev_err(chan2dev(chan), "Invalid block len\n"); + return -EINVAL; + } + + if (direction == DMA_MEM_TO_DEV) { + src_addr = sg_dma_address(sg); + dst_addr = dma_config->dst_addr; + if (chan_config->m2m_hw && (i & 1)) + dst_addr += m2m_hw_period; + ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, + &ctcr, &ctbr, src_addr, + sg_dma_len(sg)); + stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS, + src_addr); + } else { + src_addr = dma_config->src_addr; + if (chan_config->m2m_hw && (i & 1)) + src_addr += m2m_hw_period; + dst_addr = sg_dma_address(sg); + ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, + &ctcr, &ctbr, dst_addr, + sg_dma_len(sg)); + stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS, + dst_addr); + } + + if (ret < 0) + return ret; + + stm32_mdma_setup_hwdesc(chan, desc, direction, i, src_addr, + dst_addr, sg_dma_len(sg), ctcr, ctbr, + i == sg_len - 1, i == 0, false); + } + + /* Enable interrupts */ + ccr &= ~STM32_MDMA_CCR_IRQ_MASK; + ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE; + desc->ccr = ccr; + + return 0; +} + +static struct dma_async_tx_descriptor * +stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl, + u32 sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; + struct stm32_mdma_desc *desc; + int i, ret; + + /* + * Once DMA is in setup cyclic mode the channel we cannot assign this + * channel anymore. The DMA channel needs to be aborted or terminated + * for allowing another request. + */ + if (chan->desc && chan->desc->cyclic) { + dev_err(chan2dev(chan), + "Request not allowed when dma in cyclic mode\n"); + return NULL; + } + + desc = stm32_mdma_alloc_desc(chan, sg_len); + if (!desc) + return NULL; + + ret = stm32_mdma_setup_xfer(chan, desc, sgl, sg_len, direction); + if (ret < 0) + goto xfer_setup_err; + + /* + * In case of M2M HW transfer triggered by STM32 DMA, we do not have to clear the + * transfer complete flag by hardware in order to let the CPU rearm the STM32 DMA + * with the next sg element and update some data in dmaengine framework. + */ + if (chan_config->m2m_hw && direction == DMA_MEM_TO_DEV) { + struct stm32_mdma_hwdesc *hwdesc; + + for (i = 0; i < sg_len; i++) { + hwdesc = desc->node[i].hwdesc; + hwdesc->cmar = 0; + hwdesc->cmdr = 0; + } + } + + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); + +xfer_setup_err: + for (i = 0; i < desc->count; i++) + dma_pool_free(chan->desc_pool, desc->node[i].hwdesc, + desc->node[i].hwdesc_phys); + kfree(desc); + return NULL; +} + +static struct dma_async_tx_descriptor * +stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction direction, + unsigned long flags) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + struct dma_slave_config *dma_config = &chan->dma_config; + struct stm32_mdma_chan_config *chan_config = &chan->chan_config; + struct stm32_mdma_desc *desc; + dma_addr_t src_addr, dst_addr; + u32 ccr, ctcr, ctbr, count; + int i, ret; + + /* + * Once DMA is in setup cyclic mode the channel we cannot assign this + * channel anymore. The DMA channel needs to be aborted or terminated + * for allowing another request. + */ + if (chan->desc && chan->desc->cyclic) { + dev_err(chan2dev(chan), + "Request not allowed when dma in cyclic mode\n"); + return NULL; + } + + if (!buf_len || !period_len || period_len > STM32_MDMA_MAX_BLOCK_LEN) { + dev_err(chan2dev(chan), "Invalid buffer/period len\n"); + return NULL; + } + + if (buf_len % period_len) { + dev_err(chan2dev(chan), "buf_len not multiple of period_len\n"); + return NULL; + } + + count = buf_len / period_len; + + desc = stm32_mdma_alloc_desc(chan, count); + if (!desc) + return NULL; + + /* Select bus */ + if (direction == DMA_MEM_TO_DEV) { + src_addr = buf_addr; + ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr, + &ctbr, src_addr, period_len); + stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS, + src_addr); + } else { + dst_addr = buf_addr; + ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr, + &ctbr, dst_addr, period_len); + stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS, + dst_addr); + } + + if (ret < 0) + goto xfer_setup_err; + + /* Enable interrupts */ + ccr &= ~STM32_MDMA_CCR_IRQ_MASK; + ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE | STM32_MDMA_CCR_BTIE; + desc->ccr = ccr; + + /* Configure hwdesc list */ + for (i = 0; i < count; i++) { + if (direction == DMA_MEM_TO_DEV) { + src_addr = buf_addr + i * period_len; + dst_addr = dma_config->dst_addr; + if (chan_config->m2m_hw && (i & 1)) + dst_addr += period_len; + } else { + src_addr = dma_config->src_addr; + if (chan_config->m2m_hw && (i & 1)) + src_addr += period_len; + dst_addr = buf_addr + i * period_len; + } + + stm32_mdma_setup_hwdesc(chan, desc, direction, i, src_addr, + dst_addr, period_len, ctcr, ctbr, + i == count - 1, i == 0, true); + } + + desc->cyclic = true; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); + +xfer_setup_err: + for (i = 0; i < desc->count; i++) + dma_pool_free(chan->desc_pool, desc->node[i].hwdesc, + desc->node[i].hwdesc_phys); + kfree(desc); + return NULL; +} + +static struct dma_async_tx_descriptor * +stm32_mdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + enum dma_slave_buswidth max_width; + struct stm32_mdma_desc *desc; + struct stm32_mdma_hwdesc *hwdesc; + u32 ccr, ctcr, ctbr, cbndtr, count, max_burst, mdma_burst; + u32 best_burst, tlen; + size_t xfer_count, offset; + int src_bus_width, dst_bus_width; + int i; + + /* + * Once DMA is in setup cyclic mode the channel we cannot assign this + * channel anymore. The DMA channel needs to be aborted or terminated + * to allow another request + */ + if (chan->desc && chan->desc->cyclic) { + dev_err(chan2dev(chan), + "Request not allowed when dma in cyclic mode\n"); + return NULL; + } + + count = DIV_ROUND_UP(len, STM32_MDMA_MAX_BLOCK_LEN); + desc = stm32_mdma_alloc_desc(chan, count); + if (!desc) + return NULL; + + ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & ~STM32_MDMA_CCR_EN; + ctcr = stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id)); + ctbr = stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id)); + cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)); + + /* Enable sw req, some interrupts and clear other bits */ + ccr &= ~(STM32_MDMA_CCR_WEX | STM32_MDMA_CCR_HEX | + STM32_MDMA_CCR_BEX | STM32_MDMA_CCR_PL_MASK | + STM32_MDMA_CCR_IRQ_MASK); + ccr |= STM32_MDMA_CCR_TEIE; + + /* Enable SW request mode, dest/src inc and clear other bits */ + ctcr &= ~(STM32_MDMA_CTCR_BWM | STM32_MDMA_CTCR_TRGM_MSK | + STM32_MDMA_CTCR_PAM_MASK | STM32_MDMA_CTCR_PKE | + STM32_MDMA_CTCR_TLEN_MSK | STM32_MDMA_CTCR_DBURST_MASK | + STM32_MDMA_CTCR_SBURST_MASK | STM32_MDMA_CTCR_DINCOS_MASK | + STM32_MDMA_CTCR_SINCOS_MASK | STM32_MDMA_CTCR_DSIZE_MASK | + STM32_MDMA_CTCR_SSIZE_MASK | STM32_MDMA_CTCR_DINC_MASK | + STM32_MDMA_CTCR_SINC_MASK); + ctcr |= STM32_MDMA_CTCR_SWRM | STM32_MDMA_CTCR_SINC(STM32_MDMA_INC) | + STM32_MDMA_CTCR_DINC(STM32_MDMA_INC); + + /* Reset HW request */ + ctbr &= ~STM32_MDMA_CTBR_TSEL_MASK; + + /* Select bus */ + stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS, src); + stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS, dest); + + /* Clear CBNDTR registers */ + cbndtr &= ~(STM32_MDMA_CBNDTR_BRC_MK | STM32_MDMA_CBNDTR_BRDUM | + STM32_MDMA_CBNDTR_BRSUM | STM32_MDMA_CBNDTR_BNDT_MASK); + + if (len <= STM32_MDMA_MAX_BLOCK_LEN) { + cbndtr |= STM32_MDMA_CBNDTR_BNDT(len); + if (len <= STM32_MDMA_MAX_BUF_LEN) { + /* Setup a buffer transfer */ + ccr |= STM32_MDMA_CCR_TCIE | STM32_MDMA_CCR_CTCIE; + ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_BUFFER); + } else { + /* Setup a block transfer */ + ccr |= STM32_MDMA_CCR_BTIE | STM32_MDMA_CCR_CTCIE; + ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_BLOCK); + } + + tlen = STM32_MDMA_MAX_BUF_LEN; + ctcr |= STM32_MDMA_CTCR_TLEN((tlen - 1)); + + /* Set source best burst size */ + max_width = stm32_mdma_get_max_width(src, len, tlen); + src_bus_width = stm32_mdma_get_width(chan, max_width); + + max_burst = tlen / max_width; + best_burst = stm32_mdma_get_best_burst(len, tlen, max_burst, + max_width); + mdma_burst = ilog2(best_burst); + + ctcr |= STM32_MDMA_CTCR_SBURST(mdma_burst) | + STM32_MDMA_CTCR_SSIZE(src_bus_width) | + STM32_MDMA_CTCR_SINCOS(src_bus_width); + + /* Set destination best burst size */ + max_width = stm32_mdma_get_max_width(dest, len, tlen); + dst_bus_width = stm32_mdma_get_width(chan, max_width); + + max_burst = tlen / max_width; + best_burst = stm32_mdma_get_best_burst(len, tlen, max_burst, + max_width); + mdma_burst = ilog2(best_burst); + + ctcr |= STM32_MDMA_CTCR_DBURST(mdma_burst) | + STM32_MDMA_CTCR_DSIZE(dst_bus_width) | + STM32_MDMA_CTCR_DINCOS(dst_bus_width); + + if (dst_bus_width != src_bus_width) + ctcr |= STM32_MDMA_CTCR_PKE; + + /* Prepare hardware descriptor */ + hwdesc = desc->node[0].hwdesc; + hwdesc->ctcr = ctcr; + hwdesc->cbndtr = cbndtr; + hwdesc->csar = src; + hwdesc->cdar = dest; + hwdesc->cbrur = 0; + hwdesc->clar = 0; + hwdesc->ctbr = ctbr; + hwdesc->cmar = 0; + hwdesc->cmdr = 0; + + stm32_mdma_dump_hwdesc(chan, &desc->node[0]); + } else { + /* Setup a LLI transfer */ + ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_LINKED_LIST) | + STM32_MDMA_CTCR_TLEN((STM32_MDMA_MAX_BUF_LEN - 1)); + ccr |= STM32_MDMA_CCR_BTIE | STM32_MDMA_CCR_CTCIE; + tlen = STM32_MDMA_MAX_BUF_LEN; + + for (i = 0, offset = 0; offset < len; + i++, offset += xfer_count) { + xfer_count = min_t(size_t, len - offset, + STM32_MDMA_MAX_BLOCK_LEN); + + /* Set source best burst size */ + max_width = stm32_mdma_get_max_width(src, len, tlen); + src_bus_width = stm32_mdma_get_width(chan, max_width); + + max_burst = tlen / max_width; + best_burst = stm32_mdma_get_best_burst(len, tlen, + max_burst, + max_width); + mdma_burst = ilog2(best_burst); + + ctcr |= STM32_MDMA_CTCR_SBURST(mdma_burst) | + STM32_MDMA_CTCR_SSIZE(src_bus_width) | + STM32_MDMA_CTCR_SINCOS(src_bus_width); + + /* Set destination best burst size */ + max_width = stm32_mdma_get_max_width(dest, len, tlen); + dst_bus_width = stm32_mdma_get_width(chan, max_width); + + max_burst = tlen / max_width; + best_burst = stm32_mdma_get_best_burst(len, tlen, + max_burst, + max_width); + mdma_burst = ilog2(best_burst); + + ctcr |= STM32_MDMA_CTCR_DBURST(mdma_burst) | + STM32_MDMA_CTCR_DSIZE(dst_bus_width) | + STM32_MDMA_CTCR_DINCOS(dst_bus_width); + + if (dst_bus_width != src_bus_width) + ctcr |= STM32_MDMA_CTCR_PKE; + + /* Prepare hardware descriptor */ + stm32_mdma_setup_hwdesc(chan, desc, DMA_MEM_TO_MEM, i, + src + offset, dest + offset, + xfer_count, ctcr, ctbr, + i == count - 1, i == 0, false); + } + } + + desc->ccr = ccr; + + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static void stm32_mdma_dump_reg(struct stm32_mdma_chan *chan) +{ + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + + dev_dbg(chan2dev(chan), "CCR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id))); + dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id))); + dev_dbg(chan2dev(chan), "CBNDTR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id))); + dev_dbg(chan2dev(chan), "CSAR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CSAR(chan->id))); + dev_dbg(chan2dev(chan), "CDAR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CDAR(chan->id))); + dev_dbg(chan2dev(chan), "CBRUR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CBRUR(chan->id))); + dev_dbg(chan2dev(chan), "CLAR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CLAR(chan->id))); + dev_dbg(chan2dev(chan), "CTBR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id))); + dev_dbg(chan2dev(chan), "CMAR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CMAR(chan->id))); + dev_dbg(chan2dev(chan), "CMDR: 0x%08x\n", + stm32_mdma_read(dmadev, STM32_MDMA_CMDR(chan->id))); +} + +static void stm32_mdma_start_transfer(struct stm32_mdma_chan *chan) +{ + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + struct virt_dma_desc *vdesc; + struct stm32_mdma_hwdesc *hwdesc; + u32 id = chan->id; + u32 status, reg; + + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) { + chan->desc = NULL; + return; + } + + list_del(&vdesc->node); + + chan->desc = to_stm32_mdma_desc(vdesc); + hwdesc = chan->desc->node[0].hwdesc; + chan->curr_hwdesc = 0; + + stm32_mdma_write(dmadev, STM32_MDMA_CCR(id), chan->desc->ccr); + stm32_mdma_write(dmadev, STM32_MDMA_CTCR(id), hwdesc->ctcr); + stm32_mdma_write(dmadev, STM32_MDMA_CBNDTR(id), hwdesc->cbndtr); + stm32_mdma_write(dmadev, STM32_MDMA_CSAR(id), hwdesc->csar); + stm32_mdma_write(dmadev, STM32_MDMA_CDAR(id), hwdesc->cdar); + stm32_mdma_write(dmadev, STM32_MDMA_CBRUR(id), hwdesc->cbrur); + stm32_mdma_write(dmadev, STM32_MDMA_CLAR(id), hwdesc->clar); + stm32_mdma_write(dmadev, STM32_MDMA_CTBR(id), hwdesc->ctbr); + stm32_mdma_write(dmadev, STM32_MDMA_CMAR(id), hwdesc->cmar); + stm32_mdma_write(dmadev, STM32_MDMA_CMDR(id), hwdesc->cmdr); + + /* Clear interrupt status if it is there */ + status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(id)); + if (status) + stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(id), status); + + stm32_mdma_dump_reg(chan); + + /* Start DMA */ + stm32_mdma_set_bits(dmadev, STM32_MDMA_CCR(id), STM32_MDMA_CCR_EN); + + /* Set SW request in case of MEM2MEM transfer */ + if (hwdesc->ctcr & STM32_MDMA_CTCR_SWRM) { + reg = STM32_MDMA_CCR(id); + stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_SWRQ); + } + + chan->busy = true; + + dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan); +} + +static void stm32_mdma_issue_pending(struct dma_chan *c) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + unsigned long flags; + + spin_lock_irqsave(&chan->vchan.lock, flags); + + if (!vchan_issue_pending(&chan->vchan)) + goto end; + + dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan); + + if (!chan->desc && !chan->busy) + stm32_mdma_start_transfer(chan); + +end: + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static int stm32_mdma_pause(struct dma_chan *c) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + unsigned long flags; + int ret; + + spin_lock_irqsave(&chan->vchan.lock, flags); + ret = stm32_mdma_disable_chan(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + if (!ret) + dev_dbg(chan2dev(chan), "vchan %pK: pause\n", &chan->vchan); + + return ret; +} + +static int stm32_mdma_resume(struct dma_chan *c) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + struct stm32_mdma_hwdesc *hwdesc; + unsigned long flags; + u32 status, reg; + + /* Transfer can be terminated */ + if (!chan->desc || (stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & STM32_MDMA_CCR_EN)) + return -EPERM; + + hwdesc = chan->desc->node[chan->curr_hwdesc].hwdesc; + + spin_lock_irqsave(&chan->vchan.lock, flags); + + /* Re-configure control register */ + stm32_mdma_write(dmadev, STM32_MDMA_CCR(chan->id), chan->desc->ccr); + + /* Clear interrupt status if it is there */ + status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id)); + if (status) + stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(chan->id), status); + + stm32_mdma_dump_reg(chan); + + /* Re-start DMA */ + reg = STM32_MDMA_CCR(chan->id); + stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_EN); + + /* Set SW request in case of MEM2MEM transfer */ + if (hwdesc->ctcr & STM32_MDMA_CTCR_SWRM) + stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_SWRQ); + + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + dev_dbg(chan2dev(chan), "vchan %pK: resume\n", &chan->vchan); + + return 0; +} + +static int stm32_mdma_terminate_all(struct dma_chan *c) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (chan->desc) { + vchan_terminate_vdesc(&chan->desc->vdesc); + if (chan->busy) + stm32_mdma_stop(chan); + chan->desc = NULL; + } + vchan_get_all_descriptors(&chan->vchan, &head); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&chan->vchan, &head); + + return 0; +} + +static void stm32_mdma_synchronize(struct dma_chan *c) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + + vchan_synchronize(&chan->vchan); +} + +static int stm32_mdma_slave_config(struct dma_chan *c, + struct dma_slave_config *config) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + + memcpy(&chan->dma_config, config, sizeof(*config)); + + /* Check if user is requesting STM32 DMA to trigger MDMA */ + if (config->peripheral_size) { + struct stm32_mdma_dma_config *mdma_config; + + mdma_config = (struct stm32_mdma_dma_config *)chan->dma_config.peripheral_config; + chan->chan_config.request = mdma_config->request; + chan->chan_config.mask_addr = mdma_config->cmar; + chan->chan_config.mask_data = mdma_config->cmdr; + chan->chan_config.m2m_hw = true; + } + + return 0; +} + +static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan, + struct stm32_mdma_desc *desc, + u32 curr_hwdesc, + struct dma_tx_state *state) +{ + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + struct stm32_mdma_hwdesc *hwdesc; + u32 cisr, clar, cbndtr, residue, modulo, burst_size; + int i; + + cisr = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id)); + + residue = 0; + /* Get the next hw descriptor to process from current transfer */ + clar = stm32_mdma_read(dmadev, STM32_MDMA_CLAR(chan->id)); + for (i = desc->count - 1; i >= 0; i--) { + hwdesc = desc->node[i].hwdesc; + + if (hwdesc->clar == clar) + break;/* Current transfer found, stop cumulating */ + + /* Cumulate residue of unprocessed hw descriptors */ + residue += STM32_MDMA_CBNDTR_BNDT(hwdesc->cbndtr); + } + cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)); + residue += cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK; + + state->in_flight_bytes = 0; + if (chan->chan_config.m2m_hw && (cisr & STM32_MDMA_CISR_CRQA)) + state->in_flight_bytes = cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK; + + if (!chan->mem_burst) + return residue; + + burst_size = chan->mem_burst * chan->mem_width; + modulo = residue % burst_size; + if (modulo) + residue = residue - modulo + burst_size; + + return residue; +} + +static enum dma_status stm32_mdma_tx_status(struct dma_chan *c, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct virt_dma_desc *vdesc; + enum dma_status status; + unsigned long flags; + u32 residue = 0; + + status = dma_cookie_status(c, cookie, state); + if ((status == DMA_COMPLETE) || (!state)) + return status; + + spin_lock_irqsave(&chan->vchan.lock, flags); + + vdesc = vchan_find_desc(&chan->vchan, cookie); + if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) + residue = stm32_mdma_desc_residue(chan, chan->desc, chan->curr_hwdesc, state); + else if (vdesc) + residue = stm32_mdma_desc_residue(chan, to_stm32_mdma_desc(vdesc), 0, state); + + dma_set_residue(state, residue); + + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + return status; +} + +static void stm32_mdma_xfer_end(struct stm32_mdma_chan *chan) +{ + vchan_cookie_complete(&chan->desc->vdesc); + chan->desc = NULL; + chan->busy = false; + + /* Start the next transfer if this driver has a next desc */ + stm32_mdma_start_transfer(chan); +} + +static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid) +{ + struct stm32_mdma_device *dmadev = devid; + struct stm32_mdma_chan *chan; + u32 reg, id, ccr, ien, status; + + /* Find out which channel generates the interrupt */ + status = readl_relaxed(dmadev->base + STM32_MDMA_GISR0); + if (!status) { + dev_dbg(mdma2dev(dmadev), "spurious it\n"); + return IRQ_NONE; + } + id = __ffs(status); + chan = &dmadev->chan[id]; + + /* Handle interrupt for the channel */ + spin_lock(&chan->vchan.lock); + status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(id)); + /* Mask Channel ReQuest Active bit which can be set in case of MEM2MEM */ + status &= ~STM32_MDMA_CISR_CRQA; + ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(id)); + ien = (ccr & STM32_MDMA_CCR_IRQ_MASK) >> 1; + + if (!(status & ien)) { + spin_unlock(&chan->vchan.lock); + if (chan->busy) + dev_warn(chan2dev(chan), + "spurious it (status=0x%04x, ien=0x%04x)\n", status, ien); + else + dev_dbg(chan2dev(chan), + "spurious it (status=0x%04x, ien=0x%04x)\n", status, ien); + return IRQ_NONE; + } + + reg = STM32_MDMA_CIFCR(id); + + if (status & STM32_MDMA_CISR_TEIF) { + dev_err(chan2dev(chan), "Transfer Err: stat=0x%08x\n", + readl_relaxed(dmadev->base + STM32_MDMA_CESR(id))); + stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CTEIF); + status &= ~STM32_MDMA_CISR_TEIF; + } + + if (status & STM32_MDMA_CISR_CTCIF) { + stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CCTCIF); + status &= ~STM32_MDMA_CISR_CTCIF; + stm32_mdma_xfer_end(chan); + } + + if (status & STM32_MDMA_CISR_BRTIF) { + stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBRTIF); + status &= ~STM32_MDMA_CISR_BRTIF; + } + + if (status & STM32_MDMA_CISR_BTIF) { + stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBTIF); + status &= ~STM32_MDMA_CISR_BTIF; + chan->curr_hwdesc++; + if (chan->desc && chan->desc->cyclic) { + if (chan->curr_hwdesc == chan->desc->count) + chan->curr_hwdesc = 0; + vchan_cyclic_callback(&chan->desc->vdesc); + } + } + + if (status & STM32_MDMA_CISR_TCIF) { + stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CLTCIF); + status &= ~STM32_MDMA_CISR_TCIF; + } + + if (status) { + stm32_mdma_set_bits(dmadev, reg, status); + dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status); + if (!(ccr & STM32_MDMA_CCR_EN)) + dev_err(chan2dev(chan), "chan disabled by HW\n"); + } + + spin_unlock(&chan->vchan.lock); + + return IRQ_HANDLED; +} + +static int stm32_mdma_alloc_chan_resources(struct dma_chan *c) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + int ret; + + chan->desc_pool = dmam_pool_create(dev_name(&c->dev->device), + c->device->dev, + sizeof(struct stm32_mdma_hwdesc), + __alignof__(struct stm32_mdma_hwdesc), + 0); + if (!chan->desc_pool) { + dev_err(chan2dev(chan), "failed to allocate descriptor pool\n"); + return -ENOMEM; + } + + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); + if (ret < 0) + return ret; + + ret = stm32_mdma_disable_chan(chan); + if (ret < 0) + pm_runtime_put(dmadev->ddev.dev); + + return ret; +} + +static void stm32_mdma_free_chan_resources(struct dma_chan *c) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + unsigned long flags; + + dev_dbg(chan2dev(chan), "Freeing channel %d\n", chan->id); + + if (chan->busy) { + spin_lock_irqsave(&chan->vchan.lock, flags); + stm32_mdma_stop(chan); + chan->desc = NULL; + spin_unlock_irqrestore(&chan->vchan.lock, flags); + } + + pm_runtime_put(dmadev->ddev.dev); + vchan_free_chan_resources(to_virt_chan(c)); + dmam_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; +} + +static bool stm32_mdma_filter_fn(struct dma_chan *c, void *fn_param) +{ + struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c); + struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); + + /* Check if chan is marked Secure */ + if (dmadev->chan_reserved & BIT(chan->id)) + return false; + + return true; +} + +static struct dma_chan *stm32_mdma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct stm32_mdma_device *dmadev = ofdma->of_dma_data; + dma_cap_mask_t mask = dmadev->ddev.cap_mask; + struct stm32_mdma_chan *chan; + struct dma_chan *c; + struct stm32_mdma_chan_config config; + + if (dma_spec->args_count < 5) { + dev_err(mdma2dev(dmadev), "Bad number of args\n"); + return NULL; + } + + memset(&config, 0, sizeof(config)); + config.request = dma_spec->args[0]; + config.priority_level = dma_spec->args[1]; + config.transfer_config = dma_spec->args[2]; + config.mask_addr = dma_spec->args[3]; + config.mask_data = dma_spec->args[4]; + + if (config.request >= dmadev->nr_requests) { + dev_err(mdma2dev(dmadev), "Bad request line\n"); + return NULL; + } + + if (config.priority_level > STM32_MDMA_VERY_HIGH_PRIORITY) { + dev_err(mdma2dev(dmadev), "Priority level not supported\n"); + return NULL; + } + + c = __dma_request_channel(&mask, stm32_mdma_filter_fn, &config, ofdma->of_node); + if (!c) { + dev_err(mdma2dev(dmadev), "No more channels available\n"); + return NULL; + } + + chan = to_stm32_mdma_chan(c); + chan->chan_config = config; + + return c; +} + +static const struct of_device_id stm32_mdma_of_match[] = { + { .compatible = "st,stm32h7-mdma", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, stm32_mdma_of_match); + +static int stm32_mdma_probe(struct platform_device *pdev) +{ + struct stm32_mdma_chan *chan; + struct stm32_mdma_device *dmadev; + struct dma_device *dd; + struct device_node *of_node; + struct reset_control *rst; + u32 nr_channels, nr_requests; + int i, count, ret; + + of_node = pdev->dev.of_node; + if (!of_node) + return -ENODEV; + + ret = device_property_read_u32(&pdev->dev, "dma-channels", + &nr_channels); + if (ret) { + nr_channels = STM32_MDMA_MAX_CHANNELS; + dev_warn(&pdev->dev, "MDMA defaulting on %i channels\n", + nr_channels); + } + + ret = device_property_read_u32(&pdev->dev, "dma-requests", + &nr_requests); + if (ret) { + nr_requests = STM32_MDMA_MAX_REQUESTS; + dev_warn(&pdev->dev, "MDMA defaulting on %i request lines\n", + nr_requests); + } + + count = device_property_count_u32(&pdev->dev, "st,ahb-addr-masks"); + if (count < 0) + count = 0; + + dmadev = devm_kzalloc(&pdev->dev, + struct_size(dmadev, ahb_addr_masks, count), + GFP_KERNEL); + if (!dmadev) + return -ENOMEM; + + dmadev->nr_channels = nr_channels; + dmadev->nr_requests = nr_requests; + device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks", + dmadev->ahb_addr_masks, + count); + dmadev->nr_ahb_addr_masks = count; + + dmadev->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dmadev->base)) + return PTR_ERR(dmadev->base); + + dmadev->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dmadev->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(dmadev->clk), + "Missing clock controller\n"); + + ret = clk_prepare_enable(dmadev->clk); + if (ret < 0) { + dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret); + return ret; + } + + rst = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(rst)) { + ret = PTR_ERR(rst); + if (ret == -EPROBE_DEFER) + goto err_clk; + } else { + reset_control_assert(rst); + udelay(2); + reset_control_deassert(rst); + } + + dd = &dmadev->ddev; + dma_cap_set(DMA_SLAVE, dd->cap_mask); + dma_cap_set(DMA_PRIVATE, dd->cap_mask); + dma_cap_set(DMA_CYCLIC, dd->cap_mask); + dma_cap_set(DMA_MEMCPY, dd->cap_mask); + dd->device_alloc_chan_resources = stm32_mdma_alloc_chan_resources; + dd->device_free_chan_resources = stm32_mdma_free_chan_resources; + dd->device_tx_status = stm32_mdma_tx_status; + dd->device_issue_pending = stm32_mdma_issue_pending; + dd->device_prep_slave_sg = stm32_mdma_prep_slave_sg; + dd->device_prep_dma_cyclic = stm32_mdma_prep_dma_cyclic; + dd->device_prep_dma_memcpy = stm32_mdma_prep_dma_memcpy; + dd->device_config = stm32_mdma_slave_config; + dd->device_pause = stm32_mdma_pause; + dd->device_resume = stm32_mdma_resume; + dd->device_terminate_all = stm32_mdma_terminate_all; + dd->device_synchronize = stm32_mdma_synchronize; + dd->descriptor_reuse = true; + + dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES); + dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES); + dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | + BIT(DMA_MEM_TO_MEM); + dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + dd->max_burst = STM32_MDMA_MAX_BURST; + dd->dev = &pdev->dev; + INIT_LIST_HEAD(&dd->channels); + + for (i = 0; i < dmadev->nr_channels; i++) { + chan = &dmadev->chan[i]; + chan->id = i; + + if (stm32_mdma_read(dmadev, STM32_MDMA_CCR(i)) & STM32_MDMA_CCR_SM) + dmadev->chan_reserved |= BIT(i); + + chan->vchan.desc_free = stm32_mdma_desc_free; + vchan_init(&chan->vchan, dd); + } + + dmadev->irq = platform_get_irq(pdev, 0); + if (dmadev->irq < 0) { + ret = dmadev->irq; + goto err_clk; + } + + ret = devm_request_irq(&pdev->dev, dmadev->irq, stm32_mdma_irq_handler, + 0, dev_name(&pdev->dev), dmadev); + if (ret) { + dev_err(&pdev->dev, "failed to request IRQ\n"); + goto err_clk; + } + + ret = dmaenginem_async_device_register(dd); + if (ret) + goto err_clk; + + ret = of_dma_controller_register(of_node, stm32_mdma_of_xlate, dmadev); + if (ret < 0) { + dev_err(&pdev->dev, + "STM32 MDMA DMA OF registration failed %d\n", ret); + goto err_clk; + } + + platform_set_drvdata(pdev, dmadev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_put(&pdev->dev); + + dev_info(&pdev->dev, "STM32 MDMA driver registered\n"); + + return 0; + +err_clk: + clk_disable_unprepare(dmadev->clk); + + return ret; +} + +#ifdef CONFIG_PM +static int stm32_mdma_runtime_suspend(struct device *dev) +{ + struct stm32_mdma_device *dmadev = dev_get_drvdata(dev); + + clk_disable_unprepare(dmadev->clk); + + return 0; +} + +static int stm32_mdma_runtime_resume(struct device *dev) +{ + struct stm32_mdma_device *dmadev = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(dmadev->clk); + if (ret) { + dev_err(dev, "failed to prepare_enable clock\n"); + return ret; + } + + return 0; +} +#endif + +#ifdef CONFIG_PM_SLEEP +static int stm32_mdma_pm_suspend(struct device *dev) +{ + struct stm32_mdma_device *dmadev = dev_get_drvdata(dev); + u32 ccr, id; + int ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + for (id = 0; id < dmadev->nr_channels; id++) { + ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(id)); + if (ccr & STM32_MDMA_CCR_EN) { + dev_warn(dev, "Suspend is prevented by Chan %i\n", id); + return -EBUSY; + } + } + + pm_runtime_put_sync(dev); + + pm_runtime_force_suspend(dev); + + return 0; +} + +static int stm32_mdma_pm_resume(struct device *dev) +{ + return pm_runtime_force_resume(dev); +} +#endif + +static const struct dev_pm_ops stm32_mdma_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(stm32_mdma_pm_suspend, stm32_mdma_pm_resume) + SET_RUNTIME_PM_OPS(stm32_mdma_runtime_suspend, + stm32_mdma_runtime_resume, NULL) +}; + +static struct platform_driver stm32_mdma_driver = { + .probe = stm32_mdma_probe, + .driver = { + .name = "stm32-mdma", + .of_match_table = stm32_mdma_of_match, + .pm = &stm32_mdma_pm_ops, + }, +}; + +static int __init stm32_mdma_init(void) +{ + return platform_driver_register(&stm32_mdma_driver); +} + +subsys_initcall(stm32_mdma_init); + +MODULE_DESCRIPTION("Driver for STM32 MDMA controller"); +MODULE_AUTHOR("M'boumba Cedric Madianga "); +MODULE_AUTHOR("Pierre-Yves Mordret "); diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c new file mode 100644 index 0000000000..e86c882951 --- /dev/null +++ b/drivers/dma/sun4i-dma.c @@ -0,0 +1,1308 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2014 Emilio López + * Emilio López + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +/** Common macros to normal and dedicated DMA registers **/ + +#define SUN4I_DMA_CFG_LOADING BIT(31) +#define SUN4I_DMA_CFG_DST_DATA_WIDTH(width) ((width) << 25) +#define SUN4I_DMA_CFG_DST_BURST_LENGTH(len) ((len) << 23) +#define SUN4I_DMA_CFG_DST_ADDR_MODE(mode) ((mode) << 21) +#define SUN4I_DMA_CFG_DST_DRQ_TYPE(type) ((type) << 16) +#define SUN4I_DMA_CFG_SRC_DATA_WIDTH(width) ((width) << 9) +#define SUN4I_DMA_CFG_SRC_BURST_LENGTH(len) ((len) << 7) +#define SUN4I_DMA_CFG_SRC_ADDR_MODE(mode) ((mode) << 5) +#define SUN4I_DMA_CFG_SRC_DRQ_TYPE(type) (type) + +/** Normal DMA register values **/ + +/* Normal DMA source/destination data request type values */ +#define SUN4I_NDMA_DRQ_TYPE_SDRAM 0x16 +#define SUN4I_NDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Normal DMA register layout **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_NDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_NDMA_ADDR_MODE_IO 1 + +/* Normal DMA configuration register layout */ +#define SUN4I_NDMA_CFG_CONT_MODE BIT(30) +#define SUN4I_NDMA_CFG_WAIT_STATE(n) ((n) << 27) +#define SUN4I_NDMA_CFG_DST_NON_SECURE BIT(22) +#define SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_NDMA_CFG_SRC_NON_SECURE BIT(6) + +/** Dedicated DMA register values **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_DDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_DDMA_ADDR_MODE_IO 1 +#define SUN4I_DDMA_ADDR_MODE_HORIZONTAL_PAGE 2 +#define SUN4I_DDMA_ADDR_MODE_VERTICAL_PAGE 3 + +/* Dedicated DMA source/destination data request type values */ +#define SUN4I_DDMA_DRQ_TYPE_SDRAM 0x1 +#define SUN4I_DDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Dedicated DMA register layout **/ + +/* Dedicated DMA configuration register layout */ +#define SUN4I_DDMA_CFG_BUSY BIT(30) +#define SUN4I_DDMA_CFG_CONT_MODE BIT(29) +#define SUN4I_DDMA_CFG_DST_NON_SECURE BIT(28) +#define SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_DDMA_CFG_SRC_NON_SECURE BIT(12) + +/* Dedicated DMA parameter register layout */ +#define SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(n) (((n) - 1) << 24) +#define SUN4I_DDMA_PARA_DST_WAIT_CYCLES(n) (((n) - 1) << 16) +#define SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(n) (((n) - 1) << 8) +#define SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(n) (((n) - 1) << 0) + +/** DMA register offsets **/ + +/* General register offsets */ +#define SUN4I_DMA_IRQ_ENABLE_REG 0x0 +#define SUN4I_DMA_IRQ_PENDING_STATUS_REG 0x4 + +/* Normal DMA register offsets */ +#define SUN4I_NDMA_CHANNEL_REG_BASE(n) (0x100 + (n) * 0x20) +#define SUN4I_NDMA_CFG_REG 0x0 +#define SUN4I_NDMA_SRC_ADDR_REG 0x4 +#define SUN4I_NDMA_DST_ADDR_REG 0x8 +#define SUN4I_NDMA_BYTE_COUNT_REG 0xC + +/* Dedicated DMA register offsets */ +#define SUN4I_DDMA_CHANNEL_REG_BASE(n) (0x300 + (n) * 0x20) +#define SUN4I_DDMA_CFG_REG 0x0 +#define SUN4I_DDMA_SRC_ADDR_REG 0x4 +#define SUN4I_DDMA_DST_ADDR_REG 0x8 +#define SUN4I_DDMA_BYTE_COUNT_REG 0xC +#define SUN4I_DDMA_PARA_REG 0x18 + +/** DMA Driver **/ + +/* + * Normal DMA has 8 channels, and Dedicated DMA has another 8, so + * that's 16 channels. As for endpoints, there's 29 and 21 + * respectively. Given that the Normal DMA endpoints (other than + * SDRAM) can be used as tx/rx, we need 78 vchans in total + */ +#define SUN4I_NDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DMA_NR_MAX_CHANNELS \ + (SUN4I_NDMA_NR_MAX_CHANNELS + SUN4I_DDMA_NR_MAX_CHANNELS) +#define SUN4I_NDMA_NR_MAX_VCHANS (29 * 2 - 1) +#define SUN4I_DDMA_NR_MAX_VCHANS 21 +#define SUN4I_DMA_NR_MAX_VCHANS \ + (SUN4I_NDMA_NR_MAX_VCHANS + SUN4I_DDMA_NR_MAX_VCHANS) + +/* This set of SUN4I_DDMA timing parameters were found experimentally while + * working with the SPI driver and seem to make it behave correctly */ +#define SUN4I_DDMA_MAGIC_SPI_PARAMETERS \ + (SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_DST_WAIT_CYCLES(2) | \ + SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(2)) + +/* + * Normal DMA supports individual transfers (segments) up to 128k. + * Dedicated DMA supports transfers up to 16M. We can only report + * one size limit, so we have to use the smaller value. + */ +#define SUN4I_NDMA_MAX_SEG_SIZE SZ_128K +#define SUN4I_DDMA_MAX_SEG_SIZE SZ_16M +#define SUN4I_DMA_MAX_SEG_SIZE SUN4I_NDMA_MAX_SEG_SIZE + +struct sun4i_dma_pchan { + /* Register base of channel */ + void __iomem *base; + /* vchan currently being serviced */ + struct sun4i_dma_vchan *vchan; + /* Is this a dedicated pchan? */ + int is_dedicated; +}; + +struct sun4i_dma_vchan { + struct virt_dma_chan vc; + struct dma_slave_config cfg; + struct sun4i_dma_pchan *pchan; + struct sun4i_dma_promise *processing; + struct sun4i_dma_contract *contract; + u8 endpoint; + int is_dedicated; +}; + +struct sun4i_dma_promise { + u32 cfg; + u32 para; + dma_addr_t src; + dma_addr_t dst; + size_t len; + struct list_head list; +}; + +/* A contract is a set of promises */ +struct sun4i_dma_contract { + struct virt_dma_desc vd; + struct list_head demands; + struct list_head completed_demands; + bool is_cyclic : 1; + bool use_half_int : 1; +}; + +struct sun4i_dma_dev { + DECLARE_BITMAP(pchans_used, SUN4I_DMA_NR_MAX_CHANNELS); + struct dma_device slave; + struct sun4i_dma_pchan *pchans; + struct sun4i_dma_vchan *vchans; + void __iomem *base; + struct clk *clk; + int irq; + spinlock_t lock; +}; + +static struct sun4i_dma_dev *to_sun4i_dma_dev(struct dma_device *dev) +{ + return container_of(dev, struct sun4i_dma_dev, slave); +} + +static struct sun4i_dma_vchan *to_sun4i_dma_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct sun4i_dma_vchan, vc.chan); +} + +static struct sun4i_dma_contract *to_sun4i_dma_contract(struct virt_dma_desc *vd) +{ + return container_of(vd, struct sun4i_dma_contract, vd); +} + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static int convert_burst(u32 maxburst) +{ + if (maxburst > 8) + return -EINVAL; + + /* 1 -> 0, 4 -> 1, 8 -> 2 */ + return (maxburst >> 2); +} + +static int convert_buswidth(enum dma_slave_buswidth addr_width) +{ + if (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES) + return -EINVAL; + + /* 8 (1 byte) -> 0, 16 (2 bytes) -> 1, 32 (4 bytes) -> 2 */ + return (addr_width >> 1); +} + +static void sun4i_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + vchan_free_chan_resources(&vchan->vc); +} + +static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_pchan *pchan = NULL, *pchans = priv->pchans; + unsigned long flags; + int i, max; + + /* + * pchans 0-SUN4I_NDMA_NR_MAX_CHANNELS are normal, and + * SUN4I_NDMA_NR_MAX_CHANNELS+ are dedicated ones + */ + if (vchan->is_dedicated) { + i = SUN4I_NDMA_NR_MAX_CHANNELS; + max = SUN4I_DMA_NR_MAX_CHANNELS; + } else { + i = 0; + max = SUN4I_NDMA_NR_MAX_CHANNELS; + } + + spin_lock_irqsave(&priv->lock, flags); + for_each_clear_bit_from(i, priv->pchans_used, max) { + pchan = &pchans[i]; + pchan->vchan = vchan; + set_bit(i, priv->pchans_used); + break; + } + spin_unlock_irqrestore(&priv->lock, flags); + + return pchan; +} + +static void release_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan) +{ + unsigned long flags; + int nr = pchan - priv->pchans; + + spin_lock_irqsave(&priv->lock, flags); + + pchan->vchan = NULL; + clear_bit(nr, priv->pchans_used); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void configure_pchan(struct sun4i_dma_pchan *pchan, + struct sun4i_dma_promise *d) +{ + /* + * Configure addresses and misc parameters depending on type + * SUN4I_DDMA has an extra field with timing parameters + */ + if (pchan->is_dedicated) { + writel_relaxed(d->src, pchan->base + SUN4I_DDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_DDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + writel_relaxed(d->para, pchan->base + SUN4I_DDMA_PARA_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_DDMA_CFG_REG); + } else { + writel_relaxed(d->src, pchan->base + SUN4I_NDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_NDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_NDMA_CFG_REG); + } +} + +static void set_pchan_interrupt(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan, + int half, int end) +{ + u32 reg; + int pchan_number = pchan - priv->pchans; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + reg = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + if (half) + reg |= BIT(pchan_number * 2); + else + reg &= ~BIT(pchan_number * 2); + + if (end) + reg |= BIT(pchan_number * 2 + 1); + else + reg &= ~BIT(pchan_number * 2 + 1); + + writel_relaxed(reg, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +/* + * Execute pending operations on a vchan + * + * When given a vchan, this function will try to acquire a suitable + * pchan and, if successful, will configure it to fulfill a promise + * from the next pending contract. + * + * This function must be called with &vchan->vc.lock held. + */ +static int __execute_vchan_pending(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_promise *promise = NULL; + struct sun4i_dma_contract *contract = NULL; + struct sun4i_dma_pchan *pchan; + struct virt_dma_desc *vd; + int ret; + + lockdep_assert_held(&vchan->vc.lock); + + /* We need a pchan to do anything, so secure one if available */ + pchan = find_and_use_pchan(priv, vchan); + if (!pchan) + return -EBUSY; + + /* + * Channel endpoints must not be repeated, so if this vchan + * has already submitted some work, we can't do anything else + */ + if (vchan->processing) { + dev_dbg(chan2dev(&vchan->vc.chan), + "processing something to this endpoint already\n"); + ret = -EBUSY; + goto release_pchan; + } + + do { + /* Figure out which contract we're working with today */ + vd = vchan_next_desc(&vchan->vc); + if (!vd) { + dev_dbg(chan2dev(&vchan->vc.chan), + "No pending contract found"); + ret = 0; + goto release_pchan; + } + + contract = to_sun4i_dma_contract(vd); + if (list_empty(&contract->demands)) { + /* The contract has been completed so mark it as such */ + list_del(&contract->vd.node); + vchan_cookie_complete(&contract->vd); + dev_dbg(chan2dev(&vchan->vc.chan), + "Empty contract found and marked complete"); + } + } while (list_empty(&contract->demands)); + + /* Now find out what we need to do */ + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + vchan->processing = promise; + + /* ... and make it reality */ + if (promise) { + vchan->contract = contract; + vchan->pchan = pchan; + set_pchan_interrupt(priv, pchan, contract->use_half_int, 1); + configure_pchan(pchan, promise); + } + + return 0; + +release_pchan: + release_pchan(priv, pchan); + return ret; +} + +static int sanitize_config(struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + switch (direction) { + case DMA_MEM_TO_DEV: + if ((sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->dst_maxburst) + return -EINVAL; + + if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->src_addr_width = sconfig->dst_addr_width; + + if (!sconfig->src_maxburst) + sconfig->src_maxburst = sconfig->dst_maxburst; + + break; + + case DMA_DEV_TO_MEM: + if ((sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->src_maxburst) + return -EINVAL; + + if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->dst_addr_width = sconfig->src_addr_width; + + if (!sconfig->dst_maxburst) + sconfig->dst_maxburst = sconfig->src_maxburst; + + break; + default: + return 0; + } + + return 0; +} + +/* + * Generate a promise, to be used in a normal DMA contract. + * + * A NDMA promise contains all the information required to program the + * normal part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. + * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + struct sun4i_dma_promise *promise; + int ret; + + ret = sanitize_config(sconfig, direction); + if (ret) + return NULL; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + dev_dbg(chan2dev(chan), + "src burst %d, dst burst %d, src buswidth %d, dst buswidth %d", + sconfig->src_maxburst, sconfig->dst_maxburst, + sconfig->src_addr_width, sconfig->dst_addr_width); + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (ret < 0) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (ret < 0) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (ret < 0) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (ret < 0) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/* + * Generate a promise, to be used in a dedicated DMA contract. + * + * A DDMA promise contains all the information required to program the + * Dedicated part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. + * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig) +{ + struct sun4i_dma_promise *promise; + int ret; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (ret < 0) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (ret < 0) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (ret < 0) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (ret < 0) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/* + * Generate a contract + * + * Contracts function as DMA descriptors. As our hardware does not support + * linked lists, we need to implement SG via software. We use a contract + * to hold all the pieces of the request and process them serially one + * after another. Each piece is represented as a promise. + */ +static struct sun4i_dma_contract *generate_dma_contract(void) +{ + struct sun4i_dma_contract *contract; + + contract = kzalloc(sizeof(*contract), GFP_NOWAIT); + if (!contract) + return NULL; + + INIT_LIST_HEAD(&contract->demands); + INIT_LIST_HEAD(&contract->completed_demands); + + return contract; +} + +/* + * Get next promise on a cyclic transfer + * + * Cyclic contracts contain a series of promises which are executed on a + * loop. This function returns the next promise from a cyclic contract, + * so it can be programmed into the hardware. + */ +static struct sun4i_dma_promise * +get_next_cyclic_promise(struct sun4i_dma_contract *contract) +{ + struct sun4i_dma_promise *promise; + + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (!promise) { + list_splice_init(&contract->completed_demands, + &contract->demands); + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + } + + return promise; +} + +/* + * Free a contract and all its associated promises + */ +static void sun4i_dma_free_contract(struct virt_dma_desc *vd) +{ + struct sun4i_dma_contract *contract = to_sun4i_dma_contract(vd); + struct sun4i_dma_promise *promise, *tmp; + + /* Free all the demands and completed demands */ + list_for_each_entry_safe(promise, tmp, &contract->demands, list) + kfree(promise); + + list_for_each_entry_safe(promise, tmp, &contract->completed_demands, list) + kfree(promise); + + kfree(contract); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + /* + * We can only do the copy to bus aligned addresses, so + * choose the best one so we get decent performance. We also + * maximize the burst size for this same reason. + */ + sconfig->src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->src_maxburst = 8; + sconfig->dst_maxburst = 8; + + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, src, dest, len, sconfig); + else + promise = generate_ndma_promise(chan, src, dest, len, sconfig, + DMA_MEM_TO_MEM); + + if (!promise) { + kfree(contract); + return NULL; + } + + /* Configure memcpy mode */ + if (vchan->is_dedicated) { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM); + } else { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* Fill the contract with our only promise */ + list_add_tail(&promise->list, &contract->demands); + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + dma_addr_t src, dest; + u32 endpoints; + int nr_periods, offset, plength, i; + u8 ram_type, io_mode, linear_mode; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + contract->is_cyclic = 1; + + if (vchan->is_dedicated) { + io_mode = SUN4I_DDMA_ADDR_MODE_IO; + linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM; + } else { + io_mode = SUN4I_NDMA_ADDR_MODE_IO; + linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM; + } + + if (dir == DMA_MEM_TO_DEV) { + src = buf; + dest = sconfig->dst_addr; + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode); + } else { + src = sconfig->src_addr; + dest = buf; + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode); + } + + /* + * We will be using half done interrupts to make two periods + * out of a promise, so we need to program the DMA engine less + * often + */ + + /* + * The engine can interrupt on half-transfer, so we can use + * this feature to program the engine half as often as if we + * didn't use it (keep in mind the hardware doesn't support + * linked lists). + * + * Say you have a set of periods (| marks the start/end, I for + * interrupt, P for programming the engine to do a new + * transfer), the easy but slow way would be to do + * + * |---|---|---|---| (periods / promises) + * P I,P I,P I,P I + * + * Using half transfer interrupts you can do + * + * |-------|-------| (promises as configured on hw) + * |---|---|---|---| (periods) + * P I I,P I I + * + * Which requires half the engine programming for the same + * functionality. + * + * This only works if two periods fit in a single promise. That will + * always be the case for dedicated DMA, where the hardware has a much + * larger maximum transfer size than advertised to clients. + */ + if (vchan->is_dedicated || period_len <= SUN4I_NDMA_MAX_SEG_SIZE / 2) { + period_len *= 2; + contract->use_half_int = 1; + } + + nr_periods = DIV_ROUND_UP(len, period_len); + for (i = 0; i < nr_periods; i++) { + /* Calculate the offset in the buffer and the length needed */ + offset = i * period_len; + plength = min((len - offset), period_len); + if (dir == DMA_MEM_TO_DEV) + src = buf + offset; + else + dest = buf + offset; + + /* Make the promise */ + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, src, dest, + plength, sconfig); + else + promise = generate_ndma_promise(chan, src, dest, + plength, sconfig, dir); + + if (!promise) { + /* TODO: should we free everything? */ + return NULL; + } + promise->cfg |= endpoints; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + u8 ram_type, io_mode, linear_mode; + struct scatterlist *sg; + dma_addr_t srcaddr, dstaddr; + u32 endpoints, para; + int i; + + if (!sgl) + return NULL; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + if (vchan->is_dedicated) { + io_mode = SUN4I_DDMA_ADDR_MODE_IO; + linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM; + } else { + io_mode = SUN4I_NDMA_ADDR_MODE_IO; + linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM; + } + + if (dir == DMA_MEM_TO_DEV) + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode); + else + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode); + + for_each_sg(sgl, sg, sg_len, i) { + /* Figure out addresses */ + if (dir == DMA_MEM_TO_DEV) { + srcaddr = sg_dma_address(sg); + dstaddr = sconfig->dst_addr; + } else { + srcaddr = sconfig->src_addr; + dstaddr = sg_dma_address(sg); + } + + /* + * These are the magic DMA engine timings that keep SPI going. + * I haven't seen any interface on DMAEngine to configure + * timings, and so far they seem to work for everything we + * support, so I've kept them here. I don't know if other + * devices need different timings because, as usual, we only + * have the "para" bitfield meanings, but no comment on what + * the values should be when doing a certain operation :| + */ + para = SUN4I_DDMA_MAGIC_SPI_PARAMETERS; + + /* And make a suitable promise */ + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig); + else + promise = generate_ndma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig, dir); + + if (!promise) + return NULL; /* TODO: should we free everything? */ + + promise->cfg |= endpoints; + promise->para = para; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* + * Once we've got all the promises ready, add the contract + * to the pending list on the vchan + */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static int sun4i_dma_terminate_all(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + LIST_HEAD(head); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_get_all_descriptors(&vchan->vc, &head); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + /* + * Clearing the configuration register will halt the pchan. Interrupts + * may still trigger, so don't forget to disable them. + */ + if (pchan) { + if (pchan->is_dedicated) + writel(0, pchan->base + SUN4I_DDMA_CFG_REG); + else + writel(0, pchan->base + SUN4I_NDMA_CFG_REG); + set_pchan_interrupt(priv, pchan, 0, 0); + release_pchan(priv, pchan); + } + + spin_lock_irqsave(&vchan->vc.lock, flags); + /* Clear these so the vchan is usable again */ + vchan->processing = NULL; + vchan->pchan = NULL; + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + vchan_dma_desc_free_list(&vchan->vc, &head); + + return 0; +} + +static int sun4i_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + memcpy(&vchan->cfg, config, sizeof(*config)); + + return 0; +} + +static struct dma_chan *sun4i_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sun4i_dma_dev *priv = ofdma->of_dma_data; + struct sun4i_dma_vchan *vchan; + struct dma_chan *chan; + u8 is_dedicated = dma_spec->args[0]; + u8 endpoint = dma_spec->args[1]; + + /* Check if type is Normal or Dedicated */ + if (is_dedicated != 0 && is_dedicated != 1) + return NULL; + + /* Make sure the endpoint looks sane */ + if ((is_dedicated && endpoint >= SUN4I_DDMA_DRQ_TYPE_LIMIT) || + (!is_dedicated && endpoint >= SUN4I_NDMA_DRQ_TYPE_LIMIT)) + return NULL; + + chan = dma_get_any_slave_channel(&priv->slave); + if (!chan) + return NULL; + + /* Assign the endpoint to the vchan */ + vchan = to_sun4i_dma_vchan(chan); + vchan->is_dedicated = is_dedicated; + vchan->endpoint = endpoint; + + return chan; +} + +static enum dma_status sun4i_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (!state || (ret == DMA_COMPLETE)) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vd = vchan_find_desc(&vchan->vc, cookie); + if (!vd) + goto exit; + contract = to_sun4i_dma_contract(vd); + + list_for_each_entry(promise, &contract->demands, list) + bytes += promise->len; + + /* + * The hardware is configured to return the remaining byte + * quantity. If possible, replace the first listed element's + * full size with the actual remaining amount + */ + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (promise && pchan) { + bytes -= promise->len; + if (pchan->is_dedicated) + bytes += readl(pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + else + bytes += readl(pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + } + +exit: + + dma_set_residue(state, bytes); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return ret; +} + +static void sun4i_dma_issue_pending(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + /* + * If there are pending transactions for this vchan, push one of + * them into the engine to get the ball rolling. + */ + if (vchan_issue_pending(&vchan->vc)) + __execute_vchan_pending(priv, vchan); + + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static irqreturn_t sun4i_dma_interrupt(int irq, void *dev_id) +{ + struct sun4i_dma_dev *priv = dev_id; + struct sun4i_dma_pchan *pchans = priv->pchans, *pchan; + struct sun4i_dma_vchan *vchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + unsigned long pendirq, irqs, disableirqs; + int bit, i, free_room, allow_mitigation = 1; + + pendirq = readl_relaxed(priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + +handle_pending: + + disableirqs = 0; + free_room = 0; + + for_each_set_bit(bit, &pendirq, 32) { + pchan = &pchans[bit >> 1]; + vchan = pchan->vchan; + if (!vchan) /* a terminated channel may still interrupt */ + continue; + contract = vchan->contract; + + /* + * Disable the IRQ and free the pchan if it's an end + * interrupt (odd bit) + */ + if (bit & 1) { + spin_lock(&vchan->vc.lock); + + /* + * Move the promise into the completed list now that + * we're done with it + */ + list_move_tail(&vchan->processing->list, + &contract->completed_demands); + + /* + * Cyclic DMA transfers are special: + * - There's always something we can dispatch + * - We need to run the callback + * - Latency is very important, as this is used by audio + * We therefore just cycle through the list and dispatch + * whatever we have here, reusing the pchan. There's + * no need to run the thread after this. + * + * For non-cyclic transfers we need to look around, + * so we can program some more work, or notify the + * client that their transfers have been completed. + */ + if (contract->is_cyclic) { + promise = get_next_cyclic_promise(contract); + vchan->processing = promise; + configure_pchan(pchan, promise); + vchan_cyclic_callback(&contract->vd); + } else { + vchan->processing = NULL; + vchan->pchan = NULL; + + free_room = 1; + disableirqs |= BIT(bit); + release_pchan(priv, pchan); + } + + spin_unlock(&vchan->vc.lock); + } else { + /* Half done interrupt */ + if (contract->is_cyclic) + vchan_cyclic_callback(&contract->vd); + else + disableirqs |= BIT(bit); + } + } + + /* Disable the IRQs for events we handled */ + spin_lock(&priv->lock); + irqs = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel_relaxed(irqs & ~disableirqs, + priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + spin_unlock(&priv->lock); + + /* Writing 1 to the pending field will clear the pending interrupt */ + writel_relaxed(pendirq, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + /* + * If a pchan was freed, we may be able to schedule something else, + * so have a look around + */ + if (free_room) { + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + vchan = &priv->vchans[i]; + spin_lock(&vchan->vc.lock); + __execute_vchan_pending(priv, vchan); + spin_unlock(&vchan->vc.lock); + } + } + + /* + * Handle newer interrupts if some showed up, but only do it once + * to avoid a too long a loop + */ + if (allow_mitigation) { + pendirq = readl_relaxed(priv->base + + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + if (pendirq) { + allow_mitigation = 0; + goto handle_pending; + } + } + + return IRQ_HANDLED; +} + +static int sun4i_dma_probe(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv; + int i, j, ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) + return priv->irq; + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + return PTR_ERR(priv->clk); + } + + platform_set_drvdata(pdev, priv); + spin_lock_init(&priv->lock); + + dma_set_max_seg_size(&pdev->dev, SUN4I_DMA_MAX_SEG_SIZE); + + dma_cap_zero(priv->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, priv->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, priv->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, priv->slave.cap_mask); + dma_cap_set(DMA_SLAVE, priv->slave.cap_mask); + + INIT_LIST_HEAD(&priv->slave.channels); + priv->slave.device_free_chan_resources = sun4i_dma_free_chan_resources; + priv->slave.device_tx_status = sun4i_dma_tx_status; + priv->slave.device_issue_pending = sun4i_dma_issue_pending; + priv->slave.device_prep_slave_sg = sun4i_dma_prep_slave_sg; + priv->slave.device_prep_dma_memcpy = sun4i_dma_prep_dma_memcpy; + priv->slave.device_prep_dma_cyclic = sun4i_dma_prep_dma_cyclic; + priv->slave.device_config = sun4i_dma_config; + priv->slave.device_terminate_all = sun4i_dma_terminate_all; + priv->slave.copy_align = 2; + priv->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.directions = BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_DEV); + priv->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + priv->slave.dev = &pdev->dev; + + priv->pchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_CHANNELS, + sizeof(struct sun4i_dma_pchan), GFP_KERNEL); + priv->vchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_VCHANS, + sizeof(struct sun4i_dma_vchan), GFP_KERNEL); + if (!priv->vchans || !priv->pchans) + return -ENOMEM; + + /* + * [0..SUN4I_NDMA_NR_MAX_CHANNELS) are normal pchans, and + * [SUN4I_NDMA_NR_MAX_CHANNELS..SUN4I_DMA_NR_MAX_CHANNELS) are + * dedicated ones + */ + for (i = 0; i < SUN4I_NDMA_NR_MAX_CHANNELS; i++) + priv->pchans[i].base = priv->base + + SUN4I_NDMA_CHANNEL_REG_BASE(i); + + for (j = 0; i < SUN4I_DMA_NR_MAX_CHANNELS; i++, j++) { + priv->pchans[i].base = priv->base + + SUN4I_DDMA_CHANNEL_REG_BASE(j); + priv->pchans[i].is_dedicated = 1; + } + + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + struct sun4i_dma_vchan *vchan = &priv->vchans[i]; + + spin_lock_init(&vchan->vc.lock); + vchan->vc.desc_free = sun4i_dma_free_contract; + vchan_init(&vchan->vc, &priv->slave); + } + + ret = clk_prepare_enable(priv->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the clock\n"); + return ret; + } + + /* + * Make sure the IRQs are all disabled and accounted for. The bootloader + * likes to leave these dirty + */ + writel(0, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel(0xFFFFFFFF, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + ret = devm_request_irq(&pdev->dev, priv->irq, sun4i_dma_interrupt, + 0, dev_name(&pdev->dev), priv); + if (ret) { + dev_err(&pdev->dev, "Cannot request IRQ\n"); + goto err_clk_disable; + } + + ret = dma_async_device_register(&priv->slave); + if (ret) { + dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); + goto err_clk_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, sun4i_dma_of_xlate, + priv); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + dev_dbg(&pdev->dev, "Successfully probed SUN4I_DMA\n"); + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&priv->slave); +err_clk_disable: + clk_disable_unprepare(priv->clk); + return ret; +} + +static int sun4i_dma_remove(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv = platform_get_drvdata(pdev); + + /* Disable IRQ so no more work is scheduled */ + disable_irq(priv->irq); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&priv->slave); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static const struct of_device_id sun4i_dma_match[] = { + { .compatible = "allwinner,sun4i-a10-dma" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, sun4i_dma_match); + +static struct platform_driver sun4i_dma_driver = { + .probe = sun4i_dma_probe, + .remove = sun4i_dma_remove, + .driver = { + .name = "sun4i-dma", + .of_match_table = sun4i_dma_match, + }, +}; + +module_platform_driver(sun4i_dma_driver); + +MODULE_DESCRIPTION("Allwinner A10 Dedicated DMA Controller Driver"); +MODULE_AUTHOR("Emilio López "); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c new file mode 100644 index 0000000000..2469efddf5 --- /dev/null +++ b/drivers/dma/sun6i-dma.c @@ -0,0 +1,1504 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2013-2014 Allwinner Tech Co., Ltd + * Author: Sugar + * + * Copyright (C) 2014 Maxime Ripard + * Maxime Ripard + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +/* + * Common registers + */ +#define DMA_IRQ_EN(x) ((x) * 0x04) +#define DMA_IRQ_HALF BIT(0) +#define DMA_IRQ_PKG BIT(1) +#define DMA_IRQ_QUEUE BIT(2) + +#define DMA_IRQ_CHAN_NR 8 +#define DMA_IRQ_CHAN_WIDTH 4 + + +#define DMA_IRQ_STAT(x) ((x) * 0x04 + 0x10) + +#define DMA_STAT 0x30 + +/* Offset between DMA_IRQ_EN and DMA_IRQ_STAT limits number of channels */ +#define DMA_MAX_CHANNELS (DMA_IRQ_CHAN_NR * 0x10 / 4) + +/* + * sun8i specific registers + */ +#define SUN8I_DMA_GATE 0x20 +#define SUN8I_DMA_GATE_ENABLE 0x4 + +#define SUNXI_H3_SECURE_REG 0x20 +#define SUNXI_H3_DMA_GATE 0x28 +#define SUNXI_H3_DMA_GATE_ENABLE 0x4 +/* + * Channels specific registers + */ +#define DMA_CHAN_ENABLE 0x00 +#define DMA_CHAN_ENABLE_START BIT(0) +#define DMA_CHAN_ENABLE_STOP 0 + +#define DMA_CHAN_PAUSE 0x04 +#define DMA_CHAN_PAUSE_PAUSE BIT(1) +#define DMA_CHAN_PAUSE_RESUME 0 + +#define DMA_CHAN_LLI_ADDR 0x08 + +#define DMA_CHAN_CUR_CFG 0x0c +#define DMA_CHAN_MAX_DRQ_A31 0x1f +#define DMA_CHAN_MAX_DRQ_H6 0x3f +#define DMA_CHAN_CFG_SRC_DRQ_A31(x) ((x) & DMA_CHAN_MAX_DRQ_A31) +#define DMA_CHAN_CFG_SRC_DRQ_H6(x) ((x) & DMA_CHAN_MAX_DRQ_H6) +#define DMA_CHAN_CFG_SRC_MODE_A31(x) (((x) & 0x1) << 5) +#define DMA_CHAN_CFG_SRC_MODE_H6(x) (((x) & 0x1) << 8) +#define DMA_CHAN_CFG_SRC_BURST_A31(x) (((x) & 0x3) << 7) +#define DMA_CHAN_CFG_SRC_BURST_H3(x) (((x) & 0x3) << 6) +#define DMA_CHAN_CFG_SRC_WIDTH(x) (((x) & 0x3) << 9) + +#define DMA_CHAN_CFG_DST_DRQ_A31(x) (DMA_CHAN_CFG_SRC_DRQ_A31(x) << 16) +#define DMA_CHAN_CFG_DST_DRQ_H6(x) (DMA_CHAN_CFG_SRC_DRQ_H6(x) << 16) +#define DMA_CHAN_CFG_DST_MODE_A31(x) (DMA_CHAN_CFG_SRC_MODE_A31(x) << 16) +#define DMA_CHAN_CFG_DST_MODE_H6(x) (DMA_CHAN_CFG_SRC_MODE_H6(x) << 16) +#define DMA_CHAN_CFG_DST_BURST_A31(x) (DMA_CHAN_CFG_SRC_BURST_A31(x) << 16) +#define DMA_CHAN_CFG_DST_BURST_H3(x) (DMA_CHAN_CFG_SRC_BURST_H3(x) << 16) +#define DMA_CHAN_CFG_DST_WIDTH(x) (DMA_CHAN_CFG_SRC_WIDTH(x) << 16) + +#define DMA_CHAN_CUR_SRC 0x10 + +#define DMA_CHAN_CUR_DST 0x14 + +#define DMA_CHAN_CUR_CNT 0x18 + +#define DMA_CHAN_CUR_PARA 0x1c + +/* + * LLI address mangling + * + * The LLI link physical address is also mangled, but we avoid dealing + * with that by allocating LLIs from the DMA32 zone. + */ +#define SRC_HIGH_ADDR(x) (((x) & 0x3U) << 16) +#define DST_HIGH_ADDR(x) (((x) & 0x3U) << 18) + +/* + * Various hardware related defines + */ +#define LLI_LAST_ITEM 0xfffff800 +#define NORMAL_WAIT 8 +#define DRQ_SDRAM 1 +#define LINEAR_MODE 0 +#define IO_MODE 1 + +/* forward declaration */ +struct sun6i_dma_dev; + +/* + * Hardware channels / ports representation + * + * The hardware is used in several SoCs, with differing numbers + * of channels and endpoints. This structure ties those numbers + * to a certain compatible string. + */ +struct sun6i_dma_config { + u32 nr_max_channels; + u32 nr_max_requests; + u32 nr_max_vchans; + /* + * In the datasheets/user manuals of newer Allwinner SoCs, a special + * bit (bit 2 at register 0x20) is present. + * It's named "DMA MCLK interface circuit auto gating bit" in the + * documents, and the footnote of this register says that this bit + * should be set up when initializing the DMA controller. + * Allwinner A23/A33 user manuals do not have this bit documented, + * however these SoCs really have and need this bit, as seen in the + * BSP kernel source code. + */ + void (*clock_autogate_enable)(struct sun6i_dma_dev *); + void (*set_burst_length)(u32 *p_cfg, s8 src_burst, s8 dst_burst); + void (*set_drq)(u32 *p_cfg, s8 src_drq, s8 dst_drq); + void (*set_mode)(u32 *p_cfg, s8 src_mode, s8 dst_mode); + u32 src_burst_lengths; + u32 dst_burst_lengths; + u32 src_addr_widths; + u32 dst_addr_widths; + bool has_high_addr; + bool has_mbus_clk; +}; + +/* + * Hardware representation of the LLI + * + * The hardware will be fed the physical address of this structure, + * and read its content in order to start the transfer. + */ +struct sun6i_dma_lli { + u32 cfg; + u32 src; + u32 dst; + u32 len; + u32 para; + u32 p_lli_next; + + /* + * This field is not used by the DMA controller, but will be + * used by the CPU to go through the list (mostly for dumping + * or freeing it). + */ + struct sun6i_dma_lli *v_lli_next; +}; + + +struct sun6i_desc { + struct virt_dma_desc vd; + dma_addr_t p_lli; + struct sun6i_dma_lli *v_lli; +}; + +struct sun6i_pchan { + u32 idx; + void __iomem *base; + struct sun6i_vchan *vchan; + struct sun6i_desc *desc; + struct sun6i_desc *done; +}; + +struct sun6i_vchan { + struct virt_dma_chan vc; + struct list_head node; + struct dma_slave_config cfg; + struct sun6i_pchan *phy; + u8 port; + u8 irq_type; + bool cyclic; +}; + +struct sun6i_dma_dev { + struct dma_device slave; + void __iomem *base; + struct clk *clk; + struct clk *clk_mbus; + int irq; + spinlock_t lock; + struct reset_control *rstc; + struct tasklet_struct task; + atomic_t tasklet_shutdown; + struct list_head pending; + struct dma_pool *pool; + struct sun6i_pchan *pchans; + struct sun6i_vchan *vchans; + const struct sun6i_dma_config *cfg; + u32 num_pchans; + u32 num_vchans; + u32 max_request; +}; + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct sun6i_dma_dev *to_sun6i_dma_dev(struct dma_device *d) +{ + return container_of(d, struct sun6i_dma_dev, slave); +} + +static inline struct sun6i_vchan *to_sun6i_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct sun6i_vchan, vc.chan); +} + +static inline struct sun6i_desc * +to_sun6i_desc(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct sun6i_desc, vd.tx); +} + +static inline void sun6i_dma_dump_com_regs(struct sun6i_dma_dev *sdev) +{ + dev_dbg(sdev->slave.dev, "Common register:\n" + "\tmask0(%04x): 0x%08x\n" + "\tmask1(%04x): 0x%08x\n" + "\tpend0(%04x): 0x%08x\n" + "\tpend1(%04x): 0x%08x\n" + "\tstats(%04x): 0x%08x\n", + DMA_IRQ_EN(0), readl(sdev->base + DMA_IRQ_EN(0)), + DMA_IRQ_EN(1), readl(sdev->base + DMA_IRQ_EN(1)), + DMA_IRQ_STAT(0), readl(sdev->base + DMA_IRQ_STAT(0)), + DMA_IRQ_STAT(1), readl(sdev->base + DMA_IRQ_STAT(1)), + DMA_STAT, readl(sdev->base + DMA_STAT)); +} + +static inline void sun6i_dma_dump_chan_regs(struct sun6i_dma_dev *sdev, + struct sun6i_pchan *pchan) +{ + dev_dbg(sdev->slave.dev, "Chan %d reg:\n" + "\t___en(%04x): \t0x%08x\n" + "\tpause(%04x): \t0x%08x\n" + "\tstart(%04x): \t0x%08x\n" + "\t__cfg(%04x): \t0x%08x\n" + "\t__src(%04x): \t0x%08x\n" + "\t__dst(%04x): \t0x%08x\n" + "\tcount(%04x): \t0x%08x\n" + "\t_para(%04x): \t0x%08x\n\n", + pchan->idx, + DMA_CHAN_ENABLE, + readl(pchan->base + DMA_CHAN_ENABLE), + DMA_CHAN_PAUSE, + readl(pchan->base + DMA_CHAN_PAUSE), + DMA_CHAN_LLI_ADDR, + readl(pchan->base + DMA_CHAN_LLI_ADDR), + DMA_CHAN_CUR_CFG, + readl(pchan->base + DMA_CHAN_CUR_CFG), + DMA_CHAN_CUR_SRC, + readl(pchan->base + DMA_CHAN_CUR_SRC), + DMA_CHAN_CUR_DST, + readl(pchan->base + DMA_CHAN_CUR_DST), + DMA_CHAN_CUR_CNT, + readl(pchan->base + DMA_CHAN_CUR_CNT), + DMA_CHAN_CUR_PARA, + readl(pchan->base + DMA_CHAN_CUR_PARA)); +} + +static inline s8 convert_burst(u32 maxburst) +{ + switch (maxburst) { + case 1: + return 0; + case 4: + return 1; + case 8: + return 2; + case 16: + return 3; + default: + return -EINVAL; + } +} + +static inline s8 convert_buswidth(enum dma_slave_buswidth addr_width) +{ + return ilog2(addr_width); +} + +static void sun6i_enable_clock_autogate_a23(struct sun6i_dma_dev *sdev) +{ + writel(SUN8I_DMA_GATE_ENABLE, sdev->base + SUN8I_DMA_GATE); +} + +static void sun6i_enable_clock_autogate_h3(struct sun6i_dma_dev *sdev) +{ + writel(SUNXI_H3_DMA_GATE_ENABLE, sdev->base + SUNXI_H3_DMA_GATE); +} + +static void sun6i_set_burst_length_a31(u32 *p_cfg, s8 src_burst, s8 dst_burst) +{ + *p_cfg |= DMA_CHAN_CFG_SRC_BURST_A31(src_burst) | + DMA_CHAN_CFG_DST_BURST_A31(dst_burst); +} + +static void sun6i_set_burst_length_h3(u32 *p_cfg, s8 src_burst, s8 dst_burst) +{ + *p_cfg |= DMA_CHAN_CFG_SRC_BURST_H3(src_burst) | + DMA_CHAN_CFG_DST_BURST_H3(dst_burst); +} + +static void sun6i_set_drq_a31(u32 *p_cfg, s8 src_drq, s8 dst_drq) +{ + *p_cfg |= DMA_CHAN_CFG_SRC_DRQ_A31(src_drq) | + DMA_CHAN_CFG_DST_DRQ_A31(dst_drq); +} + +static void sun6i_set_drq_h6(u32 *p_cfg, s8 src_drq, s8 dst_drq) +{ + *p_cfg |= DMA_CHAN_CFG_SRC_DRQ_H6(src_drq) | + DMA_CHAN_CFG_DST_DRQ_H6(dst_drq); +} + +static void sun6i_set_mode_a31(u32 *p_cfg, s8 src_mode, s8 dst_mode) +{ + *p_cfg |= DMA_CHAN_CFG_SRC_MODE_A31(src_mode) | + DMA_CHAN_CFG_DST_MODE_A31(dst_mode); +} + +static void sun6i_set_mode_h6(u32 *p_cfg, s8 src_mode, s8 dst_mode) +{ + *p_cfg |= DMA_CHAN_CFG_SRC_MODE_H6(src_mode) | + DMA_CHAN_CFG_DST_MODE_H6(dst_mode); +} + +static size_t sun6i_get_chan_size(struct sun6i_pchan *pchan) +{ + struct sun6i_desc *txd = pchan->desc; + struct sun6i_dma_lli *lli; + size_t bytes; + dma_addr_t pos; + + pos = readl(pchan->base + DMA_CHAN_LLI_ADDR); + bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + + if (pos == LLI_LAST_ITEM) + return bytes; + + for (lli = txd->v_lli; lli; lli = lli->v_lli_next) { + if (lli->p_lli_next == pos) { + for (lli = lli->v_lli_next; lli; lli = lli->v_lli_next) + bytes += lli->len; + break; + } + } + + return bytes; +} + +static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, + struct sun6i_dma_lli *next, + dma_addr_t next_phy, + struct sun6i_desc *txd) +{ + if ((!prev && !txd) || !next) + return NULL; + + if (!prev) { + txd->p_lli = next_phy; + txd->v_lli = next; + } else { + prev->p_lli_next = next_phy; + prev->v_lli_next = next; + } + + next->p_lli_next = LLI_LAST_ITEM; + next->v_lli_next = NULL; + + return next; +} + +static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan, + struct sun6i_dma_lli *v_lli, + dma_addr_t p_lli) +{ + dev_dbg(chan2dev(&vchan->vc.chan), + "\n\tdesc:\tp - %pad v - 0x%p\n" + "\t\tc - 0x%08x s - 0x%08x d - 0x%08x\n" + "\t\tl - 0x%08x p - 0x%08x n - 0x%08x\n", + &p_lli, v_lli, + v_lli->cfg, v_lli->src, v_lli->dst, + v_lli->len, v_lli->para, v_lli->p_lli_next); +} + +static void sun6i_dma_free_desc(struct virt_dma_desc *vd) +{ + struct sun6i_desc *txd = to_sun6i_desc(&vd->tx); + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device); + struct sun6i_dma_lli *v_lli, *v_next; + dma_addr_t p_lli, p_next; + + if (unlikely(!txd)) + return; + + p_lli = txd->p_lli; + v_lli = txd->v_lli; + + while (v_lli) { + v_next = v_lli->v_lli_next; + p_next = v_lli->p_lli_next; + + dma_pool_free(sdev->pool, v_lli, p_lli); + + v_lli = v_next; + p_lli = p_next; + } + + kfree(txd); +} + +static int sun6i_dma_start_desc(struct sun6i_vchan *vchan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device); + struct virt_dma_desc *desc = vchan_next_desc(&vchan->vc); + struct sun6i_pchan *pchan = vchan->phy; + u32 irq_val, irq_reg, irq_offset; + + if (!pchan) + return -EAGAIN; + + if (!desc) { + pchan->desc = NULL; + pchan->done = NULL; + return -EAGAIN; + } + + list_del(&desc->node); + + pchan->desc = to_sun6i_desc(&desc->tx); + pchan->done = NULL; + + sun6i_dma_dump_lli(vchan, pchan->desc->v_lli, pchan->desc->p_lli); + + irq_reg = pchan->idx / DMA_IRQ_CHAN_NR; + irq_offset = pchan->idx % DMA_IRQ_CHAN_NR; + + vchan->irq_type = vchan->cyclic ? DMA_IRQ_PKG : DMA_IRQ_QUEUE; + + irq_val = readl(sdev->base + DMA_IRQ_EN(irq_reg)); + irq_val &= ~((DMA_IRQ_HALF | DMA_IRQ_PKG | DMA_IRQ_QUEUE) << + (irq_offset * DMA_IRQ_CHAN_WIDTH)); + irq_val |= vchan->irq_type << (irq_offset * DMA_IRQ_CHAN_WIDTH); + writel(irq_val, sdev->base + DMA_IRQ_EN(irq_reg)); + + writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR); + writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE); + + sun6i_dma_dump_com_regs(sdev); + sun6i_dma_dump_chan_regs(sdev, pchan); + + return 0; +} + +static void sun6i_dma_tasklet(struct tasklet_struct *t) +{ + struct sun6i_dma_dev *sdev = from_tasklet(sdev, t, task); + struct sun6i_vchan *vchan; + struct sun6i_pchan *pchan; + unsigned int pchan_alloc = 0; + unsigned int pchan_idx; + + list_for_each_entry(vchan, &sdev->slave.channels, vc.chan.device_node) { + spin_lock_irq(&vchan->vc.lock); + + pchan = vchan->phy; + + if (pchan && pchan->done) { + if (sun6i_dma_start_desc(vchan)) { + /* + * No current txd associated with this channel + */ + dev_dbg(sdev->slave.dev, "pchan %u: free\n", + pchan->idx); + + /* Mark this channel free */ + vchan->phy = NULL; + pchan->vchan = NULL; + } + } + spin_unlock_irq(&vchan->vc.lock); + } + + spin_lock_irq(&sdev->lock); + for (pchan_idx = 0; pchan_idx < sdev->num_pchans; pchan_idx++) { + pchan = &sdev->pchans[pchan_idx]; + + if (pchan->vchan || list_empty(&sdev->pending)) + continue; + + vchan = list_first_entry(&sdev->pending, + struct sun6i_vchan, node); + + /* Remove from pending channels */ + list_del_init(&vchan->node); + pchan_alloc |= BIT(pchan_idx); + + /* Mark this channel allocated */ + pchan->vchan = vchan; + vchan->phy = pchan; + dev_dbg(sdev->slave.dev, "pchan %u: alloc vchan %p\n", + pchan->idx, &vchan->vc); + } + spin_unlock_irq(&sdev->lock); + + for (pchan_idx = 0; pchan_idx < sdev->num_pchans; pchan_idx++) { + if (!(pchan_alloc & BIT(pchan_idx))) + continue; + + pchan = sdev->pchans + pchan_idx; + vchan = pchan->vchan; + if (vchan) { + spin_lock_irq(&vchan->vc.lock); + sun6i_dma_start_desc(vchan); + spin_unlock_irq(&vchan->vc.lock); + } + } +} + +static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) +{ + struct sun6i_dma_dev *sdev = dev_id; + struct sun6i_vchan *vchan; + struct sun6i_pchan *pchan; + int i, j, ret = IRQ_NONE; + u32 status; + + for (i = 0; i < sdev->num_pchans / DMA_IRQ_CHAN_NR; i++) { + status = readl(sdev->base + DMA_IRQ_STAT(i)); + if (!status) + continue; + + dev_dbg(sdev->slave.dev, "DMA irq status %s: 0x%x\n", + i ? "high" : "low", status); + + writel(status, sdev->base + DMA_IRQ_STAT(i)); + + for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) { + pchan = sdev->pchans + j; + vchan = pchan->vchan; + if (vchan && (status & vchan->irq_type)) { + if (vchan->cyclic) { + vchan_cyclic_callback(&pchan->desc->vd); + } else { + spin_lock(&vchan->vc.lock); + vchan_cookie_complete(&pchan->desc->vd); + pchan->done = pchan->desc; + spin_unlock(&vchan->vc.lock); + } + } + + status = status >> DMA_IRQ_CHAN_WIDTH; + } + + if (!atomic_read(&sdev->tasklet_shutdown)) + tasklet_schedule(&sdev->task); + ret = IRQ_HANDLED; + } + + return ret; +} + +static int set_config(struct sun6i_dma_dev *sdev, + struct dma_slave_config *sconfig, + enum dma_transfer_direction direction, + u32 *p_cfg) +{ + enum dma_slave_buswidth src_addr_width, dst_addr_width; + u32 src_maxburst, dst_maxburst; + s8 src_width, dst_width, src_burst, dst_burst; + + src_addr_width = sconfig->src_addr_width; + dst_addr_width = sconfig->dst_addr_width; + src_maxburst = sconfig->src_maxburst; + dst_maxburst = sconfig->dst_maxburst; + + switch (direction) { + case DMA_MEM_TO_DEV: + if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + src_maxburst = src_maxburst ? src_maxburst : 8; + break; + case DMA_DEV_TO_MEM: + if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dst_maxburst = dst_maxburst ? dst_maxburst : 8; + break; + default: + return -EINVAL; + } + + if (!(BIT(src_addr_width) & sdev->slave.src_addr_widths)) + return -EINVAL; + if (!(BIT(dst_addr_width) & sdev->slave.dst_addr_widths)) + return -EINVAL; + if (!(BIT(src_maxburst) & sdev->cfg->src_burst_lengths)) + return -EINVAL; + if (!(BIT(dst_maxburst) & sdev->cfg->dst_burst_lengths)) + return -EINVAL; + + src_width = convert_buswidth(src_addr_width); + dst_width = convert_buswidth(dst_addr_width); + dst_burst = convert_burst(dst_maxburst); + src_burst = convert_burst(src_maxburst); + + *p_cfg = DMA_CHAN_CFG_SRC_WIDTH(src_width) | + DMA_CHAN_CFG_DST_WIDTH(dst_width); + + sdev->cfg->set_burst_length(p_cfg, src_burst, dst_burst); + + return 0; +} + +static inline void sun6i_dma_set_addr(struct sun6i_dma_dev *sdev, + struct sun6i_dma_lli *v_lli, + dma_addr_t src, dma_addr_t dst) +{ + v_lli->src = lower_32_bits(src); + v_lli->dst = lower_32_bits(dst); + + if (sdev->cfg->has_high_addr) + v_lli->para |= SRC_HIGH_ADDR(upper_32_bits(src)) | + DST_HIGH_ADDR(upper_32_bits(dst)); +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_dma_lli *v_lli; + struct sun6i_desc *txd; + dma_addr_t p_lli; + s8 burst, width; + + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %zu. flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, &dest, &src, len, flags); + + if (!len) + return NULL; + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + v_lli = dma_pool_alloc(sdev->pool, GFP_DMA32 | GFP_NOWAIT, &p_lli); + if (!v_lli) { + dev_err(sdev->slave.dev, "Failed to alloc lli memory\n"); + goto err_txd_free; + } + + v_lli->len = len; + v_lli->para = NORMAL_WAIT; + sun6i_dma_set_addr(sdev, v_lli, src, dest); + + burst = convert_burst(8); + width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES); + v_lli->cfg = DMA_CHAN_CFG_SRC_WIDTH(width) | + DMA_CHAN_CFG_DST_WIDTH(width); + + sdev->cfg->set_burst_length(&v_lli->cfg, burst, burst); + sdev->cfg->set_drq(&v_lli->cfg, DRQ_SDRAM, DRQ_SDRAM); + sdev->cfg->set_mode(&v_lli->cfg, LINEAR_MODE, LINEAR_MODE); + + sun6i_dma_lli_add(NULL, v_lli, p_lli, txd); + + sun6i_dma_dump_lli(vchan, v_lli, p_lli); + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_txd_free: + kfree(txd); + return NULL; +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli, *prev = NULL; + struct sun6i_desc *txd; + struct scatterlist *sg; + dma_addr_t p_lli; + u32 lli_cfg; + int i, ret; + + if (!sgl) + return NULL; + + ret = set_config(sdev, sconfig, dir, &lli_cfg); + if (ret) { + dev_err(chan2dev(chan), "Invalid DMA configuration\n"); + return NULL; + } + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + v_lli = dma_pool_alloc(sdev->pool, GFP_DMA32 | GFP_NOWAIT, &p_lli); + if (!v_lli) + goto err_lli_free; + + v_lli->len = sg_dma_len(sg); + v_lli->para = NORMAL_WAIT; + + if (dir == DMA_MEM_TO_DEV) { + sun6i_dma_set_addr(sdev, v_lli, + sg_dma_address(sg), + sconfig->dst_addr); + v_lli->cfg = lli_cfg; + sdev->cfg->set_drq(&v_lli->cfg, DRQ_SDRAM, vchan->port); + sdev->cfg->set_mode(&v_lli->cfg, LINEAR_MODE, IO_MODE); + + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, + &sconfig->dst_addr, &sg_dma_address(sg), + sg_dma_len(sg), flags); + + } else { + sun6i_dma_set_addr(sdev, v_lli, + sconfig->src_addr, + sg_dma_address(sg)); + v_lli->cfg = lli_cfg; + sdev->cfg->set_drq(&v_lli->cfg, vchan->port, DRQ_SDRAM); + sdev->cfg->set_mode(&v_lli->cfg, IO_MODE, LINEAR_MODE); + + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, + &sg_dma_address(sg), &sconfig->src_addr, + sg_dma_len(sg), flags); + } + + prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd); + } + + dev_dbg(chan2dev(chan), "First: %pad\n", &txd->p_lli); + for (p_lli = txd->p_lli, v_lli = txd->v_lli; v_lli; + p_lli = v_lli->p_lli_next, v_lli = v_lli->v_lli_next) + sun6i_dma_dump_lli(vchan, v_lli, p_lli); + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_lli_free: + for (p_lli = txd->p_lli, v_lli = txd->v_lli; v_lli; + p_lli = v_lli->p_lli_next, v_lli = v_lli->v_lli_next) + dma_pool_free(sdev->pool, v_lli, p_lli); + kfree(txd); + return NULL; +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_cyclic( + struct dma_chan *chan, + dma_addr_t buf_addr, + size_t buf_len, + size_t period_len, + enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli, *prev = NULL; + struct sun6i_desc *txd; + dma_addr_t p_lli; + u32 lli_cfg; + unsigned int i, periods = buf_len / period_len; + int ret; + + ret = set_config(sdev, sconfig, dir, &lli_cfg); + if (ret) { + dev_err(chan2dev(chan), "Invalid DMA configuration\n"); + return NULL; + } + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + for (i = 0; i < periods; i++) { + v_lli = dma_pool_alloc(sdev->pool, GFP_DMA32 | GFP_NOWAIT, &p_lli); + if (!v_lli) { + dev_err(sdev->slave.dev, "Failed to alloc lli memory\n"); + goto err_lli_free; + } + + v_lli->len = period_len; + v_lli->para = NORMAL_WAIT; + + if (dir == DMA_MEM_TO_DEV) { + sun6i_dma_set_addr(sdev, v_lli, + buf_addr + period_len * i, + sconfig->dst_addr); + v_lli->cfg = lli_cfg; + sdev->cfg->set_drq(&v_lli->cfg, DRQ_SDRAM, vchan->port); + sdev->cfg->set_mode(&v_lli->cfg, LINEAR_MODE, IO_MODE); + } else { + sun6i_dma_set_addr(sdev, v_lli, + sconfig->src_addr, + buf_addr + period_len * i); + v_lli->cfg = lli_cfg; + sdev->cfg->set_drq(&v_lli->cfg, vchan->port, DRQ_SDRAM); + sdev->cfg->set_mode(&v_lli->cfg, IO_MODE, LINEAR_MODE); + } + + prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd); + } + + prev->p_lli_next = txd->p_lli; /* cyclic list */ + + vchan->cyclic = true; + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_lli_free: + for (p_lli = txd->p_lli, v_lli = txd->v_lli; v_lli; + p_lli = v_lli->p_lli_next, v_lli = v_lli->v_lli_next) + dma_pool_free(sdev->pool, v_lli, p_lli); + kfree(txd); + return NULL; +} + +static int sun6i_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + + memcpy(&vchan->cfg, config, sizeof(*config)); + + return 0; +} + +static int sun6i_dma_pause(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_pchan *pchan = vchan->phy; + + dev_dbg(chan2dev(chan), "vchan %p: pause\n", &vchan->vc); + + if (pchan) { + writel(DMA_CHAN_PAUSE_PAUSE, + pchan->base + DMA_CHAN_PAUSE); + } else { + spin_lock(&sdev->lock); + list_del_init(&vchan->node); + spin_unlock(&sdev->lock); + } + + return 0; +} + +static int sun6i_dma_resume(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_pchan *pchan = vchan->phy; + unsigned long flags; + + dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc); + + spin_lock_irqsave(&vchan->vc.lock, flags); + + if (pchan) { + writel(DMA_CHAN_PAUSE_RESUME, + pchan->base + DMA_CHAN_PAUSE); + } else if (!list_empty(&vchan->vc.desc_issued)) { + spin_lock(&sdev->lock); + list_add_tail(&vchan->node, &sdev->pending); + spin_unlock(&sdev->lock); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return 0; +} + +static int sun6i_dma_terminate_all(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_pchan *pchan = vchan->phy; + unsigned long flags; + LIST_HEAD(head); + + spin_lock(&sdev->lock); + list_del_init(&vchan->node); + spin_unlock(&sdev->lock); + + spin_lock_irqsave(&vchan->vc.lock, flags); + + if (vchan->cyclic) { + vchan->cyclic = false; + if (pchan && pchan->desc) { + struct virt_dma_desc *vd = &pchan->desc->vd; + struct virt_dma_chan *vc = &vchan->vc; + + list_add_tail(&vd->node, &vc->desc_completed); + } + } + + vchan_get_all_descriptors(&vchan->vc, &head); + + if (pchan) { + writel(DMA_CHAN_ENABLE_STOP, pchan->base + DMA_CHAN_ENABLE); + writel(DMA_CHAN_PAUSE_RESUME, pchan->base + DMA_CHAN_PAUSE); + + vchan->phy = NULL; + pchan->vchan = NULL; + pchan->desc = NULL; + pchan->done = NULL; + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + vchan_dma_desc_free_list(&vchan->vc, &head); + + return 0; +} + +static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_pchan *pchan = vchan->phy; + struct sun6i_dma_lli *lli; + struct virt_dma_desc *vd; + struct sun6i_desc *txd; + enum dma_status ret; + unsigned long flags; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (ret == DMA_COMPLETE || !state) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + vd = vchan_find_desc(&vchan->vc, cookie); + txd = to_sun6i_desc(&vd->tx); + + if (vd) { + for (lli = txd->v_lli; lli != NULL; lli = lli->v_lli_next) + bytes += lli->len; + } else if (!pchan || !pchan->desc) { + bytes = 0; + } else { + bytes = sun6i_get_chan_size(pchan); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + dma_set_residue(state, bytes); + + return ret; +} + +static void sun6i_dma_issue_pending(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + if (vchan_issue_pending(&vchan->vc)) { + spin_lock(&sdev->lock); + + if (!vchan->phy && list_empty(&vchan->node)) { + list_add_tail(&vchan->node, &sdev->pending); + tasklet_schedule(&sdev->task); + dev_dbg(chan2dev(chan), "vchan %p: issued\n", + &vchan->vc); + } + + spin_unlock(&sdev->lock); + } else { + dev_dbg(chan2dev(chan), "vchan %p: nothing to issue\n", + &vchan->vc); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static void sun6i_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&sdev->lock, flags); + list_del_init(&vchan->node); + spin_unlock_irqrestore(&sdev->lock, flags); + + vchan_free_chan_resources(&vchan->vc); +} + +static struct dma_chan *sun6i_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sun6i_dma_dev *sdev = ofdma->of_dma_data; + struct sun6i_vchan *vchan; + struct dma_chan *chan; + u8 port = dma_spec->args[0]; + + if (port > sdev->max_request) + return NULL; + + chan = dma_get_any_slave_channel(&sdev->slave); + if (!chan) + return NULL; + + vchan = to_sun6i_vchan(chan); + vchan->port = port; + + return chan; +} + +static inline void sun6i_kill_tasklet(struct sun6i_dma_dev *sdev) +{ + /* Disable all interrupts from DMA */ + writel(0, sdev->base + DMA_IRQ_EN(0)); + writel(0, sdev->base + DMA_IRQ_EN(1)); + + /* Prevent spurious interrupts from scheduling the tasklet */ + atomic_inc(&sdev->tasklet_shutdown); + + /* Make sure we won't have any further interrupts */ + devm_free_irq(sdev->slave.dev, sdev->irq, sdev); + + /* Actually prevent the tasklet from being scheduled */ + tasklet_kill(&sdev->task); +} + +static inline void sun6i_dma_free(struct sun6i_dma_dev *sdev) +{ + int i; + + for (i = 0; i < sdev->num_vchans; i++) { + struct sun6i_vchan *vchan = &sdev->vchans[i]; + + list_del(&vchan->vc.chan.device_node); + tasklet_kill(&vchan->vc.task); + } +} + +/* + * For A31: + * + * There's 16 physical channels that can work in parallel. + * + * However we have 30 different endpoints for our requests. + * + * Since the channels are able to handle only an unidirectional + * transfer, we need to allocate more virtual channels so that + * everyone can grab one channel. + * + * Some devices can't work in both direction (mostly because it + * wouldn't make sense), so we have a bit fewer virtual channels than + * 2 channels per endpoints. + */ + +static struct sun6i_dma_config sun6i_a31_dma_cfg = { + .nr_max_channels = 16, + .nr_max_requests = 30, + .nr_max_vchans = 53, + .set_burst_length = sun6i_set_burst_length_a31, + .set_drq = sun6i_set_drq_a31, + .set_mode = sun6i_set_mode_a31, + .src_burst_lengths = BIT(1) | BIT(8), + .dst_burst_lengths = BIT(1) | BIT(8), + .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES), + .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES), +}; + +/* + * The A23 only has 8 physical channels, a maximum DRQ port id of 24, + * and a total of 37 usable source and destination endpoints. + */ + +static struct sun6i_dma_config sun8i_a23_dma_cfg = { + .nr_max_channels = 8, + .nr_max_requests = 24, + .nr_max_vchans = 37, + .clock_autogate_enable = sun6i_enable_clock_autogate_a23, + .set_burst_length = sun6i_set_burst_length_a31, + .set_drq = sun6i_set_drq_a31, + .set_mode = sun6i_set_mode_a31, + .src_burst_lengths = BIT(1) | BIT(8), + .dst_burst_lengths = BIT(1) | BIT(8), + .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES), + .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES), +}; + +static struct sun6i_dma_config sun8i_a83t_dma_cfg = { + .nr_max_channels = 8, + .nr_max_requests = 28, + .nr_max_vchans = 39, + .clock_autogate_enable = sun6i_enable_clock_autogate_a23, + .set_burst_length = sun6i_set_burst_length_a31, + .set_drq = sun6i_set_drq_a31, + .set_mode = sun6i_set_mode_a31, + .src_burst_lengths = BIT(1) | BIT(8), + .dst_burst_lengths = BIT(1) | BIT(8), + .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES), + .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES), +}; + +/* + * The H3 has 12 physical channels, a maximum DRQ port id of 27, + * and a total of 34 usable source and destination endpoints. + * It also supports additional burst lengths and bus widths, + * and the burst length fields have different offsets. + */ + +static struct sun6i_dma_config sun8i_h3_dma_cfg = { + .nr_max_channels = 12, + .nr_max_requests = 27, + .nr_max_vchans = 34, + .clock_autogate_enable = sun6i_enable_clock_autogate_h3, + .set_burst_length = sun6i_set_burst_length_h3, + .set_drq = sun6i_set_drq_a31, + .set_mode = sun6i_set_mode_a31, + .src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16), + .dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16), + .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES), + .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES), +}; + +/* + * The A64 binding uses the number of dma channels from the + * device tree node. + */ +static struct sun6i_dma_config sun50i_a64_dma_cfg = { + .clock_autogate_enable = sun6i_enable_clock_autogate_h3, + .set_burst_length = sun6i_set_burst_length_h3, + .set_drq = sun6i_set_drq_a31, + .set_mode = sun6i_set_mode_a31, + .src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16), + .dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16), + .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES), + .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES), +}; + +/* + * The A100 binding uses the number of dma channels from the + * device tree node. + */ +static struct sun6i_dma_config sun50i_a100_dma_cfg = { + .clock_autogate_enable = sun6i_enable_clock_autogate_h3, + .set_burst_length = sun6i_set_burst_length_h3, + .set_drq = sun6i_set_drq_h6, + .set_mode = sun6i_set_mode_h6, + .src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16), + .dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16), + .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES), + .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES), + .has_high_addr = true, + .has_mbus_clk = true, +}; + +/* + * The H6 binding uses the number of dma channels from the + * device tree node. + */ +static struct sun6i_dma_config sun50i_h6_dma_cfg = { + .clock_autogate_enable = sun6i_enable_clock_autogate_h3, + .set_burst_length = sun6i_set_burst_length_h3, + .set_drq = sun6i_set_drq_h6, + .set_mode = sun6i_set_mode_h6, + .src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16), + .dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16), + .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES), + .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES), + .has_mbus_clk = true, +}; + +/* + * The V3s have only 8 physical channels, a maximum DRQ port id of 23, + * and a total of 24 usable source and destination endpoints. + */ + +static struct sun6i_dma_config sun8i_v3s_dma_cfg = { + .nr_max_channels = 8, + .nr_max_requests = 23, + .nr_max_vchans = 24, + .clock_autogate_enable = sun6i_enable_clock_autogate_a23, + .set_burst_length = sun6i_set_burst_length_a31, + .set_drq = sun6i_set_drq_a31, + .set_mode = sun6i_set_mode_a31, + .src_burst_lengths = BIT(1) | BIT(8), + .dst_burst_lengths = BIT(1) | BIT(8), + .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES), + .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES), +}; + +static const struct of_device_id sun6i_dma_match[] = { + { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg }, + { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg }, + { .compatible = "allwinner,sun8i-a83t-dma", .data = &sun8i_a83t_dma_cfg }, + { .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg }, + { .compatible = "allwinner,sun8i-v3s-dma", .data = &sun8i_v3s_dma_cfg }, + { .compatible = "allwinner,sun20i-d1-dma", .data = &sun50i_a100_dma_cfg }, + { .compatible = "allwinner,sun50i-a64-dma", .data = &sun50i_a64_dma_cfg }, + { .compatible = "allwinner,sun50i-a100-dma", .data = &sun50i_a100_dma_cfg }, + { .compatible = "allwinner,sun50i-h6-dma", .data = &sun50i_h6_dma_cfg }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sun6i_dma_match); + +static int sun6i_dma_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct sun6i_dma_dev *sdc; + int ret, i; + + sdc = devm_kzalloc(&pdev->dev, sizeof(*sdc), GFP_KERNEL); + if (!sdc) + return -ENOMEM; + + sdc->cfg = of_device_get_match_data(&pdev->dev); + if (!sdc->cfg) + return -ENODEV; + + sdc->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(sdc->base)) + return PTR_ERR(sdc->base); + + sdc->irq = platform_get_irq(pdev, 0); + if (sdc->irq < 0) + return sdc->irq; + + sdc->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(sdc->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + return PTR_ERR(sdc->clk); + } + + if (sdc->cfg->has_mbus_clk) { + sdc->clk_mbus = devm_clk_get(&pdev->dev, "mbus"); + if (IS_ERR(sdc->clk_mbus)) { + dev_err(&pdev->dev, "No mbus clock specified\n"); + return PTR_ERR(sdc->clk_mbus); + } + } + + sdc->rstc = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(sdc->rstc)) { + dev_err(&pdev->dev, "No reset controller specified\n"); + return PTR_ERR(sdc->rstc); + } + + sdc->pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev, + sizeof(struct sun6i_dma_lli), 4, 0); + if (!sdc->pool) { + dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, sdc); + INIT_LIST_HEAD(&sdc->pending); + spin_lock_init(&sdc->lock); + + dma_set_max_seg_size(&pdev->dev, SZ_32M - 1); + + dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask); + dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, sdc->slave.cap_mask); + + INIT_LIST_HEAD(&sdc->slave.channels); + sdc->slave.device_free_chan_resources = sun6i_dma_free_chan_resources; + sdc->slave.device_tx_status = sun6i_dma_tx_status; + sdc->slave.device_issue_pending = sun6i_dma_issue_pending; + sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; + sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; + sdc->slave.device_prep_dma_cyclic = sun6i_dma_prep_dma_cyclic; + sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; + sdc->slave.device_config = sun6i_dma_config; + sdc->slave.device_pause = sun6i_dma_pause; + sdc->slave.device_resume = sun6i_dma_resume; + sdc->slave.device_terminate_all = sun6i_dma_terminate_all; + sdc->slave.src_addr_widths = sdc->cfg->src_addr_widths; + sdc->slave.dst_addr_widths = sdc->cfg->dst_addr_widths; + sdc->slave.directions = BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_DEV); + sdc->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + sdc->slave.dev = &pdev->dev; + + sdc->num_pchans = sdc->cfg->nr_max_channels; + sdc->num_vchans = sdc->cfg->nr_max_vchans; + sdc->max_request = sdc->cfg->nr_max_requests; + + ret = of_property_read_u32(np, "dma-channels", &sdc->num_pchans); + if (ret && !sdc->num_pchans) { + dev_err(&pdev->dev, "Can't get dma-channels.\n"); + return ret; + } + + ret = of_property_read_u32(np, "dma-requests", &sdc->max_request); + if (ret && !sdc->max_request) { + dev_info(&pdev->dev, "Missing dma-requests, using %u.\n", + DMA_CHAN_MAX_DRQ_A31); + sdc->max_request = DMA_CHAN_MAX_DRQ_A31; + } + + /* + * If the number of vchans is not specified, derive it from the + * highest port number, at most one channel per port and direction. + */ + if (!sdc->num_vchans) + sdc->num_vchans = 2 * (sdc->max_request + 1); + + sdc->pchans = devm_kcalloc(&pdev->dev, sdc->num_pchans, + sizeof(struct sun6i_pchan), GFP_KERNEL); + if (!sdc->pchans) + return -ENOMEM; + + sdc->vchans = devm_kcalloc(&pdev->dev, sdc->num_vchans, + sizeof(struct sun6i_vchan), GFP_KERNEL); + if (!sdc->vchans) + return -ENOMEM; + + tasklet_setup(&sdc->task, sun6i_dma_tasklet); + + for (i = 0; i < sdc->num_pchans; i++) { + struct sun6i_pchan *pchan = &sdc->pchans[i]; + + pchan->idx = i; + pchan->base = sdc->base + 0x100 + i * 0x40; + } + + for (i = 0; i < sdc->num_vchans; i++) { + struct sun6i_vchan *vchan = &sdc->vchans[i]; + + INIT_LIST_HEAD(&vchan->node); + vchan->vc.desc_free = sun6i_dma_free_desc; + vchan_init(&vchan->vc, &sdc->slave); + } + + ret = reset_control_deassert(sdc->rstc); + if (ret) { + dev_err(&pdev->dev, "Couldn't deassert the device from reset\n"); + goto err_chan_free; + } + + ret = clk_prepare_enable(sdc->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the clock\n"); + goto err_reset_assert; + } + + if (sdc->cfg->has_mbus_clk) { + ret = clk_prepare_enable(sdc->clk_mbus); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable mbus clock\n"); + goto err_clk_disable; + } + } + + ret = devm_request_irq(&pdev->dev, sdc->irq, sun6i_dma_interrupt, 0, + dev_name(&pdev->dev), sdc); + if (ret) { + dev_err(&pdev->dev, "Cannot request IRQ\n"); + goto err_mbus_clk_disable; + } + + ret = dma_async_device_register(&sdc->slave); + if (ret) { + dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); + goto err_irq_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, sun6i_dma_of_xlate, + sdc); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + if (sdc->cfg->clock_autogate_enable) + sdc->cfg->clock_autogate_enable(sdc); + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&sdc->slave); +err_irq_disable: + sun6i_kill_tasklet(sdc); +err_mbus_clk_disable: + clk_disable_unprepare(sdc->clk_mbus); +err_clk_disable: + clk_disable_unprepare(sdc->clk); +err_reset_assert: + reset_control_assert(sdc->rstc); +err_chan_free: + sun6i_dma_free(sdc); + return ret; +} + +static int sun6i_dma_remove(struct platform_device *pdev) +{ + struct sun6i_dma_dev *sdc = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&sdc->slave); + + sun6i_kill_tasklet(sdc); + + clk_disable_unprepare(sdc->clk_mbus); + clk_disable_unprepare(sdc->clk); + reset_control_assert(sdc->rstc); + + sun6i_dma_free(sdc); + + return 0; +} + +static struct platform_driver sun6i_dma_driver = { + .probe = sun6i_dma_probe, + .remove = sun6i_dma_remove, + .driver = { + .name = "sun6i-dma", + .of_match_table = sun6i_dma_match, + }, +}; +module_platform_driver(sun6i_dma_driver); + +MODULE_DESCRIPTION("Allwinner A31 DMA Controller Driver"); +MODULE_AUTHOR("Sugar "); +MODULE_AUTHOR("Maxime Ripard "); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c new file mode 100644 index 0000000000..33b1010011 --- /dev/null +++ b/drivers/dma/tegra186-gpc-dma.c @@ -0,0 +1,1544 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * DMA driver for NVIDIA Tegra GPC DMA controller. + * + * Copyright (c) 2014-2022, NVIDIA CORPORATION. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "virt-dma.h" + +/* CSR register */ +#define TEGRA_GPCDMA_CHAN_CSR 0x00 +#define TEGRA_GPCDMA_CSR_ENB BIT(31) +#define TEGRA_GPCDMA_CSR_IE_EOC BIT(30) +#define TEGRA_GPCDMA_CSR_ONCE BIT(27) + +#define TEGRA_GPCDMA_CSR_FC_MODE GENMASK(25, 24) +#define TEGRA_GPCDMA_CSR_FC_MODE_NO_MMIO \ + FIELD_PREP(TEGRA_GPCDMA_CSR_FC_MODE, 0) +#define TEGRA_GPCDMA_CSR_FC_MODE_ONE_MMIO \ + FIELD_PREP(TEGRA_GPCDMA_CSR_FC_MODE, 1) +#define TEGRA_GPCDMA_CSR_FC_MODE_TWO_MMIO \ + FIELD_PREP(TEGRA_GPCDMA_CSR_FC_MODE, 2) +#define TEGRA_GPCDMA_CSR_FC_MODE_FOUR_MMIO \ + FIELD_PREP(TEGRA_GPCDMA_CSR_FC_MODE, 3) + +#define TEGRA_GPCDMA_CSR_DMA GENMASK(23, 21) +#define TEGRA_GPCDMA_CSR_DMA_IO2MEM_NO_FC \ + FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 0) +#define TEGRA_GPCDMA_CSR_DMA_IO2MEM_FC \ + FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 1) +#define TEGRA_GPCDMA_CSR_DMA_MEM2IO_NO_FC \ + FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 2) +#define TEGRA_GPCDMA_CSR_DMA_MEM2IO_FC \ + FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 3) +#define TEGRA_GPCDMA_CSR_DMA_MEM2MEM \ + FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 4) +#define TEGRA_GPCDMA_CSR_DMA_FIXED_PAT \ + FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 6) + +#define TEGRA_GPCDMA_CSR_REQ_SEL_MASK GENMASK(20, 16) +#define TEGRA_GPCDMA_CSR_REQ_SEL_UNUSED \ + FIELD_PREP(TEGRA_GPCDMA_CSR_REQ_SEL_MASK, 4) +#define TEGRA_GPCDMA_CSR_IRQ_MASK BIT(15) +#define TEGRA_GPCDMA_CSR_WEIGHT GENMASK(13, 10) + +/* STATUS register */ +#define TEGRA_GPCDMA_CHAN_STATUS 0x004 +#define TEGRA_GPCDMA_STATUS_BUSY BIT(31) +#define TEGRA_GPCDMA_STATUS_ISE_EOC BIT(30) +#define TEGRA_GPCDMA_STATUS_PING_PONG BIT(28) +#define TEGRA_GPCDMA_STATUS_DMA_ACTIVITY BIT(27) +#define TEGRA_GPCDMA_STATUS_CHANNEL_PAUSE BIT(26) +#define TEGRA_GPCDMA_STATUS_CHANNEL_RX BIT(25) +#define TEGRA_GPCDMA_STATUS_CHANNEL_TX BIT(24) +#define TEGRA_GPCDMA_STATUS_IRQ_INTR_STA BIT(23) +#define TEGRA_GPCDMA_STATUS_IRQ_STA BIT(21) +#define TEGRA_GPCDMA_STATUS_IRQ_TRIG_STA BIT(20) + +#define TEGRA_GPCDMA_CHAN_CSRE 0x008 +#define TEGRA_GPCDMA_CHAN_CSRE_PAUSE BIT(31) + +/* Source address */ +#define TEGRA_GPCDMA_CHAN_SRC_PTR 0x00C + +/* Destination address */ +#define TEGRA_GPCDMA_CHAN_DST_PTR 0x010 + +/* High address pointer */ +#define TEGRA_GPCDMA_CHAN_HIGH_ADDR_PTR 0x014 +#define TEGRA_GPCDMA_HIGH_ADDR_SRC_PTR GENMASK(7, 0) +#define TEGRA_GPCDMA_HIGH_ADDR_DST_PTR GENMASK(23, 16) + +/* MC sequence register */ +#define TEGRA_GPCDMA_CHAN_MCSEQ 0x18 +#define TEGRA_GPCDMA_MCSEQ_DATA_SWAP BIT(31) +#define TEGRA_GPCDMA_MCSEQ_REQ_COUNT GENMASK(30, 25) +#define TEGRA_GPCDMA_MCSEQ_BURST GENMASK(24, 23) +#define TEGRA_GPCDMA_MCSEQ_BURST_2 \ + FIELD_PREP(TEGRA_GPCDMA_MCSEQ_BURST, 0) +#define TEGRA_GPCDMA_MCSEQ_BURST_16 \ + FIELD_PREP(TEGRA_GPCDMA_MCSEQ_BURST, 3) +#define TEGRA_GPCDMA_MCSEQ_WRAP1 GENMASK(22, 20) +#define TEGRA_GPCDMA_MCSEQ_WRAP0 GENMASK(19, 17) +#define TEGRA_GPCDMA_MCSEQ_WRAP_NONE 0 + +#define TEGRA_GPCDMA_MCSEQ_STREAM_ID1_MASK GENMASK(13, 7) +#define TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK GENMASK(6, 0) + +/* MMIO sequence register */ +#define TEGRA_GPCDMA_CHAN_MMIOSEQ 0x01c +#define TEGRA_GPCDMA_MMIOSEQ_DBL_BUF BIT(31) +#define TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH GENMASK(30, 28) +#define TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_8 \ + FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH, 0) +#define TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_16 \ + FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH, 1) +#define TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_32 \ + FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH, 2) +#define TEGRA_GPCDMA_MMIOSEQ_DATA_SWAP BIT(27) +#define TEGRA_GPCDMA_MMIOSEQ_BURST_SHIFT 23 +#define TEGRA_GPCDMA_MMIOSEQ_BURST_MIN 2U +#define TEGRA_GPCDMA_MMIOSEQ_BURST_MAX 32U +#define TEGRA_GPCDMA_MMIOSEQ_BURST(bs) \ + (GENMASK((fls(bs) - 2), 0) << TEGRA_GPCDMA_MMIOSEQ_BURST_SHIFT) +#define TEGRA_GPCDMA_MMIOSEQ_MASTER_ID GENMASK(22, 19) +#define TEGRA_GPCDMA_MMIOSEQ_WRAP_WORD GENMASK(18, 16) +#define TEGRA_GPCDMA_MMIOSEQ_MMIO_PROT GENMASK(8, 7) + +/* Channel WCOUNT */ +#define TEGRA_GPCDMA_CHAN_WCOUNT 0x20 + +/* Transfer count */ +#define TEGRA_GPCDMA_CHAN_XFER_COUNT 0x24 + +/* DMA byte count status */ +#define TEGRA_GPCDMA_CHAN_DMA_BYTE_STATUS 0x28 + +/* Error Status Register */ +#define TEGRA_GPCDMA_CHAN_ERR_STATUS 0x30 +#define TEGRA_GPCDMA_CHAN_ERR_TYPE_SHIFT 8 +#define TEGRA_GPCDMA_CHAN_ERR_TYPE_MASK 0xF +#define TEGRA_GPCDMA_CHAN_ERR_TYPE(err) ( \ + ((err) >> TEGRA_GPCDMA_CHAN_ERR_TYPE_SHIFT) & \ + TEGRA_GPCDMA_CHAN_ERR_TYPE_MASK) +#define TEGRA_DMA_BM_FIFO_FULL_ERR 0xF +#define TEGRA_DMA_PERIPH_FIFO_FULL_ERR 0xE +#define TEGRA_DMA_PERIPH_ID_ERR 0xD +#define TEGRA_DMA_STREAM_ID_ERR 0xC +#define TEGRA_DMA_MC_SLAVE_ERR 0xB +#define TEGRA_DMA_MMIO_SLAVE_ERR 0xA + +/* Fixed Pattern */ +#define TEGRA_GPCDMA_CHAN_FIXED_PATTERN 0x34 + +#define TEGRA_GPCDMA_CHAN_TZ 0x38 +#define TEGRA_GPCDMA_CHAN_TZ_MMIO_PROT_1 BIT(0) +#define TEGRA_GPCDMA_CHAN_TZ_MC_PROT_1 BIT(1) + +#define TEGRA_GPCDMA_CHAN_SPARE 0x3c +#define TEGRA_GPCDMA_CHAN_SPARE_EN_LEGACY_FC BIT(16) + +/* + * If any burst is in flight and DMA paused then this is the time to complete + * on-flight burst and update DMA status register. + */ +#define TEGRA_GPCDMA_BURST_COMPLETE_TIME 10 +#define TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT 5000 /* 5 msec */ + +/* Channel base address offset from GPCDMA base address */ +#define TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET 0x10000 + +/* Default channel mask reserving channel0 */ +#define TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK 0xfffffffe + +struct tegra_dma; +struct tegra_dma_channel; + +/* + * tegra_dma_chip_data Tegra chip specific DMA data + * @nr_channels: Number of channels available in the controller. + * @channel_reg_size: Channel register size. + * @max_dma_count: Maximum DMA transfer count supported by DMA controller. + * @hw_support_pause: DMA HW engine support pause of the channel. + */ +struct tegra_dma_chip_data { + bool hw_support_pause; + unsigned int nr_channels; + unsigned int channel_reg_size; + unsigned int max_dma_count; + int (*terminate)(struct tegra_dma_channel *tdc); +}; + +/* DMA channel registers */ +struct tegra_dma_channel_regs { + u32 csr; + u32 src_ptr; + u32 dst_ptr; + u32 high_addr_ptr; + u32 mc_seq; + u32 mmio_seq; + u32 wcount; + u32 fixed_pattern; +}; + +/* + * tegra_dma_sg_req: DMA request details to configure hardware. This + * contains the details for one transfer to configure DMA hw. + * The client's request for data transfer can be broken into multiple + * sub-transfer as per requester details and hw support. This sub transfer + * get added as an array in Tegra DMA desc which manages the transfer details. + */ +struct tegra_dma_sg_req { + unsigned int len; + struct tegra_dma_channel_regs ch_regs; +}; + +/* + * tegra_dma_desc: Tegra DMA descriptors which uses virt_dma_desc to + * manage client request and keep track of transfer status, callbacks + * and request counts etc. + */ +struct tegra_dma_desc { + bool cyclic; + unsigned int bytes_req; + unsigned int bytes_xfer; + unsigned int sg_idx; + unsigned int sg_count; + struct virt_dma_desc vd; + struct tegra_dma_channel *tdc; + struct tegra_dma_sg_req sg_req[]; +}; + +/* + * tegra_dma_channel: Channel specific information + */ +struct tegra_dma_channel { + bool config_init; + char name[30]; + enum dma_transfer_direction sid_dir; + int id; + int irq; + int slave_id; + struct tegra_dma *tdma; + struct virt_dma_chan vc; + struct tegra_dma_desc *dma_desc; + struct dma_slave_config dma_sconfig; + unsigned int stream_id; + unsigned long chan_base_offset; +}; + +/* + * tegra_dma: Tegra DMA specific information + */ +struct tegra_dma { + const struct tegra_dma_chip_data *chip_data; + unsigned long sid_m2d_reserved; + unsigned long sid_d2m_reserved; + u32 chan_mask; + void __iomem *base_addr; + struct device *dev; + struct dma_device dma_dev; + struct reset_control *rst; + struct tegra_dma_channel channels[]; +}; + +static inline void tdc_write(struct tegra_dma_channel *tdc, + u32 reg, u32 val) +{ + writel_relaxed(val, tdc->tdma->base_addr + tdc->chan_base_offset + reg); +} + +static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg) +{ + return readl_relaxed(tdc->tdma->base_addr + tdc->chan_base_offset + reg); +} + +static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc) +{ + return container_of(dc, struct tegra_dma_channel, vc.chan); +} + +static inline struct tegra_dma_desc *vd_to_tegra_dma_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct tegra_dma_desc, vd); +} + +static inline struct device *tdc2dev(struct tegra_dma_channel *tdc) +{ + return tdc->vc.chan.device->dev; +} + +static void tegra_dma_dump_chan_regs(struct tegra_dma_channel *tdc) +{ + dev_dbg(tdc2dev(tdc), "DMA Channel %d name %s register dump:\n", + tdc->id, tdc->name); + dev_dbg(tdc2dev(tdc), "CSR %x STA %x CSRE %x SRC %x DST %x\n", + tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSR), + tdc_read(tdc, TEGRA_GPCDMA_CHAN_STATUS), + tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSRE), + tdc_read(tdc, TEGRA_GPCDMA_CHAN_SRC_PTR), + tdc_read(tdc, TEGRA_GPCDMA_CHAN_DST_PTR) + ); + dev_dbg(tdc2dev(tdc), "MCSEQ %x IOSEQ %x WCNT %x XFER %x BSTA %x\n", + tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ), + tdc_read(tdc, TEGRA_GPCDMA_CHAN_MMIOSEQ), + tdc_read(tdc, TEGRA_GPCDMA_CHAN_WCOUNT), + tdc_read(tdc, TEGRA_GPCDMA_CHAN_XFER_COUNT), + tdc_read(tdc, TEGRA_GPCDMA_CHAN_DMA_BYTE_STATUS) + ); + dev_dbg(tdc2dev(tdc), "DMA ERR_STA %x\n", + tdc_read(tdc, TEGRA_GPCDMA_CHAN_ERR_STATUS)); +} + +static int tegra_dma_sid_reserve(struct tegra_dma_channel *tdc, + enum dma_transfer_direction direction) +{ + struct tegra_dma *tdma = tdc->tdma; + int sid = tdc->slave_id; + + if (!is_slave_direction(direction)) + return 0; + + switch (direction) { + case DMA_MEM_TO_DEV: + if (test_and_set_bit(sid, &tdma->sid_m2d_reserved)) { + dev_err(tdma->dev, "slave id already in use\n"); + return -EINVAL; + } + break; + case DMA_DEV_TO_MEM: + if (test_and_set_bit(sid, &tdma->sid_d2m_reserved)) { + dev_err(tdma->dev, "slave id already in use\n"); + return -EINVAL; + } + break; + default: + break; + } + + tdc->sid_dir = direction; + + return 0; +} + +static void tegra_dma_sid_free(struct tegra_dma_channel *tdc) +{ + struct tegra_dma *tdma = tdc->tdma; + int sid = tdc->slave_id; + + switch (tdc->sid_dir) { + case DMA_MEM_TO_DEV: + clear_bit(sid, &tdma->sid_m2d_reserved); + break; + case DMA_DEV_TO_MEM: + clear_bit(sid, &tdma->sid_d2m_reserved); + break; + default: + break; + } + + tdc->sid_dir = DMA_TRANS_NONE; +} + +static void tegra_dma_desc_free(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct tegra_dma_desc, vd)); +} + +static int tegra_dma_slave_config(struct dma_chan *dc, + struct dma_slave_config *sconfig) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + + memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig)); + tdc->config_init = true; + + return 0; +} + +static int tegra_dma_pause(struct tegra_dma_channel *tdc) +{ + int ret; + u32 val; + + val = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSRE); + val |= TEGRA_GPCDMA_CHAN_CSRE_PAUSE; + tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSRE, val); + + /* Wait until busy bit is de-asserted */ + ret = readl_relaxed_poll_timeout_atomic(tdc->tdma->base_addr + + tdc->chan_base_offset + TEGRA_GPCDMA_CHAN_STATUS, + val, + !(val & TEGRA_GPCDMA_STATUS_BUSY), + TEGRA_GPCDMA_BURST_COMPLETE_TIME, + TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT); + + if (ret) { + dev_err(tdc2dev(tdc), "DMA pause timed out\n"); + tegra_dma_dump_chan_regs(tdc); + } + + return ret; +} + +static int tegra_dma_device_pause(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + unsigned long flags; + int ret; + + if (!tdc->tdma->chip_data->hw_support_pause) + return -ENOSYS; + + spin_lock_irqsave(&tdc->vc.lock, flags); + ret = tegra_dma_pause(tdc); + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + return ret; +} + +static void tegra_dma_resume(struct tegra_dma_channel *tdc) +{ + u32 val; + + val = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSRE); + val &= ~TEGRA_GPCDMA_CHAN_CSRE_PAUSE; + tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSRE, val); +} + +static int tegra_dma_device_resume(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + unsigned long flags; + + if (!tdc->tdma->chip_data->hw_support_pause) + return -ENOSYS; + + spin_lock_irqsave(&tdc->vc.lock, flags); + tegra_dma_resume(tdc); + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + return 0; +} + +static inline int tegra_dma_pause_noerr(struct tegra_dma_channel *tdc) +{ + /* Return 0 irrespective of PAUSE status. + * This is useful to recover channels that can exit out of flush + * state when the channel is disabled. + */ + + tegra_dma_pause(tdc); + return 0; +} + +static void tegra_dma_disable(struct tegra_dma_channel *tdc) +{ + u32 csr, status; + + csr = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSR); + + /* Disable interrupts */ + csr &= ~TEGRA_GPCDMA_CSR_IE_EOC; + + /* Disable DMA */ + csr &= ~TEGRA_GPCDMA_CSR_ENB; + tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, csr); + + /* Clear interrupt status if it is there */ + status = tdc_read(tdc, TEGRA_GPCDMA_CHAN_STATUS); + if (status & TEGRA_GPCDMA_STATUS_ISE_EOC) { + dev_dbg(tdc2dev(tdc), "%s():clearing interrupt\n", __func__); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_STATUS, status); + } +} + +static void tegra_dma_configure_next_sg(struct tegra_dma_channel *tdc) +{ + struct tegra_dma_desc *dma_desc = tdc->dma_desc; + struct tegra_dma_channel_regs *ch_regs; + int ret; + u32 val; + + dma_desc->sg_idx++; + + /* Reset the sg index for cyclic transfers */ + if (dma_desc->sg_idx == dma_desc->sg_count) + dma_desc->sg_idx = 0; + + /* Configure next transfer immediately after DMA is busy */ + ret = readl_relaxed_poll_timeout_atomic(tdc->tdma->base_addr + + tdc->chan_base_offset + TEGRA_GPCDMA_CHAN_STATUS, + val, + (val & TEGRA_GPCDMA_STATUS_BUSY), 0, + TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT); + if (ret) + return; + + ch_regs = &dma_desc->sg_req[dma_desc->sg_idx].ch_regs; + + tdc_write(tdc, TEGRA_GPCDMA_CHAN_WCOUNT, ch_regs->wcount); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_SRC_PTR, ch_regs->src_ptr); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_DST_PTR, ch_regs->dst_ptr); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_HIGH_ADDR_PTR, ch_regs->high_addr_ptr); + + /* Start DMA */ + tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, + ch_regs->csr | TEGRA_GPCDMA_CSR_ENB); +} + +static void tegra_dma_start(struct tegra_dma_channel *tdc) +{ + struct tegra_dma_desc *dma_desc = tdc->dma_desc; + struct tegra_dma_channel_regs *ch_regs; + struct virt_dma_desc *vdesc; + + if (!dma_desc) { + vdesc = vchan_next_desc(&tdc->vc); + if (!vdesc) + return; + + dma_desc = vd_to_tegra_dma_desc(vdesc); + list_del(&vdesc->node); + dma_desc->tdc = tdc; + tdc->dma_desc = dma_desc; + + tegra_dma_resume(tdc); + } + + ch_regs = &dma_desc->sg_req[dma_desc->sg_idx].ch_regs; + + tdc_write(tdc, TEGRA_GPCDMA_CHAN_WCOUNT, ch_regs->wcount); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, 0); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_SRC_PTR, ch_regs->src_ptr); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_DST_PTR, ch_regs->dst_ptr); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_HIGH_ADDR_PTR, ch_regs->high_addr_ptr); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_FIXED_PATTERN, ch_regs->fixed_pattern); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_MMIOSEQ, ch_regs->mmio_seq); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_MCSEQ, ch_regs->mc_seq); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, ch_regs->csr); + + /* Start DMA */ + tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, + ch_regs->csr | TEGRA_GPCDMA_CSR_ENB); +} + +static void tegra_dma_xfer_complete(struct tegra_dma_channel *tdc) +{ + vchan_cookie_complete(&tdc->dma_desc->vd); + + tegra_dma_sid_free(tdc); + tdc->dma_desc = NULL; +} + +static void tegra_dma_chan_decode_error(struct tegra_dma_channel *tdc, + unsigned int err_status) +{ + switch (TEGRA_GPCDMA_CHAN_ERR_TYPE(err_status)) { + case TEGRA_DMA_BM_FIFO_FULL_ERR: + dev_err(tdc->tdma->dev, + "GPCDMA CH%d bm fifo full\n", tdc->id); + break; + + case TEGRA_DMA_PERIPH_FIFO_FULL_ERR: + dev_err(tdc->tdma->dev, + "GPCDMA CH%d peripheral fifo full\n", tdc->id); + break; + + case TEGRA_DMA_PERIPH_ID_ERR: + dev_err(tdc->tdma->dev, + "GPCDMA CH%d illegal peripheral id\n", tdc->id); + break; + + case TEGRA_DMA_STREAM_ID_ERR: + dev_err(tdc->tdma->dev, + "GPCDMA CH%d illegal stream id\n", tdc->id); + break; + + case TEGRA_DMA_MC_SLAVE_ERR: + dev_err(tdc->tdma->dev, + "GPCDMA CH%d mc slave error\n", tdc->id); + break; + + case TEGRA_DMA_MMIO_SLAVE_ERR: + dev_err(tdc->tdma->dev, + "GPCDMA CH%d mmio slave error\n", tdc->id); + break; + + default: + dev_err(tdc->tdma->dev, + "GPCDMA CH%d security violation %x\n", tdc->id, + err_status); + } +} + +static irqreturn_t tegra_dma_isr(int irq, void *dev_id) +{ + struct tegra_dma_channel *tdc = dev_id; + struct tegra_dma_desc *dma_desc = tdc->dma_desc; + struct tegra_dma_sg_req *sg_req; + u32 status; + + /* Check channel error status register */ + status = tdc_read(tdc, TEGRA_GPCDMA_CHAN_ERR_STATUS); + if (status) { + tegra_dma_chan_decode_error(tdc, status); + tegra_dma_dump_chan_regs(tdc); + tdc_write(tdc, TEGRA_GPCDMA_CHAN_ERR_STATUS, 0xFFFFFFFF); + } + + spin_lock(&tdc->vc.lock); + status = tdc_read(tdc, TEGRA_GPCDMA_CHAN_STATUS); + if (!(status & TEGRA_GPCDMA_STATUS_ISE_EOC)) + goto irq_done; + + tdc_write(tdc, TEGRA_GPCDMA_CHAN_STATUS, + TEGRA_GPCDMA_STATUS_ISE_EOC); + + if (!dma_desc) + goto irq_done; + + sg_req = dma_desc->sg_req; + dma_desc->bytes_xfer += sg_req[dma_desc->sg_idx].len; + + if (dma_desc->cyclic) { + vchan_cyclic_callback(&dma_desc->vd); + tegra_dma_configure_next_sg(tdc); + } else { + dma_desc->sg_idx++; + if (dma_desc->sg_idx == dma_desc->sg_count) + tegra_dma_xfer_complete(tdc); + else + tegra_dma_start(tdc); + } + +irq_done: + spin_unlock(&tdc->vc.lock); + return IRQ_HANDLED; +} + +static void tegra_dma_issue_pending(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + unsigned long flags; + + if (tdc->dma_desc) + return; + + spin_lock_irqsave(&tdc->vc.lock, flags); + if (vchan_issue_pending(&tdc->vc)) + tegra_dma_start(tdc); + + /* + * For cyclic DMA transfers, program the second + * transfer parameters as soon as the first DMA + * transfer is started inorder for the DMA + * controller to trigger the second transfer + * with the correct parameters. + */ + if (tdc->dma_desc && tdc->dma_desc->cyclic) + tegra_dma_configure_next_sg(tdc); + + spin_unlock_irqrestore(&tdc->vc.lock, flags); +} + +static int tegra_dma_stop_client(struct tegra_dma_channel *tdc) +{ + int ret; + u32 status, csr; + + /* + * Change the client associated with the DMA channel + * to stop DMA engine from starting any more bursts for + * the given client and wait for in flight bursts to complete + */ + csr = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSR); + csr &= ~(TEGRA_GPCDMA_CSR_REQ_SEL_MASK); + csr |= TEGRA_GPCDMA_CSR_REQ_SEL_UNUSED; + tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, csr); + + /* Wait for in flight data transfer to finish */ + udelay(TEGRA_GPCDMA_BURST_COMPLETE_TIME); + + /* If TX/RX path is still active wait till it becomes + * inactive + */ + + ret = readl_relaxed_poll_timeout_atomic(tdc->tdma->base_addr + + tdc->chan_base_offset + + TEGRA_GPCDMA_CHAN_STATUS, + status, + !(status & (TEGRA_GPCDMA_STATUS_CHANNEL_TX | + TEGRA_GPCDMA_STATUS_CHANNEL_RX)), + 5, + TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT); + if (ret) { + dev_err(tdc2dev(tdc), "Timeout waiting for DMA burst completion!\n"); + tegra_dma_dump_chan_regs(tdc); + } + + return ret; +} + +static int tegra_dma_terminate_all(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + unsigned long flags; + LIST_HEAD(head); + int err; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + if (tdc->dma_desc) { + err = tdc->tdma->chip_data->terminate(tdc); + if (err) { + spin_unlock_irqrestore(&tdc->vc.lock, flags); + return err; + } + + vchan_terminate_vdesc(&tdc->dma_desc->vd); + tegra_dma_disable(tdc); + tdc->dma_desc = NULL; + } + + tegra_dma_sid_free(tdc); + vchan_get_all_descriptors(&tdc->vc, &head); + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + vchan_dma_desc_free_list(&tdc->vc, &head); + + return 0; +} + +static int tegra_dma_get_residual(struct tegra_dma_channel *tdc) +{ + struct tegra_dma_desc *dma_desc = tdc->dma_desc; + struct tegra_dma_sg_req *sg_req = dma_desc->sg_req; + unsigned int bytes_xfer, residual; + u32 wcount = 0, status; + + wcount = tdc_read(tdc, TEGRA_GPCDMA_CHAN_XFER_COUNT); + + /* + * Set wcount = 0 if EOC bit is set. The transfer would have + * already completed and the CHAN_XFER_COUNT could have updated + * for the next transfer, specifically in case of cyclic transfers. + */ + status = tdc_read(tdc, TEGRA_GPCDMA_CHAN_STATUS); + if (status & TEGRA_GPCDMA_STATUS_ISE_EOC) + wcount = 0; + + bytes_xfer = dma_desc->bytes_xfer + + sg_req[dma_desc->sg_idx].len - (wcount * 4); + + residual = dma_desc->bytes_req - (bytes_xfer % dma_desc->bytes_req); + + return residual; +} + +static enum dma_status tegra_dma_tx_status(struct dma_chan *dc, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + struct tegra_dma_desc *dma_desc; + struct virt_dma_desc *vd; + unsigned int residual; + unsigned long flags; + enum dma_status ret; + + ret = dma_cookie_status(dc, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_irqsave(&tdc->vc.lock, flags); + vd = vchan_find_desc(&tdc->vc, cookie); + if (vd) { + dma_desc = vd_to_tegra_dma_desc(vd); + residual = dma_desc->bytes_req; + dma_set_residue(txstate, residual); + } else if (tdc->dma_desc && tdc->dma_desc->vd.tx.cookie == cookie) { + residual = tegra_dma_get_residual(tdc); + dma_set_residue(txstate, residual); + } else { + dev_err(tdc2dev(tdc), "cookie %d is not found\n", cookie); + } + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + return ret; +} + +static inline int get_bus_width(struct tegra_dma_channel *tdc, + enum dma_slave_buswidth slave_bw) +{ + switch (slave_bw) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_8; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_16; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_32; + default: + dev_err(tdc2dev(tdc), "given slave bus width is not supported\n"); + return -EINVAL; + } +} + +static unsigned int get_burst_size(struct tegra_dma_channel *tdc, + u32 burst_size, enum dma_slave_buswidth slave_bw, + int len) +{ + unsigned int burst_mmio_width, burst_byte; + + /* + * burst_size from client is in terms of the bus_width. + * convert that into words. + * If burst_size is not specified from client, then use + * len to calculate the optimum burst size + */ + burst_byte = burst_size ? burst_size * slave_bw : len; + burst_mmio_width = burst_byte / 4; + + if (burst_mmio_width < TEGRA_GPCDMA_MMIOSEQ_BURST_MIN) + return 0; + + burst_mmio_width = min(burst_mmio_width, TEGRA_GPCDMA_MMIOSEQ_BURST_MAX); + + return TEGRA_GPCDMA_MMIOSEQ_BURST(burst_mmio_width); +} + +static int get_transfer_param(struct tegra_dma_channel *tdc, + enum dma_transfer_direction direction, + u32 *apb_addr, + u32 *mmio_seq, + u32 *csr, + unsigned int *burst_size, + enum dma_slave_buswidth *slave_bw) +{ + switch (direction) { + case DMA_MEM_TO_DEV: + *apb_addr = tdc->dma_sconfig.dst_addr; + *mmio_seq = get_bus_width(tdc, tdc->dma_sconfig.dst_addr_width); + *burst_size = tdc->dma_sconfig.dst_maxburst; + *slave_bw = tdc->dma_sconfig.dst_addr_width; + *csr = TEGRA_GPCDMA_CSR_DMA_MEM2IO_FC; + return 0; + case DMA_DEV_TO_MEM: + *apb_addr = tdc->dma_sconfig.src_addr; + *mmio_seq = get_bus_width(tdc, tdc->dma_sconfig.src_addr_width); + *burst_size = tdc->dma_sconfig.src_maxburst; + *slave_bw = tdc->dma_sconfig.src_addr_width; + *csr = TEGRA_GPCDMA_CSR_DMA_IO2MEM_FC; + return 0; + default: + dev_err(tdc2dev(tdc), "DMA direction is not supported\n"); + } + + return -EINVAL; +} + +static struct dma_async_tx_descriptor * +tegra_dma_prep_dma_memset(struct dma_chan *dc, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + unsigned int max_dma_count = tdc->tdma->chip_data->max_dma_count; + struct tegra_dma_sg_req *sg_req; + struct tegra_dma_desc *dma_desc; + u32 csr, mc_seq; + + if ((len & 3) || (dest & 3) || len > max_dma_count) { + dev_err(tdc2dev(tdc), + "DMA length/memory address is not supported\n"); + return NULL; + } + + /* Set DMA mode to fixed pattern */ + csr = TEGRA_GPCDMA_CSR_DMA_FIXED_PAT; + /* Enable once or continuous mode */ + csr |= TEGRA_GPCDMA_CSR_ONCE; + /* Enable IRQ mask */ + csr |= TEGRA_GPCDMA_CSR_IRQ_MASK; + /* Enable the DMA interrupt */ + if (flags & DMA_PREP_INTERRUPT) + csr |= TEGRA_GPCDMA_CSR_IE_EOC; + /* Configure default priority weight for the channel */ + csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_WEIGHT, 1); + + mc_seq = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ); + /* retain stream-id and clean rest */ + mc_seq &= TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK; + + /* Set the address wrapping */ + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP0, + TEGRA_GPCDMA_MCSEQ_WRAP_NONE); + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP1, + TEGRA_GPCDMA_MCSEQ_WRAP_NONE); + + /* Program outstanding MC requests */ + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_REQ_COUNT, 1); + /* Set burst size */ + mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_16; + + dma_desc = kzalloc(struct_size(dma_desc, sg_req, 1), GFP_NOWAIT); + if (!dma_desc) + return NULL; + + dma_desc->bytes_req = len; + dma_desc->sg_count = 1; + sg_req = dma_desc->sg_req; + + sg_req[0].ch_regs.src_ptr = 0; + sg_req[0].ch_regs.dst_ptr = dest; + sg_req[0].ch_regs.high_addr_ptr = + FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_DST_PTR, (dest >> 32)); + sg_req[0].ch_regs.fixed_pattern = value; + /* Word count reg takes value as (N +1) words */ + sg_req[0].ch_regs.wcount = ((len - 4) >> 2); + sg_req[0].ch_regs.csr = csr; + sg_req[0].ch_regs.mmio_seq = 0; + sg_req[0].ch_regs.mc_seq = mc_seq; + sg_req[0].len = len; + + dma_desc->cyclic = false; + return vchan_tx_prep(&tdc->vc, &dma_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +tegra_dma_prep_dma_memcpy(struct dma_chan *dc, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + struct tegra_dma_sg_req *sg_req; + struct tegra_dma_desc *dma_desc; + unsigned int max_dma_count; + u32 csr, mc_seq; + + max_dma_count = tdc->tdma->chip_data->max_dma_count; + if ((len & 3) || (src & 3) || (dest & 3) || len > max_dma_count) { + dev_err(tdc2dev(tdc), + "DMA length/memory address is not supported\n"); + return NULL; + } + + /* Set DMA mode to memory to memory transfer */ + csr = TEGRA_GPCDMA_CSR_DMA_MEM2MEM; + /* Enable once or continuous mode */ + csr |= TEGRA_GPCDMA_CSR_ONCE; + /* Enable IRQ mask */ + csr |= TEGRA_GPCDMA_CSR_IRQ_MASK; + /* Enable the DMA interrupt */ + if (flags & DMA_PREP_INTERRUPT) + csr |= TEGRA_GPCDMA_CSR_IE_EOC; + /* Configure default priority weight for the channel */ + csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_WEIGHT, 1); + + mc_seq = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ); + /* retain stream-id and clean rest */ + mc_seq &= (TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK) | + (TEGRA_GPCDMA_MCSEQ_STREAM_ID1_MASK); + + /* Set the address wrapping */ + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP0, + TEGRA_GPCDMA_MCSEQ_WRAP_NONE); + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP1, + TEGRA_GPCDMA_MCSEQ_WRAP_NONE); + + /* Program outstanding MC requests */ + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_REQ_COUNT, 1); + /* Set burst size */ + mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_16; + + dma_desc = kzalloc(struct_size(dma_desc, sg_req, 1), GFP_NOWAIT); + if (!dma_desc) + return NULL; + + dma_desc->bytes_req = len; + dma_desc->sg_count = 1; + sg_req = dma_desc->sg_req; + + sg_req[0].ch_regs.src_ptr = src; + sg_req[0].ch_regs.dst_ptr = dest; + sg_req[0].ch_regs.high_addr_ptr = + FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_SRC_PTR, (src >> 32)); + sg_req[0].ch_regs.high_addr_ptr |= + FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_DST_PTR, (dest >> 32)); + /* Word count reg takes value as (N +1) words */ + sg_req[0].ch_regs.wcount = ((len - 4) >> 2); + sg_req[0].ch_regs.csr = csr; + sg_req[0].ch_regs.mmio_seq = 0; + sg_req[0].ch_regs.mc_seq = mc_seq; + sg_req[0].len = len; + + dma_desc->cyclic = false; + return vchan_tx_prep(&tdc->vc, &dma_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +tegra_dma_prep_slave_sg(struct dma_chan *dc, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + unsigned int max_dma_count = tdc->tdma->chip_data->max_dma_count; + enum dma_slave_buswidth slave_bw = DMA_SLAVE_BUSWIDTH_UNDEFINED; + u32 csr, mc_seq, apb_ptr = 0, mmio_seq = 0; + struct tegra_dma_sg_req *sg_req; + struct tegra_dma_desc *dma_desc; + struct scatterlist *sg; + u32 burst_size; + unsigned int i; + int ret; + + if (!tdc->config_init) { + dev_err(tdc2dev(tdc), "DMA channel is not configured\n"); + return NULL; + } + if (sg_len < 1) { + dev_err(tdc2dev(tdc), "Invalid segment length %d\n", sg_len); + return NULL; + } + + ret = tegra_dma_sid_reserve(tdc, direction); + if (ret) + return NULL; + + ret = get_transfer_param(tdc, direction, &apb_ptr, &mmio_seq, &csr, + &burst_size, &slave_bw); + if (ret < 0) + return NULL; + + /* Enable once or continuous mode */ + csr |= TEGRA_GPCDMA_CSR_ONCE; + /* Program the slave id in requestor select */ + csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_REQ_SEL_MASK, tdc->slave_id); + /* Enable IRQ mask */ + csr |= TEGRA_GPCDMA_CSR_IRQ_MASK; + /* Configure default priority weight for the channel*/ + csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_WEIGHT, 1); + + /* Enable the DMA interrupt */ + if (flags & DMA_PREP_INTERRUPT) + csr |= TEGRA_GPCDMA_CSR_IE_EOC; + + mc_seq = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ); + /* retain stream-id and clean rest */ + mc_seq &= TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK; + + /* Set the address wrapping on both MC and MMIO side */ + + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP0, + TEGRA_GPCDMA_MCSEQ_WRAP_NONE); + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP1, + TEGRA_GPCDMA_MCSEQ_WRAP_NONE); + mmio_seq |= FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_WRAP_WORD, 1); + + /* Program 2 MC outstanding requests by default. */ + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_REQ_COUNT, 1); + + /* Setting MC burst size depending on MMIO burst size */ + if (burst_size == 64) + mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_16; + else + mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_2; + + dma_desc = kzalloc(struct_size(dma_desc, sg_req, sg_len), GFP_NOWAIT); + if (!dma_desc) + return NULL; + + dma_desc->sg_count = sg_len; + sg_req = dma_desc->sg_req; + + /* Make transfer requests */ + for_each_sg(sgl, sg, sg_len, i) { + u32 len; + dma_addr_t mem; + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + + if ((len & 3) || (mem & 3) || len > max_dma_count) { + dev_err(tdc2dev(tdc), + "DMA length/memory address is not supported\n"); + kfree(dma_desc); + return NULL; + } + + mmio_seq |= get_burst_size(tdc, burst_size, slave_bw, len); + dma_desc->bytes_req += len; + + if (direction == DMA_MEM_TO_DEV) { + sg_req[i].ch_regs.src_ptr = mem; + sg_req[i].ch_regs.dst_ptr = apb_ptr; + sg_req[i].ch_regs.high_addr_ptr = + FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_SRC_PTR, (mem >> 32)); + } else if (direction == DMA_DEV_TO_MEM) { + sg_req[i].ch_regs.src_ptr = apb_ptr; + sg_req[i].ch_regs.dst_ptr = mem; + sg_req[i].ch_regs.high_addr_ptr = + FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_DST_PTR, (mem >> 32)); + } + + /* + * Word count register takes input in words. Writing a value + * of N into word count register means a req of (N+1) words. + */ + sg_req[i].ch_regs.wcount = ((len - 4) >> 2); + sg_req[i].ch_regs.csr = csr; + sg_req[i].ch_regs.mmio_seq = mmio_seq; + sg_req[i].ch_regs.mc_seq = mc_seq; + sg_req[i].len = len; + } + + dma_desc->cyclic = false; + return vchan_tx_prep(&tdc->vc, &dma_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +tegra_dma_prep_dma_cyclic(struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + enum dma_slave_buswidth slave_bw = DMA_SLAVE_BUSWIDTH_UNDEFINED; + u32 csr, mc_seq, apb_ptr = 0, mmio_seq = 0, burst_size; + unsigned int max_dma_count, len, period_count, i; + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + struct tegra_dma_desc *dma_desc; + struct tegra_dma_sg_req *sg_req; + dma_addr_t mem = buf_addr; + int ret; + + if (!buf_len || !period_len) { + dev_err(tdc2dev(tdc), "Invalid buffer/period len\n"); + return NULL; + } + + if (!tdc->config_init) { + dev_err(tdc2dev(tdc), "DMA slave is not configured\n"); + return NULL; + } + + ret = tegra_dma_sid_reserve(tdc, direction); + if (ret) + return NULL; + + /* + * We only support cycle transfer when buf_len is multiple of + * period_len. + */ + if (buf_len % period_len) { + dev_err(tdc2dev(tdc), "buf_len is not multiple of period_len\n"); + return NULL; + } + + len = period_len; + max_dma_count = tdc->tdma->chip_data->max_dma_count; + if ((len & 3) || (buf_addr & 3) || len > max_dma_count) { + dev_err(tdc2dev(tdc), "Req len/mem address is not correct\n"); + return NULL; + } + + ret = get_transfer_param(tdc, direction, &apb_ptr, &mmio_seq, &csr, + &burst_size, &slave_bw); + if (ret < 0) + return NULL; + + /* Enable once or continuous mode */ + csr &= ~TEGRA_GPCDMA_CSR_ONCE; + /* Program the slave id in requestor select */ + csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_REQ_SEL_MASK, tdc->slave_id); + /* Enable IRQ mask */ + csr |= TEGRA_GPCDMA_CSR_IRQ_MASK; + /* Configure default priority weight for the channel*/ + csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_WEIGHT, 1); + + /* Enable the DMA interrupt */ + if (flags & DMA_PREP_INTERRUPT) + csr |= TEGRA_GPCDMA_CSR_IE_EOC; + + mmio_seq |= FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_WRAP_WORD, 1); + + mc_seq = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ); + /* retain stream-id and clean rest */ + mc_seq &= TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK; + + /* Set the address wrapping on both MC and MMIO side */ + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP0, + TEGRA_GPCDMA_MCSEQ_WRAP_NONE); + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP1, + TEGRA_GPCDMA_MCSEQ_WRAP_NONE); + + /* Program 2 MC outstanding requests by default. */ + mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_REQ_COUNT, 1); + /* Setting MC burst size depending on MMIO burst size */ + if (burst_size == 64) + mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_16; + else + mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_2; + + period_count = buf_len / period_len; + dma_desc = kzalloc(struct_size(dma_desc, sg_req, period_count), + GFP_NOWAIT); + if (!dma_desc) + return NULL; + + dma_desc->bytes_req = buf_len; + dma_desc->sg_count = period_count; + sg_req = dma_desc->sg_req; + + /* Split transfer equal to period size */ + for (i = 0; i < period_count; i++) { + mmio_seq |= get_burst_size(tdc, burst_size, slave_bw, len); + if (direction == DMA_MEM_TO_DEV) { + sg_req[i].ch_regs.src_ptr = mem; + sg_req[i].ch_regs.dst_ptr = apb_ptr; + sg_req[i].ch_regs.high_addr_ptr = + FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_SRC_PTR, (mem >> 32)); + } else if (direction == DMA_DEV_TO_MEM) { + sg_req[i].ch_regs.src_ptr = apb_ptr; + sg_req[i].ch_regs.dst_ptr = mem; + sg_req[i].ch_regs.high_addr_ptr = + FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_DST_PTR, (mem >> 32)); + } + /* + * Word count register takes input in words. Writing a value + * of N into word count register means a req of (N+1) words. + */ + sg_req[i].ch_regs.wcount = ((len - 4) >> 2); + sg_req[i].ch_regs.csr = csr; + sg_req[i].ch_regs.mmio_seq = mmio_seq; + sg_req[i].ch_regs.mc_seq = mc_seq; + sg_req[i].len = len; + + mem += len; + } + + dma_desc->cyclic = true; + + return vchan_tx_prep(&tdc->vc, &dma_desc->vd, flags); +} + +static int tegra_dma_alloc_chan_resources(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + int ret; + + ret = request_irq(tdc->irq, tegra_dma_isr, 0, tdc->name, tdc); + if (ret) { + dev_err(tdc2dev(tdc), "request_irq failed for %s\n", tdc->name); + return ret; + } + + dma_cookie_init(&tdc->vc.chan); + tdc->config_init = false; + return 0; +} + +static void tegra_dma_chan_synchronize(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + + synchronize_irq(tdc->irq); + vchan_synchronize(&tdc->vc); +} + +static void tegra_dma_free_chan_resources(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + + dev_dbg(tdc2dev(tdc), "Freeing channel %d\n", tdc->id); + + tegra_dma_terminate_all(dc); + synchronize_irq(tdc->irq); + + tasklet_kill(&tdc->vc.task); + tdc->config_init = false; + tdc->slave_id = -1; + tdc->sid_dir = DMA_TRANS_NONE; + free_irq(tdc->irq, tdc); + + vchan_free_chan_resources(&tdc->vc); +} + +static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct tegra_dma *tdma = ofdma->of_dma_data; + struct tegra_dma_channel *tdc; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&tdma->dma_dev); + if (!chan) + return NULL; + + tdc = to_tegra_dma_chan(chan); + tdc->slave_id = dma_spec->args[0]; + + return chan; +} + +static const struct tegra_dma_chip_data tegra186_dma_chip_data = { + .nr_channels = 32, + .channel_reg_size = SZ_64K, + .max_dma_count = SZ_1G, + .hw_support_pause = false, + .terminate = tegra_dma_stop_client, +}; + +static const struct tegra_dma_chip_data tegra194_dma_chip_data = { + .nr_channels = 32, + .channel_reg_size = SZ_64K, + .max_dma_count = SZ_1G, + .hw_support_pause = true, + .terminate = tegra_dma_pause, +}; + +static const struct tegra_dma_chip_data tegra234_dma_chip_data = { + .nr_channels = 32, + .channel_reg_size = SZ_64K, + .max_dma_count = SZ_1G, + .hw_support_pause = true, + .terminate = tegra_dma_pause_noerr, +}; + +static const struct of_device_id tegra_dma_of_match[] = { + { + .compatible = "nvidia,tegra186-gpcdma", + .data = &tegra186_dma_chip_data, + }, { + .compatible = "nvidia,tegra194-gpcdma", + .data = &tegra194_dma_chip_data, + }, { + .compatible = "nvidia,tegra234-gpcdma", + .data = &tegra234_dma_chip_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, tegra_dma_of_match); + +static int tegra_dma_program_sid(struct tegra_dma_channel *tdc, int stream_id) +{ + unsigned int reg_val = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ); + + reg_val &= ~(TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK); + reg_val &= ~(TEGRA_GPCDMA_MCSEQ_STREAM_ID1_MASK); + + reg_val |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK, stream_id); + reg_val |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_STREAM_ID1_MASK, stream_id); + + tdc_write(tdc, TEGRA_GPCDMA_CHAN_MCSEQ, reg_val); + return 0; +} + +static int tegra_dma_probe(struct platform_device *pdev) +{ + const struct tegra_dma_chip_data *cdata = NULL; + struct iommu_fwspec *iommu_spec; + unsigned int stream_id, i; + struct tegra_dma *tdma; + int ret; + + cdata = of_device_get_match_data(&pdev->dev); + + tdma = devm_kzalloc(&pdev->dev, + struct_size(tdma, channels, cdata->nr_channels), + GFP_KERNEL); + if (!tdma) + return -ENOMEM; + + tdma->dev = &pdev->dev; + tdma->chip_data = cdata; + platform_set_drvdata(pdev, tdma); + + tdma->base_addr = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(tdma->base_addr)) + return PTR_ERR(tdma->base_addr); + + tdma->rst = devm_reset_control_get_exclusive(&pdev->dev, "gpcdma"); + if (IS_ERR(tdma->rst)) { + return dev_err_probe(&pdev->dev, PTR_ERR(tdma->rst), + "Missing controller reset\n"); + } + reset_control_reset(tdma->rst); + + tdma->dma_dev.dev = &pdev->dev; + + iommu_spec = dev_iommu_fwspec_get(&pdev->dev); + if (!iommu_spec) { + dev_err(&pdev->dev, "Missing iommu stream-id\n"); + return -EINVAL; + } + stream_id = iommu_spec->ids[0] & 0xffff; + + ret = device_property_read_u32(&pdev->dev, "dma-channel-mask", + &tdma->chan_mask); + if (ret) { + dev_warn(&pdev->dev, + "Missing dma-channel-mask property, using default channel mask %#x\n", + TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK); + tdma->chan_mask = TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK; + } + + INIT_LIST_HEAD(&tdma->dma_dev.channels); + for (i = 0; i < cdata->nr_channels; i++) { + struct tegra_dma_channel *tdc = &tdma->channels[i]; + + /* Check for channel mask */ + if (!(tdma->chan_mask & BIT(i))) + continue; + + tdc->irq = platform_get_irq(pdev, i); + if (tdc->irq < 0) + return tdc->irq; + + tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET + + i * cdata->channel_reg_size; + snprintf(tdc->name, sizeof(tdc->name), "gpcdma.%d", i); + tdc->tdma = tdma; + tdc->id = i; + tdc->slave_id = -1; + + vchan_init(&tdc->vc, &tdma->dma_dev); + tdc->vc.desc_free = tegra_dma_desc_free; + + /* program stream-id for this channel */ + tegra_dma_program_sid(tdc, stream_id); + tdc->stream_id = stream_id; + } + + dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_MEMCPY, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_MEMSET, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + + /* + * Only word aligned transfers are supported. Set the copy + * alignment shift. + */ + tdma->dma_dev.copy_align = 2; + tdma->dma_dev.fill_align = 2; + tdma->dma_dev.device_alloc_chan_resources = + tegra_dma_alloc_chan_resources; + tdma->dma_dev.device_free_chan_resources = + tegra_dma_free_chan_resources; + tdma->dma_dev.device_prep_slave_sg = tegra_dma_prep_slave_sg; + tdma->dma_dev.device_prep_dma_memcpy = tegra_dma_prep_dma_memcpy; + tdma->dma_dev.device_prep_dma_memset = tegra_dma_prep_dma_memset; + tdma->dma_dev.device_prep_dma_cyclic = tegra_dma_prep_dma_cyclic; + tdma->dma_dev.device_config = tegra_dma_slave_config; + tdma->dma_dev.device_terminate_all = tegra_dma_terminate_all; + tdma->dma_dev.device_tx_status = tegra_dma_tx_status; + tdma->dma_dev.device_issue_pending = tegra_dma_issue_pending; + tdma->dma_dev.device_pause = tegra_dma_device_pause; + tdma->dma_dev.device_resume = tegra_dma_device_resume; + tdma->dma_dev.device_synchronize = tegra_dma_chan_synchronize; + tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + ret = dma_async_device_register(&tdma->dma_dev); + if (ret < 0) { + dev_err_probe(&pdev->dev, ret, + "GPC DMA driver registration failed\n"); + return ret; + } + + ret = of_dma_controller_register(pdev->dev.of_node, + tegra_dma_of_xlate, tdma); + if (ret < 0) { + dev_err_probe(&pdev->dev, ret, + "GPC DMA OF registration failed\n"); + + dma_async_device_unregister(&tdma->dma_dev); + return ret; + } + + dev_info(&pdev->dev, "GPC DMA driver register %lu channels\n", + hweight_long(tdma->chan_mask)); + + return 0; +} + +static int tegra_dma_remove(struct platform_device *pdev) +{ + struct tegra_dma *tdma = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&tdma->dma_dev); + + return 0; +} + +static int __maybe_unused tegra_dma_pm_suspend(struct device *dev) +{ + struct tegra_dma *tdma = dev_get_drvdata(dev); + unsigned int i; + + for (i = 0; i < tdma->chip_data->nr_channels; i++) { + struct tegra_dma_channel *tdc = &tdma->channels[i]; + + if (!(tdma->chan_mask & BIT(i))) + continue; + + if (tdc->dma_desc) { + dev_err(tdma->dev, "channel %u busy\n", i); + return -EBUSY; + } + } + + return 0; +} + +static int __maybe_unused tegra_dma_pm_resume(struct device *dev) +{ + struct tegra_dma *tdma = dev_get_drvdata(dev); + unsigned int i; + + reset_control_reset(tdma->rst); + + for (i = 0; i < tdma->chip_data->nr_channels; i++) { + struct tegra_dma_channel *tdc = &tdma->channels[i]; + + if (!(tdma->chan_mask & BIT(i))) + continue; + + tegra_dma_program_sid(tdc, tdc->stream_id); + } + + return 0; +} + +static const struct dev_pm_ops tegra_dma_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_pm_suspend, tegra_dma_pm_resume) +}; + +static struct platform_driver tegra_dma_driver = { + .driver = { + .name = "tegra-gpcdma", + .pm = &tegra_dma_dev_pm_ops, + .of_match_table = tegra_dma_of_match, + }, + .probe = tegra_dma_probe, + .remove = tegra_dma_remove, +}; + +module_platform_driver(tegra_dma_driver); + +MODULE_DESCRIPTION("NVIDIA Tegra GPC DMA Controller driver"); +MODULE_AUTHOR("Pavan Kunapuli "); +MODULE_AUTHOR("Rajesh Gumasta "); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c new file mode 100644 index 0000000000..063022f9df --- /dev/null +++ b/drivers/dma/tegra20-apb-dma.c @@ -0,0 +1,1687 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * DMA driver for Nvidia's Tegra20 APB DMA controller. + * + * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +#define CREATE_TRACE_POINTS +#include + +#define TEGRA_APBDMA_GENERAL 0x0 +#define TEGRA_APBDMA_GENERAL_ENABLE BIT(31) + +#define TEGRA_APBDMA_CONTROL 0x010 +#define TEGRA_APBDMA_IRQ_MASK 0x01c +#define TEGRA_APBDMA_IRQ_MASK_SET 0x020 + +/* CSR register */ +#define TEGRA_APBDMA_CHAN_CSR 0x00 +#define TEGRA_APBDMA_CSR_ENB BIT(31) +#define TEGRA_APBDMA_CSR_IE_EOC BIT(30) +#define TEGRA_APBDMA_CSR_HOLD BIT(29) +#define TEGRA_APBDMA_CSR_DIR BIT(28) +#define TEGRA_APBDMA_CSR_ONCE BIT(27) +#define TEGRA_APBDMA_CSR_FLOW BIT(21) +#define TEGRA_APBDMA_CSR_REQ_SEL_SHIFT 16 +#define TEGRA_APBDMA_CSR_REQ_SEL_MASK 0x1F +#define TEGRA_APBDMA_CSR_WCOUNT_MASK 0xFFFC + +/* STATUS register */ +#define TEGRA_APBDMA_CHAN_STATUS 0x004 +#define TEGRA_APBDMA_STATUS_BUSY BIT(31) +#define TEGRA_APBDMA_STATUS_ISE_EOC BIT(30) +#define TEGRA_APBDMA_STATUS_HALT BIT(29) +#define TEGRA_APBDMA_STATUS_PING_PONG BIT(28) +#define TEGRA_APBDMA_STATUS_COUNT_SHIFT 2 +#define TEGRA_APBDMA_STATUS_COUNT_MASK 0xFFFC + +#define TEGRA_APBDMA_CHAN_CSRE 0x00C +#define TEGRA_APBDMA_CHAN_CSRE_PAUSE BIT(31) + +/* AHB memory address */ +#define TEGRA_APBDMA_CHAN_AHBPTR 0x010 + +/* AHB sequence register */ +#define TEGRA_APBDMA_CHAN_AHBSEQ 0x14 +#define TEGRA_APBDMA_AHBSEQ_INTR_ENB BIT(31) +#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_8 (0 << 28) +#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_16 (1 << 28) +#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32 (2 << 28) +#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_64 (3 << 28) +#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_128 (4 << 28) +#define TEGRA_APBDMA_AHBSEQ_DATA_SWAP BIT(27) +#define TEGRA_APBDMA_AHBSEQ_BURST_1 (4 << 24) +#define TEGRA_APBDMA_AHBSEQ_BURST_4 (5 << 24) +#define TEGRA_APBDMA_AHBSEQ_BURST_8 (6 << 24) +#define TEGRA_APBDMA_AHBSEQ_DBL_BUF BIT(19) +#define TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT 16 +#define TEGRA_APBDMA_AHBSEQ_WRAP_NONE 0 + +/* APB address */ +#define TEGRA_APBDMA_CHAN_APBPTR 0x018 + +/* APB sequence register */ +#define TEGRA_APBDMA_CHAN_APBSEQ 0x01c +#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_8 (0 << 28) +#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_16 (1 << 28) +#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32 (2 << 28) +#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64 (3 << 28) +#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_128 (4 << 28) +#define TEGRA_APBDMA_APBSEQ_DATA_SWAP BIT(27) +#define TEGRA_APBDMA_APBSEQ_WRAP_WORD_1 (1 << 16) + +/* Tegra148 specific registers */ +#define TEGRA_APBDMA_CHAN_WCOUNT 0x20 + +#define TEGRA_APBDMA_CHAN_WORD_TRANSFER 0x24 + +/* + * If any burst is in flight and DMA paused then this is the time to complete + * on-flight burst and update DMA status register. + */ +#define TEGRA_APBDMA_BURST_COMPLETE_TIME 20 + +/* Channel base address offset from APBDMA base address */ +#define TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET 0x1000 + +#define TEGRA_APBDMA_SLAVE_ID_INVALID (TEGRA_APBDMA_CSR_REQ_SEL_MASK + 1) + +struct tegra_dma; + +/* + * tegra_dma_chip_data Tegra chip specific DMA data + * @nr_channels: Number of channels available in the controller. + * @channel_reg_size: Channel register size/stride. + * @max_dma_count: Maximum DMA transfer count supported by DMA controller. + * @support_channel_pause: Support channel wise pause of dma. + * @support_separate_wcount_reg: Support separate word count register. + */ +struct tegra_dma_chip_data { + unsigned int nr_channels; + unsigned int channel_reg_size; + unsigned int max_dma_count; + bool support_channel_pause; + bool support_separate_wcount_reg; +}; + +/* DMA channel registers */ +struct tegra_dma_channel_regs { + u32 csr; + u32 ahb_ptr; + u32 apb_ptr; + u32 ahb_seq; + u32 apb_seq; + u32 wcount; +}; + +/* + * tegra_dma_sg_req: DMA request details to configure hardware. This + * contains the details for one transfer to configure DMA hw. + * The client's request for data transfer can be broken into multiple + * sub-transfer as per requester details and hw support. + * This sub transfer get added in the list of transfer and point to Tegra + * DMA descriptor which manages the transfer details. + */ +struct tegra_dma_sg_req { + struct tegra_dma_channel_regs ch_regs; + unsigned int req_len; + bool configured; + bool last_sg; + struct list_head node; + struct tegra_dma_desc *dma_desc; + unsigned int words_xferred; +}; + +/* + * tegra_dma_desc: Tegra DMA descriptors which manages the client requests. + * This descriptor keep track of transfer status, callbacks and request + * counts etc. + */ +struct tegra_dma_desc { + struct dma_async_tx_descriptor txd; + unsigned int bytes_requested; + unsigned int bytes_transferred; + enum dma_status dma_status; + struct list_head node; + struct list_head tx_list; + struct list_head cb_node; + unsigned int cb_count; +}; + +struct tegra_dma_channel; + +typedef void (*dma_isr_handler)(struct tegra_dma_channel *tdc, + bool to_terminate); + +/* tegra_dma_channel: Channel specific information */ +struct tegra_dma_channel { + struct dma_chan dma_chan; + char name[12]; + bool config_init; + unsigned int id; + void __iomem *chan_addr; + spinlock_t lock; + bool busy; + struct tegra_dma *tdma; + bool cyclic; + + /* Different lists for managing the requests */ + struct list_head free_sg_req; + struct list_head pending_sg_req; + struct list_head free_dma_desc; + struct list_head cb_desc; + + /* ISR handler and tasklet for bottom half of isr handling */ + dma_isr_handler isr_handler; + struct tasklet_struct tasklet; + + /* Channel-slave specific configuration */ + unsigned int slave_id; + struct dma_slave_config dma_sconfig; + struct tegra_dma_channel_regs channel_reg; + + struct wait_queue_head wq; +}; + +/* tegra_dma: Tegra DMA specific information */ +struct tegra_dma { + struct dma_device dma_dev; + struct device *dev; + struct clk *dma_clk; + struct reset_control *rst; + spinlock_t global_lock; + void __iomem *base_addr; + const struct tegra_dma_chip_data *chip_data; + + /* + * Counter for managing global pausing of the DMA controller. + * Only applicable for devices that don't support individual + * channel pausing. + */ + u32 global_pause_count; + + /* Last member of the structure */ + struct tegra_dma_channel channels[]; +}; + +static inline void tdma_write(struct tegra_dma *tdma, u32 reg, u32 val) +{ + writel(val, tdma->base_addr + reg); +} + +static inline void tdc_write(struct tegra_dma_channel *tdc, + u32 reg, u32 val) +{ + writel(val, tdc->chan_addr + reg); +} + +static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg) +{ + return readl(tdc->chan_addr + reg); +} + +static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc) +{ + return container_of(dc, struct tegra_dma_channel, dma_chan); +} + +static inline struct tegra_dma_desc * +txd_to_tegra_dma_desc(struct dma_async_tx_descriptor *td) +{ + return container_of(td, struct tegra_dma_desc, txd); +} + +static inline struct device *tdc2dev(struct tegra_dma_channel *tdc) +{ + return &tdc->dma_chan.dev->device; +} + +static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *tx); + +/* Get DMA desc from free list, if not there then allocate it. */ +static struct tegra_dma_desc *tegra_dma_desc_get(struct tegra_dma_channel *tdc) +{ + struct tegra_dma_desc *dma_desc; + unsigned long flags; + + spin_lock_irqsave(&tdc->lock, flags); + + /* Do not allocate if desc are waiting for ack */ + list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) { + if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) { + list_del(&dma_desc->node); + spin_unlock_irqrestore(&tdc->lock, flags); + dma_desc->txd.flags = 0; + return dma_desc; + } + } + + spin_unlock_irqrestore(&tdc->lock, flags); + + /* Allocate DMA desc */ + dma_desc = kzalloc(sizeof(*dma_desc), GFP_NOWAIT); + if (!dma_desc) + return NULL; + + dma_async_tx_descriptor_init(&dma_desc->txd, &tdc->dma_chan); + dma_desc->txd.tx_submit = tegra_dma_tx_submit; + dma_desc->txd.flags = 0; + + return dma_desc; +} + +static void tegra_dma_desc_put(struct tegra_dma_channel *tdc, + struct tegra_dma_desc *dma_desc) +{ + unsigned long flags; + + spin_lock_irqsave(&tdc->lock, flags); + if (!list_empty(&dma_desc->tx_list)) + list_splice_init(&dma_desc->tx_list, &tdc->free_sg_req); + list_add_tail(&dma_desc->node, &tdc->free_dma_desc); + spin_unlock_irqrestore(&tdc->lock, flags); +} + +static struct tegra_dma_sg_req * +tegra_dma_sg_req_get(struct tegra_dma_channel *tdc) +{ + struct tegra_dma_sg_req *sg_req; + unsigned long flags; + + spin_lock_irqsave(&tdc->lock, flags); + if (!list_empty(&tdc->free_sg_req)) { + sg_req = list_first_entry(&tdc->free_sg_req, typeof(*sg_req), + node); + list_del(&sg_req->node); + spin_unlock_irqrestore(&tdc->lock, flags); + return sg_req; + } + spin_unlock_irqrestore(&tdc->lock, flags); + + sg_req = kzalloc(sizeof(*sg_req), GFP_NOWAIT); + + return sg_req; +} + +static int tegra_dma_slave_config(struct dma_chan *dc, + struct dma_slave_config *sconfig) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + + if (!list_empty(&tdc->pending_sg_req)) { + dev_err(tdc2dev(tdc), "Configuration not allowed\n"); + return -EBUSY; + } + + memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig)); + tdc->config_init = true; + + return 0; +} + +static void tegra_dma_global_pause(struct tegra_dma_channel *tdc, + bool wait_for_burst_complete) +{ + struct tegra_dma *tdma = tdc->tdma; + + spin_lock(&tdma->global_lock); + + if (tdc->tdma->global_pause_count == 0) { + tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); + if (wait_for_burst_complete) + udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + } + + tdc->tdma->global_pause_count++; + + spin_unlock(&tdma->global_lock); +} + +static void tegra_dma_global_resume(struct tegra_dma_channel *tdc) +{ + struct tegra_dma *tdma = tdc->tdma; + + spin_lock(&tdma->global_lock); + + if (WARN_ON(tdc->tdma->global_pause_count == 0)) + goto out; + + if (--tdc->tdma->global_pause_count == 0) + tdma_write(tdma, TEGRA_APBDMA_GENERAL, + TEGRA_APBDMA_GENERAL_ENABLE); + +out: + spin_unlock(&tdma->global_lock); +} + +static void tegra_dma_pause(struct tegra_dma_channel *tdc, + bool wait_for_burst_complete) +{ + struct tegra_dma *tdma = tdc->tdma; + + if (tdma->chip_data->support_channel_pause) { + tdc_write(tdc, TEGRA_APBDMA_CHAN_CSRE, + TEGRA_APBDMA_CHAN_CSRE_PAUSE); + if (wait_for_burst_complete) + udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + } else { + tegra_dma_global_pause(tdc, wait_for_burst_complete); + } +} + +static void tegra_dma_resume(struct tegra_dma_channel *tdc) +{ + struct tegra_dma *tdma = tdc->tdma; + + if (tdma->chip_data->support_channel_pause) + tdc_write(tdc, TEGRA_APBDMA_CHAN_CSRE, 0); + else + tegra_dma_global_resume(tdc); +} + +static void tegra_dma_stop(struct tegra_dma_channel *tdc) +{ + u32 csr, status; + + /* Disable interrupts */ + csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR); + csr &= ~TEGRA_APBDMA_CSR_IE_EOC; + tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, csr); + + /* Disable DMA */ + csr &= ~TEGRA_APBDMA_CSR_ENB; + tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, csr); + + /* Clear interrupt status if it is there */ + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + if (status & TEGRA_APBDMA_STATUS_ISE_EOC) { + dev_dbg(tdc2dev(tdc), "%s():clearing interrupt\n", __func__); + tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status); + } + tdc->busy = false; +} + +static void tegra_dma_start(struct tegra_dma_channel *tdc, + struct tegra_dma_sg_req *sg_req) +{ + struct tegra_dma_channel_regs *ch_regs = &sg_req->ch_regs; + + tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, ch_regs->csr); + tdc_write(tdc, TEGRA_APBDMA_CHAN_APBSEQ, ch_regs->apb_seq); + tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, ch_regs->apb_ptr); + tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBSEQ, ch_regs->ahb_seq); + tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, ch_regs->ahb_ptr); + if (tdc->tdma->chip_data->support_separate_wcount_reg) + tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT, ch_regs->wcount); + + /* Start DMA */ + tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, + ch_regs->csr | TEGRA_APBDMA_CSR_ENB); +} + +static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc, + struct tegra_dma_sg_req *nsg_req) +{ + unsigned long status; + + /* + * The DMA controller reloads the new configuration for next transfer + * after last burst of current transfer completes. + * If there is no IEC status then this makes sure that last burst + * has not be completed. There may be case that last burst is on + * flight and so it can complete but because DMA is paused, it + * will not generates interrupt as well as not reload the new + * configuration. + * If there is already IEC status then interrupt handler need to + * load new configuration. + */ + tegra_dma_pause(tdc, false); + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + + /* + * If interrupt is pending then do nothing as the ISR will handle + * the programing for new request. + */ + if (status & TEGRA_APBDMA_STATUS_ISE_EOC) { + dev_err(tdc2dev(tdc), + "Skipping new configuration as interrupt is pending\n"); + tegra_dma_resume(tdc); + return; + } + + /* Safe to program new configuration */ + tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, nsg_req->ch_regs.apb_ptr); + tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, nsg_req->ch_regs.ahb_ptr); + if (tdc->tdma->chip_data->support_separate_wcount_reg) + tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT, + nsg_req->ch_regs.wcount); + tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, + nsg_req->ch_regs.csr | TEGRA_APBDMA_CSR_ENB); + nsg_req->configured = true; + nsg_req->words_xferred = 0; + + tegra_dma_resume(tdc); +} + +static void tdc_start_head_req(struct tegra_dma_channel *tdc) +{ + struct tegra_dma_sg_req *sg_req; + + sg_req = list_first_entry(&tdc->pending_sg_req, typeof(*sg_req), node); + tegra_dma_start(tdc, sg_req); + sg_req->configured = true; + sg_req->words_xferred = 0; + tdc->busy = true; +} + +static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc) +{ + struct tegra_dma_sg_req *hsgreq, *hnsgreq; + + hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node); + if (!list_is_last(&hsgreq->node, &tdc->pending_sg_req)) { + hnsgreq = list_first_entry(&hsgreq->node, typeof(*hnsgreq), + node); + tegra_dma_configure_for_next(tdc, hnsgreq); + } +} + +static inline unsigned int +get_current_xferred_count(struct tegra_dma_channel *tdc, + struct tegra_dma_sg_req *sg_req, + unsigned long status) +{ + return sg_req->req_len - (status & TEGRA_APBDMA_STATUS_COUNT_MASK) - 4; +} + +static void tegra_dma_abort_all(struct tegra_dma_channel *tdc) +{ + struct tegra_dma_desc *dma_desc; + struct tegra_dma_sg_req *sgreq; + + while (!list_empty(&tdc->pending_sg_req)) { + sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), + node); + list_move_tail(&sgreq->node, &tdc->free_sg_req); + if (sgreq->last_sg) { + dma_desc = sgreq->dma_desc; + dma_desc->dma_status = DMA_ERROR; + list_add_tail(&dma_desc->node, &tdc->free_dma_desc); + + /* Add in cb list if it is not there. */ + if (!dma_desc->cb_count) + list_add_tail(&dma_desc->cb_node, + &tdc->cb_desc); + dma_desc->cb_count++; + } + } + tdc->isr_handler = NULL; +} + +static bool handle_continuous_head_request(struct tegra_dma_channel *tdc, + bool to_terminate) +{ + struct tegra_dma_sg_req *hsgreq; + + /* + * Check that head req on list should be in flight. + * If it is not in flight then abort transfer as + * looping of transfer can not continue. + */ + hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node); + if (!hsgreq->configured) { + tegra_dma_stop(tdc); + pm_runtime_put(tdc->tdma->dev); + dev_err(tdc2dev(tdc), "DMA transfer underflow, aborting DMA\n"); + tegra_dma_abort_all(tdc); + return false; + } + + /* Configure next request */ + if (!to_terminate) + tdc_configure_next_head_desc(tdc); + + return true; +} + +static void handle_once_dma_done(struct tegra_dma_channel *tdc, + bool to_terminate) +{ + struct tegra_dma_desc *dma_desc; + struct tegra_dma_sg_req *sgreq; + + tdc->busy = false; + sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node); + dma_desc = sgreq->dma_desc; + dma_desc->bytes_transferred += sgreq->req_len; + + list_del(&sgreq->node); + if (sgreq->last_sg) { + dma_desc->dma_status = DMA_COMPLETE; + dma_cookie_complete(&dma_desc->txd); + if (!dma_desc->cb_count) + list_add_tail(&dma_desc->cb_node, &tdc->cb_desc); + dma_desc->cb_count++; + list_add_tail(&dma_desc->node, &tdc->free_dma_desc); + } + list_add_tail(&sgreq->node, &tdc->free_sg_req); + + /* Do not start DMA if it is going to be terminate */ + if (to_terminate) + return; + + if (list_empty(&tdc->pending_sg_req)) { + pm_runtime_put(tdc->tdma->dev); + return; + } + + tdc_start_head_req(tdc); +} + +static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, + bool to_terminate) +{ + struct tegra_dma_desc *dma_desc; + struct tegra_dma_sg_req *sgreq; + bool st; + + sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node); + dma_desc = sgreq->dma_desc; + /* if we dma for long enough the transfer count will wrap */ + dma_desc->bytes_transferred = + (dma_desc->bytes_transferred + sgreq->req_len) % + dma_desc->bytes_requested; + + /* Callback need to be call */ + if (!dma_desc->cb_count) + list_add_tail(&dma_desc->cb_node, &tdc->cb_desc); + dma_desc->cb_count++; + + sgreq->words_xferred = 0; + + /* If not last req then put at end of pending list */ + if (!list_is_last(&sgreq->node, &tdc->pending_sg_req)) { + list_move_tail(&sgreq->node, &tdc->pending_sg_req); + sgreq->configured = false; + st = handle_continuous_head_request(tdc, to_terminate); + if (!st) + dma_desc->dma_status = DMA_ERROR; + } +} + +static void tegra_dma_tasklet(struct tasklet_struct *t) +{ + struct tegra_dma_channel *tdc = from_tasklet(tdc, t, tasklet); + struct dmaengine_desc_callback cb; + struct tegra_dma_desc *dma_desc; + unsigned int cb_count; + unsigned long flags; + + spin_lock_irqsave(&tdc->lock, flags); + while (!list_empty(&tdc->cb_desc)) { + dma_desc = list_first_entry(&tdc->cb_desc, typeof(*dma_desc), + cb_node); + list_del(&dma_desc->cb_node); + dmaengine_desc_get_callback(&dma_desc->txd, &cb); + cb_count = dma_desc->cb_count; + dma_desc->cb_count = 0; + trace_tegra_dma_complete_cb(&tdc->dma_chan, cb_count, + cb.callback); + spin_unlock_irqrestore(&tdc->lock, flags); + while (cb_count--) + dmaengine_desc_callback_invoke(&cb, NULL); + spin_lock_irqsave(&tdc->lock, flags); + } + spin_unlock_irqrestore(&tdc->lock, flags); +} + +static irqreturn_t tegra_dma_isr(int irq, void *dev_id) +{ + struct tegra_dma_channel *tdc = dev_id; + u32 status; + + spin_lock(&tdc->lock); + + trace_tegra_dma_isr(&tdc->dma_chan, irq); + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + if (status & TEGRA_APBDMA_STATUS_ISE_EOC) { + tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status); + tdc->isr_handler(tdc, false); + tasklet_schedule(&tdc->tasklet); + wake_up_all(&tdc->wq); + spin_unlock(&tdc->lock); + return IRQ_HANDLED; + } + + spin_unlock(&tdc->lock); + dev_info(tdc2dev(tdc), "Interrupt already served status 0x%08x\n", + status); + + return IRQ_NONE; +} + +static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct tegra_dma_desc *dma_desc = txd_to_tegra_dma_desc(txd); + struct tegra_dma_channel *tdc = to_tegra_dma_chan(txd->chan); + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&tdc->lock, flags); + dma_desc->dma_status = DMA_IN_PROGRESS; + cookie = dma_cookie_assign(&dma_desc->txd); + list_splice_tail_init(&dma_desc->tx_list, &tdc->pending_sg_req); + spin_unlock_irqrestore(&tdc->lock, flags); + + return cookie; +} + +static void tegra_dma_issue_pending(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + unsigned long flags; + int err; + + spin_lock_irqsave(&tdc->lock, flags); + if (list_empty(&tdc->pending_sg_req)) { + dev_err(tdc2dev(tdc), "No DMA request\n"); + goto end; + } + if (!tdc->busy) { + err = pm_runtime_resume_and_get(tdc->tdma->dev); + if (err < 0) { + dev_err(tdc2dev(tdc), "Failed to enable DMA\n"); + goto end; + } + + tdc_start_head_req(tdc); + + /* Continuous single mode: Configure next req */ + if (tdc->cyclic) { + /* + * Wait for 1 burst time for configure DMA for + * next transfer. + */ + udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + tdc_configure_next_head_desc(tdc); + } + } +end: + spin_unlock_irqrestore(&tdc->lock, flags); +} + +static int tegra_dma_terminate_all(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + struct tegra_dma_desc *dma_desc; + struct tegra_dma_sg_req *sgreq; + unsigned long flags; + u32 status, wcount; + bool was_busy; + + spin_lock_irqsave(&tdc->lock, flags); + + if (!tdc->busy) + goto skip_dma_stop; + + /* Pause DMA before checking the queue status */ + tegra_dma_pause(tdc, true); + + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + if (status & TEGRA_APBDMA_STATUS_ISE_EOC) { + dev_dbg(tdc2dev(tdc), "%s():handling isr\n", __func__); + tdc->isr_handler(tdc, true); + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + } + if (tdc->tdma->chip_data->support_separate_wcount_reg) + wcount = tdc_read(tdc, TEGRA_APBDMA_CHAN_WORD_TRANSFER); + else + wcount = status; + + was_busy = tdc->busy; + tegra_dma_stop(tdc); + + if (!list_empty(&tdc->pending_sg_req) && was_busy) { + sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), + node); + sgreq->dma_desc->bytes_transferred += + get_current_xferred_count(tdc, sgreq, wcount); + } + tegra_dma_resume(tdc); + + pm_runtime_put(tdc->tdma->dev); + wake_up_all(&tdc->wq); + +skip_dma_stop: + tegra_dma_abort_all(tdc); + + while (!list_empty(&tdc->cb_desc)) { + dma_desc = list_first_entry(&tdc->cb_desc, typeof(*dma_desc), + cb_node); + list_del(&dma_desc->cb_node); + dma_desc->cb_count = 0; + } + spin_unlock_irqrestore(&tdc->lock, flags); + + return 0; +} + +static bool tegra_dma_eoc_interrupt_deasserted(struct tegra_dma_channel *tdc) +{ + unsigned long flags; + u32 status; + + spin_lock_irqsave(&tdc->lock, flags); + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + spin_unlock_irqrestore(&tdc->lock, flags); + + return !(status & TEGRA_APBDMA_STATUS_ISE_EOC); +} + +static void tegra_dma_synchronize(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + int err; + + err = pm_runtime_resume_and_get(tdc->tdma->dev); + if (err < 0) { + dev_err(tdc2dev(tdc), "Failed to synchronize DMA: %d\n", err); + return; + } + + /* + * CPU, which handles interrupt, could be busy in + * uninterruptible state, in this case sibling CPU + * should wait until interrupt is handled. + */ + wait_event(tdc->wq, tegra_dma_eoc_interrupt_deasserted(tdc)); + + tasklet_kill(&tdc->tasklet); + + pm_runtime_put(tdc->tdma->dev); +} + +static unsigned int tegra_dma_sg_bytes_xferred(struct tegra_dma_channel *tdc, + struct tegra_dma_sg_req *sg_req) +{ + u32 status, wcount = 0; + + if (!list_is_first(&sg_req->node, &tdc->pending_sg_req)) + return 0; + + if (tdc->tdma->chip_data->support_separate_wcount_reg) + wcount = tdc_read(tdc, TEGRA_APBDMA_CHAN_WORD_TRANSFER); + + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + + if (!tdc->tdma->chip_data->support_separate_wcount_reg) + wcount = status; + + if (status & TEGRA_APBDMA_STATUS_ISE_EOC) + return sg_req->req_len; + + wcount = get_current_xferred_count(tdc, sg_req, wcount); + + if (!wcount) { + /* + * If wcount wasn't ever polled for this SG before, then + * simply assume that transfer hasn't started yet. + * + * Otherwise it's the end of the transfer. + * + * The alternative would be to poll the status register + * until EOC bit is set or wcount goes UP. That's so + * because EOC bit is getting set only after the last + * burst's completion and counter is less than the actual + * transfer size by 4 bytes. The counter value wraps around + * in a cyclic mode before EOC is set(!), so we can't easily + * distinguish start of transfer from its end. + */ + if (sg_req->words_xferred) + wcount = sg_req->req_len - 4; + + } else if (wcount < sg_req->words_xferred) { + /* + * This case will never happen for a non-cyclic transfer. + * + * For a cyclic transfer, although it is possible for the + * next transfer to have already started (resetting the word + * count), this case should still not happen because we should + * have detected that the EOC bit is set and hence the transfer + * was completed. + */ + WARN_ON_ONCE(1); + + wcount = sg_req->req_len - 4; + } else { + sg_req->words_xferred = wcount; + } + + return wcount; +} + +static enum dma_status tegra_dma_tx_status(struct dma_chan *dc, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + struct tegra_dma_desc *dma_desc; + struct tegra_dma_sg_req *sg_req; + enum dma_status ret; + unsigned long flags; + unsigned int residual; + unsigned int bytes = 0; + + ret = dma_cookie_status(dc, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_irqsave(&tdc->lock, flags); + + /* Check on wait_ack desc status */ + list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) { + if (dma_desc->txd.cookie == cookie) { + ret = dma_desc->dma_status; + goto found; + } + } + + /* Check in pending list */ + list_for_each_entry(sg_req, &tdc->pending_sg_req, node) { + dma_desc = sg_req->dma_desc; + if (dma_desc->txd.cookie == cookie) { + bytes = tegra_dma_sg_bytes_xferred(tdc, sg_req); + ret = dma_desc->dma_status; + goto found; + } + } + + dev_dbg(tdc2dev(tdc), "cookie %d not found\n", cookie); + dma_desc = NULL; + +found: + if (dma_desc && txstate) { + residual = dma_desc->bytes_requested - + ((dma_desc->bytes_transferred + bytes) % + dma_desc->bytes_requested); + dma_set_residue(txstate, residual); + } + + trace_tegra_dma_tx_status(&tdc->dma_chan, cookie, txstate); + spin_unlock_irqrestore(&tdc->lock, flags); + + return ret; +} + +static inline unsigned int get_bus_width(struct tegra_dma_channel *tdc, + enum dma_slave_buswidth slave_bw) +{ + switch (slave_bw) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_8; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_16; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32; + case DMA_SLAVE_BUSWIDTH_8_BYTES: + return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64; + default: + dev_warn(tdc2dev(tdc), + "slave bw is not supported, using 32bits\n"); + return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32; + } +} + +static inline unsigned int get_burst_size(struct tegra_dma_channel *tdc, + u32 burst_size, + enum dma_slave_buswidth slave_bw, + u32 len) +{ + unsigned int burst_byte, burst_ahb_width; + + /* + * burst_size from client is in terms of the bus_width. + * convert them into AHB memory width which is 4 byte. + */ + burst_byte = burst_size * slave_bw; + burst_ahb_width = burst_byte / 4; + + /* If burst size is 0 then calculate the burst size based on length */ + if (!burst_ahb_width) { + if (len & 0xF) + return TEGRA_APBDMA_AHBSEQ_BURST_1; + else if ((len >> 4) & 0x1) + return TEGRA_APBDMA_AHBSEQ_BURST_4; + else + return TEGRA_APBDMA_AHBSEQ_BURST_8; + } + if (burst_ahb_width < 4) + return TEGRA_APBDMA_AHBSEQ_BURST_1; + else if (burst_ahb_width < 8) + return TEGRA_APBDMA_AHBSEQ_BURST_4; + else + return TEGRA_APBDMA_AHBSEQ_BURST_8; +} + +static int get_transfer_param(struct tegra_dma_channel *tdc, + enum dma_transfer_direction direction, + u32 *apb_addr, + u32 *apb_seq, + u32 *csr, + unsigned int *burst_size, + enum dma_slave_buswidth *slave_bw) +{ + switch (direction) { + case DMA_MEM_TO_DEV: + *apb_addr = tdc->dma_sconfig.dst_addr; + *apb_seq = get_bus_width(tdc, tdc->dma_sconfig.dst_addr_width); + *burst_size = tdc->dma_sconfig.dst_maxburst; + *slave_bw = tdc->dma_sconfig.dst_addr_width; + *csr = TEGRA_APBDMA_CSR_DIR; + return 0; + + case DMA_DEV_TO_MEM: + *apb_addr = tdc->dma_sconfig.src_addr; + *apb_seq = get_bus_width(tdc, tdc->dma_sconfig.src_addr_width); + *burst_size = tdc->dma_sconfig.src_maxburst; + *slave_bw = tdc->dma_sconfig.src_addr_width; + *csr = 0; + return 0; + + default: + dev_err(tdc2dev(tdc), "DMA direction is not supported\n"); + break; + } + + return -EINVAL; +} + +static void tegra_dma_prep_wcount(struct tegra_dma_channel *tdc, + struct tegra_dma_channel_regs *ch_regs, + u32 len) +{ + u32 len_field = (len - 4) & 0xFFFC; + + if (tdc->tdma->chip_data->support_separate_wcount_reg) + ch_regs->wcount = len_field; + else + ch_regs->csr |= len_field; +} + +static struct dma_async_tx_descriptor * +tegra_dma_prep_slave_sg(struct dma_chan *dc, + struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, + void *context) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + struct tegra_dma_sg_req *sg_req = NULL; + u32 csr, ahb_seq, apb_ptr, apb_seq; + enum dma_slave_buswidth slave_bw; + struct tegra_dma_desc *dma_desc; + struct list_head req_list; + struct scatterlist *sg; + unsigned int burst_size; + unsigned int i; + + if (!tdc->config_init) { + dev_err(tdc2dev(tdc), "DMA channel is not configured\n"); + return NULL; + } + if (sg_len < 1) { + dev_err(tdc2dev(tdc), "Invalid segment length %d\n", sg_len); + return NULL; + } + + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) + return NULL; + + INIT_LIST_HEAD(&req_list); + + ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB; + ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE << + TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT; + ahb_seq |= TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32; + + csr |= TEGRA_APBDMA_CSR_ONCE; + + if (tdc->slave_id != TEGRA_APBDMA_SLAVE_ID_INVALID) { + csr |= TEGRA_APBDMA_CSR_FLOW; + csr |= tdc->slave_id << TEGRA_APBDMA_CSR_REQ_SEL_SHIFT; + } + + if (flags & DMA_PREP_INTERRUPT) { + csr |= TEGRA_APBDMA_CSR_IE_EOC; + } else { + WARN_ON_ONCE(1); + return NULL; + } + + apb_seq |= TEGRA_APBDMA_APBSEQ_WRAP_WORD_1; + + dma_desc = tegra_dma_desc_get(tdc); + if (!dma_desc) { + dev_err(tdc2dev(tdc), "DMA descriptors not available\n"); + return NULL; + } + INIT_LIST_HEAD(&dma_desc->tx_list); + INIT_LIST_HEAD(&dma_desc->cb_node); + dma_desc->cb_count = 0; + dma_desc->bytes_requested = 0; + dma_desc->bytes_transferred = 0; + dma_desc->dma_status = DMA_IN_PROGRESS; + + /* Make transfer requests */ + for_each_sg(sgl, sg, sg_len, i) { + u32 len, mem; + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + + if ((len & 3) || (mem & 3) || + len > tdc->tdma->chip_data->max_dma_count) { + dev_err(tdc2dev(tdc), + "DMA length/memory address is not supported\n"); + tegra_dma_desc_put(tdc, dma_desc); + return NULL; + } + + sg_req = tegra_dma_sg_req_get(tdc); + if (!sg_req) { + dev_err(tdc2dev(tdc), "DMA sg-req not available\n"); + tegra_dma_desc_put(tdc, dma_desc); + return NULL; + } + + ahb_seq |= get_burst_size(tdc, burst_size, slave_bw, len); + dma_desc->bytes_requested += len; + + sg_req->ch_regs.apb_ptr = apb_ptr; + sg_req->ch_regs.ahb_ptr = mem; + sg_req->ch_regs.csr = csr; + tegra_dma_prep_wcount(tdc, &sg_req->ch_regs, len); + sg_req->ch_regs.apb_seq = apb_seq; + sg_req->ch_regs.ahb_seq = ahb_seq; + sg_req->configured = false; + sg_req->last_sg = false; + sg_req->dma_desc = dma_desc; + sg_req->req_len = len; + + list_add_tail(&sg_req->node, &dma_desc->tx_list); + } + sg_req->last_sg = true; + if (flags & DMA_CTRL_ACK) + dma_desc->txd.flags = DMA_CTRL_ACK; + + /* + * Make sure that mode should not be conflicting with currently + * configured mode. + */ + if (!tdc->isr_handler) { + tdc->isr_handler = handle_once_dma_done; + tdc->cyclic = false; + } else { + if (tdc->cyclic) { + dev_err(tdc2dev(tdc), "DMA configured in cyclic mode\n"); + tegra_dma_desc_put(tdc, dma_desc); + return NULL; + } + } + + return &dma_desc->txd; +} + +static struct dma_async_tx_descriptor * +tegra_dma_prep_dma_cyclic(struct dma_chan *dc, dma_addr_t buf_addr, + size_t buf_len, + size_t period_len, + enum dma_transfer_direction direction, + unsigned long flags) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + struct tegra_dma_sg_req *sg_req = NULL; + u32 csr, ahb_seq, apb_ptr, apb_seq; + enum dma_slave_buswidth slave_bw; + struct tegra_dma_desc *dma_desc; + dma_addr_t mem = buf_addr; + unsigned int burst_size; + size_t len, remain_len; + + if (!buf_len || !period_len) { + dev_err(tdc2dev(tdc), "Invalid buffer/period len\n"); + return NULL; + } + + if (!tdc->config_init) { + dev_err(tdc2dev(tdc), "DMA slave is not configured\n"); + return NULL; + } + + /* + * We allow to take more number of requests till DMA is + * not started. The driver will loop over all requests. + * Once DMA is started then new requests can be queued only after + * terminating the DMA. + */ + if (tdc->busy) { + dev_err(tdc2dev(tdc), "Request not allowed when DMA running\n"); + return NULL; + } + + /* + * We only support cycle transfer when buf_len is multiple of + * period_len. + */ + if (buf_len % period_len) { + dev_err(tdc2dev(tdc), "buf_len is not multiple of period_len\n"); + return NULL; + } + + len = period_len; + if ((len & 3) || (buf_addr & 3) || + len > tdc->tdma->chip_data->max_dma_count) { + dev_err(tdc2dev(tdc), "Req len/mem address is not correct\n"); + return NULL; + } + + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) + return NULL; + + ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB; + ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE << + TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT; + ahb_seq |= TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32; + + if (tdc->slave_id != TEGRA_APBDMA_SLAVE_ID_INVALID) { + csr |= TEGRA_APBDMA_CSR_FLOW; + csr |= tdc->slave_id << TEGRA_APBDMA_CSR_REQ_SEL_SHIFT; + } + + if (flags & DMA_PREP_INTERRUPT) { + csr |= TEGRA_APBDMA_CSR_IE_EOC; + } else { + WARN_ON_ONCE(1); + return NULL; + } + + apb_seq |= TEGRA_APBDMA_APBSEQ_WRAP_WORD_1; + + dma_desc = tegra_dma_desc_get(tdc); + if (!dma_desc) { + dev_err(tdc2dev(tdc), "not enough descriptors available\n"); + return NULL; + } + + INIT_LIST_HEAD(&dma_desc->tx_list); + INIT_LIST_HEAD(&dma_desc->cb_node); + dma_desc->cb_count = 0; + + dma_desc->bytes_transferred = 0; + dma_desc->bytes_requested = buf_len; + remain_len = buf_len; + + /* Split transfer equal to period size */ + while (remain_len) { + sg_req = tegra_dma_sg_req_get(tdc); + if (!sg_req) { + dev_err(tdc2dev(tdc), "DMA sg-req not available\n"); + tegra_dma_desc_put(tdc, dma_desc); + return NULL; + } + + ahb_seq |= get_burst_size(tdc, burst_size, slave_bw, len); + sg_req->ch_regs.apb_ptr = apb_ptr; + sg_req->ch_regs.ahb_ptr = mem; + sg_req->ch_regs.csr = csr; + tegra_dma_prep_wcount(tdc, &sg_req->ch_regs, len); + sg_req->ch_regs.apb_seq = apb_seq; + sg_req->ch_regs.ahb_seq = ahb_seq; + sg_req->configured = false; + sg_req->last_sg = false; + sg_req->dma_desc = dma_desc; + sg_req->req_len = len; + + list_add_tail(&sg_req->node, &dma_desc->tx_list); + remain_len -= len; + mem += len; + } + sg_req->last_sg = true; + if (flags & DMA_CTRL_ACK) + dma_desc->txd.flags = DMA_CTRL_ACK; + + /* + * Make sure that mode should not be conflicting with currently + * configured mode. + */ + if (!tdc->isr_handler) { + tdc->isr_handler = handle_cont_sngl_cycle_dma_done; + tdc->cyclic = true; + } else { + if (!tdc->cyclic) { + dev_err(tdc2dev(tdc), "DMA configuration conflict\n"); + tegra_dma_desc_put(tdc, dma_desc); + return NULL; + } + } + + return &dma_desc->txd; +} + +static int tegra_dma_alloc_chan_resources(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + + dma_cookie_init(&tdc->dma_chan); + + return 0; +} + +static void tegra_dma_free_chan_resources(struct dma_chan *dc) +{ + struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); + struct tegra_dma_desc *dma_desc; + struct tegra_dma_sg_req *sg_req; + struct list_head dma_desc_list; + struct list_head sg_req_list; + + INIT_LIST_HEAD(&dma_desc_list); + INIT_LIST_HEAD(&sg_req_list); + + dev_dbg(tdc2dev(tdc), "Freeing channel %d\n", tdc->id); + + tegra_dma_terminate_all(dc); + tasklet_kill(&tdc->tasklet); + + list_splice_init(&tdc->pending_sg_req, &sg_req_list); + list_splice_init(&tdc->free_sg_req, &sg_req_list); + list_splice_init(&tdc->free_dma_desc, &dma_desc_list); + INIT_LIST_HEAD(&tdc->cb_desc); + tdc->config_init = false; + tdc->isr_handler = NULL; + + while (!list_empty(&dma_desc_list)) { + dma_desc = list_first_entry(&dma_desc_list, typeof(*dma_desc), + node); + list_del(&dma_desc->node); + kfree(dma_desc); + } + + while (!list_empty(&sg_req_list)) { + sg_req = list_first_entry(&sg_req_list, typeof(*sg_req), node); + list_del(&sg_req->node); + kfree(sg_req); + } + + tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID; +} + +static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct tegra_dma *tdma = ofdma->of_dma_data; + struct tegra_dma_channel *tdc; + struct dma_chan *chan; + + if (dma_spec->args[0] > TEGRA_APBDMA_CSR_REQ_SEL_MASK) { + dev_err(tdma->dev, "Invalid slave id: %d\n", dma_spec->args[0]); + return NULL; + } + + chan = dma_get_any_slave_channel(&tdma->dma_dev); + if (!chan) + return NULL; + + tdc = to_tegra_dma_chan(chan); + tdc->slave_id = dma_spec->args[0]; + + return chan; +} + +/* Tegra20 specific DMA controller information */ +static const struct tegra_dma_chip_data tegra20_dma_chip_data = { + .nr_channels = 16, + .channel_reg_size = 0x20, + .max_dma_count = 1024UL * 64, + .support_channel_pause = false, + .support_separate_wcount_reg = false, +}; + +/* Tegra30 specific DMA controller information */ +static const struct tegra_dma_chip_data tegra30_dma_chip_data = { + .nr_channels = 32, + .channel_reg_size = 0x20, + .max_dma_count = 1024UL * 64, + .support_channel_pause = false, + .support_separate_wcount_reg = false, +}; + +/* Tegra114 specific DMA controller information */ +static const struct tegra_dma_chip_data tegra114_dma_chip_data = { + .nr_channels = 32, + .channel_reg_size = 0x20, + .max_dma_count = 1024UL * 64, + .support_channel_pause = true, + .support_separate_wcount_reg = false, +}; + +/* Tegra148 specific DMA controller information */ +static const struct tegra_dma_chip_data tegra148_dma_chip_data = { + .nr_channels = 32, + .channel_reg_size = 0x40, + .max_dma_count = 1024UL * 64, + .support_channel_pause = true, + .support_separate_wcount_reg = true, +}; + +static int tegra_dma_init_hw(struct tegra_dma *tdma) +{ + int err; + + err = reset_control_assert(tdma->rst); + if (err) { + dev_err(tdma->dev, "failed to assert reset: %d\n", err); + return err; + } + + err = clk_enable(tdma->dma_clk); + if (err) { + dev_err(tdma->dev, "failed to enable clk: %d\n", err); + return err; + } + + /* reset DMA controller */ + udelay(2); + reset_control_deassert(tdma->rst); + + /* enable global DMA registers */ + tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE); + tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0); + tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFF); + + clk_disable(tdma->dma_clk); + + return 0; +} + +static int tegra_dma_probe(struct platform_device *pdev) +{ + const struct tegra_dma_chip_data *cdata; + struct tegra_dma *tdma; + unsigned int i; + size_t size; + int ret; + + cdata = of_device_get_match_data(&pdev->dev); + size = struct_size(tdma, channels, cdata->nr_channels); + + tdma = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!tdma) + return -ENOMEM; + + tdma->dev = &pdev->dev; + tdma->chip_data = cdata; + platform_set_drvdata(pdev, tdma); + + tdma->base_addr = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(tdma->base_addr)) + return PTR_ERR(tdma->base_addr); + + tdma->dma_clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(tdma->dma_clk)) { + dev_err(&pdev->dev, "Error: Missing controller clock\n"); + return PTR_ERR(tdma->dma_clk); + } + + tdma->rst = devm_reset_control_get(&pdev->dev, "dma"); + if (IS_ERR(tdma->rst)) { + dev_err(&pdev->dev, "Error: Missing reset\n"); + return PTR_ERR(tdma->rst); + } + + spin_lock_init(&tdma->global_lock); + + ret = clk_prepare(tdma->dma_clk); + if (ret) + return ret; + + ret = tegra_dma_init_hw(tdma); + if (ret) + goto err_clk_unprepare; + + pm_runtime_irq_safe(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + INIT_LIST_HEAD(&tdma->dma_dev.channels); + for (i = 0; i < cdata->nr_channels; i++) { + struct tegra_dma_channel *tdc = &tdma->channels[i]; + int irq; + + tdc->chan_addr = tdma->base_addr + + TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + + (i * cdata->channel_reg_size); + + irq = platform_get_irq(pdev, i); + if (irq < 0) { + ret = irq; + goto err_pm_disable; + } + + snprintf(tdc->name, sizeof(tdc->name), "apbdma.%d", i); + ret = devm_request_irq(&pdev->dev, irq, tegra_dma_isr, 0, + tdc->name, tdc); + if (ret) { + dev_err(&pdev->dev, + "request_irq failed with err %d channel %d\n", + ret, i); + goto err_pm_disable; + } + + tdc->dma_chan.device = &tdma->dma_dev; + dma_cookie_init(&tdc->dma_chan); + list_add_tail(&tdc->dma_chan.device_node, + &tdma->dma_dev.channels); + tdc->tdma = tdma; + tdc->id = i; + tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID; + + tasklet_setup(&tdc->tasklet, tegra_dma_tasklet); + spin_lock_init(&tdc->lock); + init_waitqueue_head(&tdc->wq); + + INIT_LIST_HEAD(&tdc->pending_sg_req); + INIT_LIST_HEAD(&tdc->free_sg_req); + INIT_LIST_HEAD(&tdc->free_dma_desc); + INIT_LIST_HEAD(&tdc->cb_desc); + } + + dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + + tdma->global_pause_count = 0; + tdma->dma_dev.dev = &pdev->dev; + tdma->dma_dev.device_alloc_chan_resources = + tegra_dma_alloc_chan_resources; + tdma->dma_dev.device_free_chan_resources = + tegra_dma_free_chan_resources; + tdma->dma_dev.device_prep_slave_sg = tegra_dma_prep_slave_sg; + tdma->dma_dev.device_prep_dma_cyclic = tegra_dma_prep_dma_cyclic; + tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES); + tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES); + tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + tdma->dma_dev.device_config = tegra_dma_slave_config; + tdma->dma_dev.device_terminate_all = tegra_dma_terminate_all; + tdma->dma_dev.device_synchronize = tegra_dma_synchronize; + tdma->dma_dev.device_tx_status = tegra_dma_tx_status; + tdma->dma_dev.device_issue_pending = tegra_dma_issue_pending; + + ret = dma_async_device_register(&tdma->dma_dev); + if (ret < 0) { + dev_err(&pdev->dev, + "Tegra20 APB DMA driver registration failed %d\n", ret); + goto err_pm_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, + tegra_dma_of_xlate, tdma); + if (ret < 0) { + dev_err(&pdev->dev, + "Tegra20 APB DMA OF registration failed %d\n", ret); + goto err_unregister_dma_dev; + } + + dev_info(&pdev->dev, "Tegra20 APB DMA driver registered %u channels\n", + cdata->nr_channels); + + return 0; + +err_unregister_dma_dev: + dma_async_device_unregister(&tdma->dma_dev); + +err_pm_disable: + pm_runtime_disable(&pdev->dev); + +err_clk_unprepare: + clk_unprepare(tdma->dma_clk); + + return ret; +} + +static int tegra_dma_remove(struct platform_device *pdev) +{ + struct tegra_dma *tdma = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&tdma->dma_dev); + pm_runtime_disable(&pdev->dev); + clk_unprepare(tdma->dma_clk); + + return 0; +} + +static int __maybe_unused tegra_dma_runtime_suspend(struct device *dev) +{ + struct tegra_dma *tdma = dev_get_drvdata(dev); + + clk_disable(tdma->dma_clk); + + return 0; +} + +static int __maybe_unused tegra_dma_runtime_resume(struct device *dev) +{ + struct tegra_dma *tdma = dev_get_drvdata(dev); + + return clk_enable(tdma->dma_clk); +} + +static int __maybe_unused tegra_dma_dev_suspend(struct device *dev) +{ + struct tegra_dma *tdma = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + bool busy; + + for (i = 0; i < tdma->chip_data->nr_channels; i++) { + struct tegra_dma_channel *tdc = &tdma->channels[i]; + + tasklet_kill(&tdc->tasklet); + + spin_lock_irqsave(&tdc->lock, flags); + busy = tdc->busy; + spin_unlock_irqrestore(&tdc->lock, flags); + + if (busy) { + dev_err(tdma->dev, "channel %u busy\n", i); + return -EBUSY; + } + } + + return pm_runtime_force_suspend(dev); +} + +static int __maybe_unused tegra_dma_dev_resume(struct device *dev) +{ + struct tegra_dma *tdma = dev_get_drvdata(dev); + int err; + + err = tegra_dma_init_hw(tdma); + if (err) + return err; + + return pm_runtime_force_resume(dev); +} + +static const struct dev_pm_ops tegra_dma_dev_pm_ops = { + SET_RUNTIME_PM_OPS(tegra_dma_runtime_suspend, tegra_dma_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_dev_suspend, tegra_dma_dev_resume) +}; + +static const struct of_device_id tegra_dma_of_match[] = { + { + .compatible = "nvidia,tegra148-apbdma", + .data = &tegra148_dma_chip_data, + }, { + .compatible = "nvidia,tegra114-apbdma", + .data = &tegra114_dma_chip_data, + }, { + .compatible = "nvidia,tegra30-apbdma", + .data = &tegra30_dma_chip_data, + }, { + .compatible = "nvidia,tegra20-apbdma", + .data = &tegra20_dma_chip_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, tegra_dma_of_match); + +static struct platform_driver tegra_dmac_driver = { + .driver = { + .name = "tegra-apbdma", + .pm = &tegra_dma_dev_pm_ops, + .of_match_table = tegra_dma_of_match, + }, + .probe = tegra_dma_probe, + .remove = tegra_dma_remove, +}; + +module_platform_driver(tegra_dmac_driver); + +MODULE_DESCRIPTION("NVIDIA Tegra APB DMA Controller driver"); +MODULE_AUTHOR("Laxman Dewangan "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c new file mode 100644 index 0000000000..e557bada15 --- /dev/null +++ b/drivers/dma/tegra210-adma.c @@ -0,0 +1,991 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ADMA driver for Nvidia's Tegra210 ADMA controller. + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +#define ADMA_CH_CMD 0x00 +#define ADMA_CH_STATUS 0x0c +#define ADMA_CH_STATUS_XFER_EN BIT(0) +#define ADMA_CH_STATUS_XFER_PAUSED BIT(1) + +#define ADMA_CH_INT_STATUS 0x10 +#define ADMA_CH_INT_STATUS_XFER_DONE BIT(0) + +#define ADMA_CH_INT_CLEAR 0x1c +#define ADMA_CH_CTRL 0x24 +#define ADMA_CH_CTRL_DIR(val) (((val) & 0xf) << 12) +#define ADMA_CH_CTRL_DIR_AHUB2MEM 2 +#define ADMA_CH_CTRL_DIR_MEM2AHUB 4 +#define ADMA_CH_CTRL_MODE_CONTINUOUS (2 << 8) +#define ADMA_CH_CTRL_FLOWCTRL_EN BIT(1) +#define ADMA_CH_CTRL_XFER_PAUSE_SHIFT 0 + +#define ADMA_CH_CONFIG 0x28 +#define ADMA_CH_CONFIG_SRC_BUF(val) (((val) & 0x7) << 28) +#define ADMA_CH_CONFIG_TRG_BUF(val) (((val) & 0x7) << 24) +#define ADMA_CH_CONFIG_BURST_SIZE_SHIFT 20 +#define ADMA_CH_CONFIG_MAX_BURST_SIZE 16 +#define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val) ((val) & 0xf) +#define ADMA_CH_CONFIG_MAX_BUFS 8 +#define TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(reqs) (reqs << 4) + +#define ADMA_CH_FIFO_CTRL 0x2c +#define ADMA_CH_TX_FIFO_SIZE_SHIFT 8 +#define ADMA_CH_RX_FIFO_SIZE_SHIFT 0 + +#define ADMA_CH_LOWER_SRC_ADDR 0x34 +#define ADMA_CH_LOWER_TRG_ADDR 0x3c +#define ADMA_CH_TC 0x44 +#define ADMA_CH_TC_COUNT_MASK 0x3ffffffc + +#define ADMA_CH_XFER_STATUS 0x54 +#define ADMA_CH_XFER_STATUS_COUNT_MASK 0xffff + +#define ADMA_GLOBAL_CMD 0x00 +#define ADMA_GLOBAL_SOFT_RESET 0x04 + +#define TEGRA_ADMA_BURST_COMPLETE_TIME 20 + +#define ADMA_CH_REG_FIELD_VAL(val, mask, shift) (((val) & mask) << shift) + +struct tegra_adma; + +/* + * struct tegra_adma_chip_data - Tegra chip specific data + * @adma_get_burst_config: Function callback used to set DMA burst size. + * @global_reg_offset: Register offset of DMA global register. + * @global_int_clear: Register offset of DMA global interrupt clear. + * @ch_req_tx_shift: Register offset for AHUB transmit channel select. + * @ch_req_rx_shift: Register offset for AHUB receive channel select. + * @ch_base_offset: Register offset of DMA channel registers. + * @ch_fifo_ctrl: Default value for channel FIFO CTRL register. + * @ch_req_mask: Mask for Tx or Rx channel select. + * @ch_req_max: Maximum number of Tx or Rx channels available. + * @ch_reg_size: Size of DMA channel register space. + * @nr_channels: Number of DMA channels available. + * @ch_fifo_size_mask: Mask for FIFO size field. + * @sreq_index_offset: Slave channel index offset. + * @has_outstanding_reqs: If DMA channel can have outstanding requests. + */ +struct tegra_adma_chip_data { + unsigned int (*adma_get_burst_config)(unsigned int burst_size); + unsigned int global_reg_offset; + unsigned int global_int_clear; + unsigned int ch_req_tx_shift; + unsigned int ch_req_rx_shift; + unsigned int ch_base_offset; + unsigned int ch_fifo_ctrl; + unsigned int ch_req_mask; + unsigned int ch_req_max; + unsigned int ch_reg_size; + unsigned int nr_channels; + unsigned int ch_fifo_size_mask; + unsigned int sreq_index_offset; + bool has_outstanding_reqs; +}; + +/* + * struct tegra_adma_chan_regs - Tegra ADMA channel registers + */ +struct tegra_adma_chan_regs { + unsigned int ctrl; + unsigned int config; + unsigned int src_addr; + unsigned int trg_addr; + unsigned int fifo_ctrl; + unsigned int cmd; + unsigned int tc; +}; + +/* + * struct tegra_adma_desc - Tegra ADMA descriptor to manage transfer requests. + */ +struct tegra_adma_desc { + struct virt_dma_desc vd; + struct tegra_adma_chan_regs ch_regs; + size_t buf_len; + size_t period_len; + size_t num_periods; +}; + +/* + * struct tegra_adma_chan - Tegra ADMA channel information + */ +struct tegra_adma_chan { + struct virt_dma_chan vc; + struct tegra_adma_desc *desc; + struct tegra_adma *tdma; + int irq; + void __iomem *chan_addr; + + /* Slave channel configuration info */ + struct dma_slave_config sconfig; + enum dma_transfer_direction sreq_dir; + unsigned int sreq_index; + bool sreq_reserved; + struct tegra_adma_chan_regs ch_regs; + + /* Transfer count and position info */ + unsigned int tx_buf_count; + unsigned int tx_buf_pos; +}; + +/* + * struct tegra_adma - Tegra ADMA controller information + */ +struct tegra_adma { + struct dma_device dma_dev; + struct device *dev; + void __iomem *base_addr; + struct clk *ahub_clk; + unsigned int nr_channels; + unsigned long rx_requests_reserved; + unsigned long tx_requests_reserved; + + /* Used to store global command register state when suspending */ + unsigned int global_cmd; + + const struct tegra_adma_chip_data *cdata; + + /* Last member of the structure */ + struct tegra_adma_chan channels[]; +}; + +static inline void tdma_write(struct tegra_adma *tdma, u32 reg, u32 val) +{ + writel(val, tdma->base_addr + tdma->cdata->global_reg_offset + reg); +} + +static inline u32 tdma_read(struct tegra_adma *tdma, u32 reg) +{ + return readl(tdma->base_addr + tdma->cdata->global_reg_offset + reg); +} + +static inline void tdma_ch_write(struct tegra_adma_chan *tdc, u32 reg, u32 val) +{ + writel(val, tdc->chan_addr + reg); +} + +static inline u32 tdma_ch_read(struct tegra_adma_chan *tdc, u32 reg) +{ + return readl(tdc->chan_addr + reg); +} + +static inline struct tegra_adma_chan *to_tegra_adma_chan(struct dma_chan *dc) +{ + return container_of(dc, struct tegra_adma_chan, vc.chan); +} + +static inline struct tegra_adma_desc *to_tegra_adma_desc( + struct dma_async_tx_descriptor *td) +{ + return container_of(td, struct tegra_adma_desc, vd.tx); +} + +static inline struct device *tdc2dev(struct tegra_adma_chan *tdc) +{ + return tdc->tdma->dev; +} + +static void tegra_adma_desc_free(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct tegra_adma_desc, vd)); +} + +static int tegra_adma_slave_config(struct dma_chan *dc, + struct dma_slave_config *sconfig) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + + memcpy(&tdc->sconfig, sconfig, sizeof(*sconfig)); + + return 0; +} + +static int tegra_adma_init(struct tegra_adma *tdma) +{ + u32 status; + int ret; + + /* Clear any interrupts */ + tdma_write(tdma, tdma->cdata->ch_base_offset + tdma->cdata->global_int_clear, 0x1); + + /* Assert soft reset */ + tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1); + + /* Wait for reset to clear */ + ret = readx_poll_timeout(readl, + tdma->base_addr + + tdma->cdata->global_reg_offset + + ADMA_GLOBAL_SOFT_RESET, + status, status == 0, 20, 10000); + if (ret) + return ret; + + /* Enable global ADMA registers */ + tdma_write(tdma, ADMA_GLOBAL_CMD, 1); + + return 0; +} + +static int tegra_adma_request_alloc(struct tegra_adma_chan *tdc, + enum dma_transfer_direction direction) +{ + struct tegra_adma *tdma = tdc->tdma; + unsigned int sreq_index = tdc->sreq_index; + + if (tdc->sreq_reserved) + return tdc->sreq_dir == direction ? 0 : -EINVAL; + + if (sreq_index > tdma->cdata->ch_req_max) { + dev_err(tdma->dev, "invalid DMA request\n"); + return -EINVAL; + } + + switch (direction) { + case DMA_MEM_TO_DEV: + if (test_and_set_bit(sreq_index, &tdma->tx_requests_reserved)) { + dev_err(tdma->dev, "DMA request reserved\n"); + return -EINVAL; + } + break; + + case DMA_DEV_TO_MEM: + if (test_and_set_bit(sreq_index, &tdma->rx_requests_reserved)) { + dev_err(tdma->dev, "DMA request reserved\n"); + return -EINVAL; + } + break; + + default: + dev_WARN(tdma->dev, "channel %s has invalid transfer type\n", + dma_chan_name(&tdc->vc.chan)); + return -EINVAL; + } + + tdc->sreq_dir = direction; + tdc->sreq_reserved = true; + + return 0; +} + +static void tegra_adma_request_free(struct tegra_adma_chan *tdc) +{ + struct tegra_adma *tdma = tdc->tdma; + + if (!tdc->sreq_reserved) + return; + + switch (tdc->sreq_dir) { + case DMA_MEM_TO_DEV: + clear_bit(tdc->sreq_index, &tdma->tx_requests_reserved); + break; + + case DMA_DEV_TO_MEM: + clear_bit(tdc->sreq_index, &tdma->rx_requests_reserved); + break; + + default: + dev_WARN(tdma->dev, "channel %s has invalid transfer type\n", + dma_chan_name(&tdc->vc.chan)); + return; + } + + tdc->sreq_reserved = false; +} + +static u32 tegra_adma_irq_status(struct tegra_adma_chan *tdc) +{ + u32 status = tdma_ch_read(tdc, ADMA_CH_INT_STATUS); + + return status & ADMA_CH_INT_STATUS_XFER_DONE; +} + +static u32 tegra_adma_irq_clear(struct tegra_adma_chan *tdc) +{ + u32 status = tegra_adma_irq_status(tdc); + + if (status) + tdma_ch_write(tdc, ADMA_CH_INT_CLEAR, status); + + return status; +} + +static void tegra_adma_stop(struct tegra_adma_chan *tdc) +{ + unsigned int status; + + /* Disable ADMA */ + tdma_ch_write(tdc, ADMA_CH_CMD, 0); + + /* Clear interrupt status */ + tegra_adma_irq_clear(tdc); + + if (readx_poll_timeout_atomic(readl, tdc->chan_addr + ADMA_CH_STATUS, + status, !(status & ADMA_CH_STATUS_XFER_EN), + 20, 10000)) { + dev_err(tdc2dev(tdc), "unable to stop DMA channel\n"); + return; + } + + kfree(tdc->desc); + tdc->desc = NULL; +} + +static void tegra_adma_start(struct tegra_adma_chan *tdc) +{ + struct virt_dma_desc *vd = vchan_next_desc(&tdc->vc); + struct tegra_adma_chan_regs *ch_regs; + struct tegra_adma_desc *desc; + + if (!vd) + return; + + list_del(&vd->node); + + desc = to_tegra_adma_desc(&vd->tx); + + if (!desc) { + dev_warn(tdc2dev(tdc), "unable to start DMA, no descriptor\n"); + return; + } + + ch_regs = &desc->ch_regs; + + tdc->tx_buf_pos = 0; + tdc->tx_buf_count = 0; + tdma_ch_write(tdc, ADMA_CH_TC, ch_regs->tc); + tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl); + tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_regs->src_addr); + tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_regs->trg_addr); + tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_regs->fifo_ctrl); + tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_regs->config); + + /* Start ADMA */ + tdma_ch_write(tdc, ADMA_CH_CMD, 1); + + tdc->desc = desc; +} + +static unsigned int tegra_adma_get_residue(struct tegra_adma_chan *tdc) +{ + struct tegra_adma_desc *desc = tdc->desc; + unsigned int max = ADMA_CH_XFER_STATUS_COUNT_MASK + 1; + unsigned int pos = tdma_ch_read(tdc, ADMA_CH_XFER_STATUS); + unsigned int periods_remaining; + + /* + * Handle wrap around of buffer count register + */ + if (pos < tdc->tx_buf_pos) + tdc->tx_buf_count += pos + (max - tdc->tx_buf_pos); + else + tdc->tx_buf_count += pos - tdc->tx_buf_pos; + + periods_remaining = tdc->tx_buf_count % desc->num_periods; + tdc->tx_buf_pos = pos; + + return desc->buf_len - (periods_remaining * desc->period_len); +} + +static irqreturn_t tegra_adma_isr(int irq, void *dev_id) +{ + struct tegra_adma_chan *tdc = dev_id; + unsigned long status; + + spin_lock(&tdc->vc.lock); + + status = tegra_adma_irq_clear(tdc); + if (status == 0 || !tdc->desc) { + spin_unlock(&tdc->vc.lock); + return IRQ_NONE; + } + + vchan_cyclic_callback(&tdc->desc->vd); + + spin_unlock(&tdc->vc.lock); + + return IRQ_HANDLED; +} + +static void tegra_adma_issue_pending(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + unsigned long flags; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + if (vchan_issue_pending(&tdc->vc)) { + if (!tdc->desc) + tegra_adma_start(tdc); + } + + spin_unlock_irqrestore(&tdc->vc.lock, flags); +} + +static bool tegra_adma_is_paused(struct tegra_adma_chan *tdc) +{ + u32 csts; + + csts = tdma_ch_read(tdc, ADMA_CH_STATUS); + csts &= ADMA_CH_STATUS_XFER_PAUSED; + + return csts ? true : false; +} + +static int tegra_adma_pause(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + struct tegra_adma_desc *desc = tdc->desc; + struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs; + int dcnt = 10; + + ch_regs->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL); + ch_regs->ctrl |= (1 << ADMA_CH_CTRL_XFER_PAUSE_SHIFT); + tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl); + + while (dcnt-- && !tegra_adma_is_paused(tdc)) + udelay(TEGRA_ADMA_BURST_COMPLETE_TIME); + + if (dcnt < 0) { + dev_err(tdc2dev(tdc), "unable to pause DMA channel\n"); + return -EBUSY; + } + + return 0; +} + +static int tegra_adma_resume(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + struct tegra_adma_desc *desc = tdc->desc; + struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs; + + ch_regs->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL); + ch_regs->ctrl &= ~(1 << ADMA_CH_CTRL_XFER_PAUSE_SHIFT); + tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl); + + return 0; +} + +static int tegra_adma_terminate_all(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&tdc->vc.lock, flags); + + if (tdc->desc) + tegra_adma_stop(tdc); + + tegra_adma_request_free(tdc); + vchan_get_all_descriptors(&tdc->vc, &head); + spin_unlock_irqrestore(&tdc->vc.lock, flags); + vchan_dma_desc_free_list(&tdc->vc, &head); + + return 0; +} + +static enum dma_status tegra_adma_tx_status(struct dma_chan *dc, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + struct tegra_adma_desc *desc; + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + unsigned int residual; + + ret = dma_cookie_status(dc, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + vd = vchan_find_desc(&tdc->vc, cookie); + if (vd) { + desc = to_tegra_adma_desc(&vd->tx); + residual = desc->ch_regs.tc; + } else if (tdc->desc && tdc->desc->vd.tx.cookie == cookie) { + residual = tegra_adma_get_residue(tdc); + } else { + residual = 0; + } + + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + dma_set_residue(txstate, residual); + + return ret; +} + +static unsigned int tegra210_adma_get_burst_config(unsigned int burst_size) +{ + if (!burst_size || burst_size > ADMA_CH_CONFIG_MAX_BURST_SIZE) + burst_size = ADMA_CH_CONFIG_MAX_BURST_SIZE; + + return fls(burst_size) << ADMA_CH_CONFIG_BURST_SIZE_SHIFT; +} + +static unsigned int tegra186_adma_get_burst_config(unsigned int burst_size) +{ + if (!burst_size || burst_size > ADMA_CH_CONFIG_MAX_BURST_SIZE) + burst_size = ADMA_CH_CONFIG_MAX_BURST_SIZE; + + return (burst_size - 1) << ADMA_CH_CONFIG_BURST_SIZE_SHIFT; +} + +static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc, + struct tegra_adma_desc *desc, + dma_addr_t buf_addr, + enum dma_transfer_direction direction) +{ + struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs; + const struct tegra_adma_chip_data *cdata = tdc->tdma->cdata; + unsigned int burst_size, adma_dir, fifo_size_shift; + + if (desc->num_periods > ADMA_CH_CONFIG_MAX_BUFS) + return -EINVAL; + + switch (direction) { + case DMA_MEM_TO_DEV: + fifo_size_shift = ADMA_CH_TX_FIFO_SIZE_SHIFT; + adma_dir = ADMA_CH_CTRL_DIR_MEM2AHUB; + burst_size = tdc->sconfig.dst_maxburst; + ch_regs->config = ADMA_CH_CONFIG_SRC_BUF(desc->num_periods - 1); + ch_regs->ctrl = ADMA_CH_REG_FIELD_VAL(tdc->sreq_index, + cdata->ch_req_mask, + cdata->ch_req_tx_shift); + ch_regs->src_addr = buf_addr; + break; + + case DMA_DEV_TO_MEM: + fifo_size_shift = ADMA_CH_RX_FIFO_SIZE_SHIFT; + adma_dir = ADMA_CH_CTRL_DIR_AHUB2MEM; + burst_size = tdc->sconfig.src_maxburst; + ch_regs->config = ADMA_CH_CONFIG_TRG_BUF(desc->num_periods - 1); + ch_regs->ctrl = ADMA_CH_REG_FIELD_VAL(tdc->sreq_index, + cdata->ch_req_mask, + cdata->ch_req_rx_shift); + ch_regs->trg_addr = buf_addr; + break; + + default: + dev_err(tdc2dev(tdc), "DMA direction is not supported\n"); + return -EINVAL; + } + + ch_regs->ctrl |= ADMA_CH_CTRL_DIR(adma_dir) | + ADMA_CH_CTRL_MODE_CONTINUOUS | + ADMA_CH_CTRL_FLOWCTRL_EN; + ch_regs->config |= cdata->adma_get_burst_config(burst_size); + ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1); + if (cdata->has_outstanding_reqs) + ch_regs->config |= TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(8); + + /* + * 'sreq_index' represents the current ADMAIF channel number and as per + * HW recommendation its FIFO size should match with the corresponding + * ADMA channel. + * + * ADMA FIFO size is set as per below (based on default ADMAIF channel + * FIFO sizes): + * fifo_size = 0x2 (sreq_index > sreq_index_offset) + * fifo_size = 0x3 (sreq_index <= sreq_index_offset) + * + */ + if (tdc->sreq_index > cdata->sreq_index_offset) + ch_regs->fifo_ctrl = + ADMA_CH_REG_FIELD_VAL(2, cdata->ch_fifo_size_mask, + fifo_size_shift); + else + ch_regs->fifo_ctrl = + ADMA_CH_REG_FIELD_VAL(3, cdata->ch_fifo_size_mask, + fifo_size_shift); + + ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK; + + return tegra_adma_request_alloc(tdc, direction); +} + +static struct dma_async_tx_descriptor *tegra_adma_prep_dma_cyclic( + struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + struct tegra_adma_desc *desc = NULL; + + if (!buf_len || !period_len || period_len > ADMA_CH_TC_COUNT_MASK) { + dev_err(tdc2dev(tdc), "invalid buffer/period len\n"); + return NULL; + } + + if (buf_len % period_len) { + dev_err(tdc2dev(tdc), "buf_len not a multiple of period_len\n"); + return NULL; + } + + if (!IS_ALIGNED(buf_addr, 4)) { + dev_err(tdc2dev(tdc), "invalid buffer alignment\n"); + return NULL; + } + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->buf_len = buf_len; + desc->period_len = period_len; + desc->num_periods = buf_len / period_len; + + if (tegra_adma_set_xfer_params(tdc, desc, buf_addr, direction)) { + kfree(desc); + return NULL; + } + + return vchan_tx_prep(&tdc->vc, &desc->vd, flags); +} + +static int tegra_adma_alloc_chan_resources(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + int ret; + + ret = request_irq(tdc->irq, tegra_adma_isr, 0, dma_chan_name(dc), tdc); + if (ret) { + dev_err(tdc2dev(tdc), "failed to get interrupt for %s\n", + dma_chan_name(dc)); + return ret; + } + + ret = pm_runtime_resume_and_get(tdc2dev(tdc)); + if (ret < 0) { + free_irq(tdc->irq, tdc); + return ret; + } + + dma_cookie_init(&tdc->vc.chan); + + return 0; +} + +static void tegra_adma_free_chan_resources(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + + tegra_adma_terminate_all(dc); + vchan_free_chan_resources(&tdc->vc); + tasklet_kill(&tdc->vc.task); + free_irq(tdc->irq, tdc); + pm_runtime_put(tdc2dev(tdc)); + + tdc->sreq_index = 0; + tdc->sreq_dir = DMA_TRANS_NONE; +} + +static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct tegra_adma *tdma = ofdma->of_dma_data; + struct tegra_adma_chan *tdc; + struct dma_chan *chan; + unsigned int sreq_index; + + if (dma_spec->args_count != 1) + return NULL; + + sreq_index = dma_spec->args[0]; + + if (sreq_index == 0) { + dev_err(tdma->dev, "DMA request must not be 0\n"); + return NULL; + } + + chan = dma_get_any_slave_channel(&tdma->dma_dev); + if (!chan) + return NULL; + + tdc = to_tegra_adma_chan(chan); + tdc->sreq_index = sreq_index; + + return chan; +} + +static int __maybe_unused tegra_adma_runtime_suspend(struct device *dev) +{ + struct tegra_adma *tdma = dev_get_drvdata(dev); + struct tegra_adma_chan_regs *ch_reg; + struct tegra_adma_chan *tdc; + int i; + + tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD); + if (!tdma->global_cmd) + goto clk_disable; + + for (i = 0; i < tdma->nr_channels; i++) { + tdc = &tdma->channels[i]; + ch_reg = &tdc->ch_regs; + ch_reg->cmd = tdma_ch_read(tdc, ADMA_CH_CMD); + /* skip if channel is not active */ + if (!ch_reg->cmd) + continue; + ch_reg->tc = tdma_ch_read(tdc, ADMA_CH_TC); + ch_reg->src_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_SRC_ADDR); + ch_reg->trg_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_TRG_ADDR); + ch_reg->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL); + ch_reg->fifo_ctrl = tdma_ch_read(tdc, ADMA_CH_FIFO_CTRL); + ch_reg->config = tdma_ch_read(tdc, ADMA_CH_CONFIG); + } + +clk_disable: + clk_disable_unprepare(tdma->ahub_clk); + + return 0; +} + +static int __maybe_unused tegra_adma_runtime_resume(struct device *dev) +{ + struct tegra_adma *tdma = dev_get_drvdata(dev); + struct tegra_adma_chan_regs *ch_reg; + struct tegra_adma_chan *tdc; + int ret, i; + + ret = clk_prepare_enable(tdma->ahub_clk); + if (ret) { + dev_err(dev, "ahub clk_enable failed: %d\n", ret); + return ret; + } + tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd); + + if (!tdma->global_cmd) + return 0; + + for (i = 0; i < tdma->nr_channels; i++) { + tdc = &tdma->channels[i]; + ch_reg = &tdc->ch_regs; + /* skip if channel was not active earlier */ + if (!ch_reg->cmd) + continue; + tdma_ch_write(tdc, ADMA_CH_TC, ch_reg->tc); + tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_reg->src_addr); + tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_reg->trg_addr); + tdma_ch_write(tdc, ADMA_CH_CTRL, ch_reg->ctrl); + tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_reg->fifo_ctrl); + tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_reg->config); + tdma_ch_write(tdc, ADMA_CH_CMD, ch_reg->cmd); + } + + return 0; +} + +static const struct tegra_adma_chip_data tegra210_chip_data = { + .adma_get_burst_config = tegra210_adma_get_burst_config, + .global_reg_offset = 0xc00, + .global_int_clear = 0x20, + .ch_req_tx_shift = 28, + .ch_req_rx_shift = 24, + .ch_base_offset = 0, + .ch_req_mask = 0xf, + .ch_req_max = 10, + .ch_reg_size = 0x80, + .nr_channels = 22, + .ch_fifo_size_mask = 0xf, + .sreq_index_offset = 2, + .has_outstanding_reqs = false, +}; + +static const struct tegra_adma_chip_data tegra186_chip_data = { + .adma_get_burst_config = tegra186_adma_get_burst_config, + .global_reg_offset = 0, + .global_int_clear = 0x402c, + .ch_req_tx_shift = 27, + .ch_req_rx_shift = 22, + .ch_base_offset = 0x10000, + .ch_req_mask = 0x1f, + .ch_req_max = 20, + .ch_reg_size = 0x100, + .nr_channels = 32, + .ch_fifo_size_mask = 0x1f, + .sreq_index_offset = 4, + .has_outstanding_reqs = true, +}; + +static const struct of_device_id tegra_adma_of_match[] = { + { .compatible = "nvidia,tegra210-adma", .data = &tegra210_chip_data }, + { .compatible = "nvidia,tegra186-adma", .data = &tegra186_chip_data }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_adma_of_match); + +static int tegra_adma_probe(struct platform_device *pdev) +{ + const struct tegra_adma_chip_data *cdata; + struct tegra_adma *tdma; + int ret, i; + + cdata = of_device_get_match_data(&pdev->dev); + if (!cdata) { + dev_err(&pdev->dev, "device match data not found\n"); + return -ENODEV; + } + + tdma = devm_kzalloc(&pdev->dev, + struct_size(tdma, channels, cdata->nr_channels), + GFP_KERNEL); + if (!tdma) + return -ENOMEM; + + tdma->dev = &pdev->dev; + tdma->cdata = cdata; + tdma->nr_channels = cdata->nr_channels; + platform_set_drvdata(pdev, tdma); + + tdma->base_addr = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(tdma->base_addr)) + return PTR_ERR(tdma->base_addr); + + tdma->ahub_clk = devm_clk_get(&pdev->dev, "d_audio"); + if (IS_ERR(tdma->ahub_clk)) { + dev_err(&pdev->dev, "Error: Missing ahub controller clock\n"); + return PTR_ERR(tdma->ahub_clk); + } + + INIT_LIST_HEAD(&tdma->dma_dev.channels); + for (i = 0; i < tdma->nr_channels; i++) { + struct tegra_adma_chan *tdc = &tdma->channels[i]; + + tdc->chan_addr = tdma->base_addr + cdata->ch_base_offset + + (cdata->ch_reg_size * i); + + tdc->irq = of_irq_get(pdev->dev.of_node, i); + if (tdc->irq <= 0) { + ret = tdc->irq ?: -ENXIO; + goto irq_dispose; + } + + vchan_init(&tdc->vc, &tdma->dma_dev); + tdc->vc.desc_free = tegra_adma_desc_free; + tdc->tdma = tdma; + } + + pm_runtime_enable(&pdev->dev); + + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) + goto rpm_disable; + + ret = tegra_adma_init(tdma); + if (ret) + goto rpm_put; + + dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + + tdma->dma_dev.dev = &pdev->dev; + tdma->dma_dev.device_alloc_chan_resources = + tegra_adma_alloc_chan_resources; + tdma->dma_dev.device_free_chan_resources = + tegra_adma_free_chan_resources; + tdma->dma_dev.device_issue_pending = tegra_adma_issue_pending; + tdma->dma_dev.device_prep_dma_cyclic = tegra_adma_prep_dma_cyclic; + tdma->dma_dev.device_config = tegra_adma_slave_config; + tdma->dma_dev.device_tx_status = tegra_adma_tx_status; + tdma->dma_dev.device_terminate_all = tegra_adma_terminate_all; + tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + tdma->dma_dev.device_pause = tegra_adma_pause; + tdma->dma_dev.device_resume = tegra_adma_resume; + + ret = dma_async_device_register(&tdma->dma_dev); + if (ret < 0) { + dev_err(&pdev->dev, "ADMA registration failed: %d\n", ret); + goto rpm_put; + } + + ret = of_dma_controller_register(pdev->dev.of_node, + tegra_dma_of_xlate, tdma); + if (ret < 0) { + dev_err(&pdev->dev, "ADMA OF registration failed %d\n", ret); + goto dma_remove; + } + + pm_runtime_put(&pdev->dev); + + dev_info(&pdev->dev, "Tegra210 ADMA driver registered %d channels\n", + tdma->nr_channels); + + return 0; + +dma_remove: + dma_async_device_unregister(&tdma->dma_dev); +rpm_put: + pm_runtime_put_sync(&pdev->dev); +rpm_disable: + pm_runtime_disable(&pdev->dev); +irq_dispose: + while (--i >= 0) + irq_dispose_mapping(tdma->channels[i].irq); + + return ret; +} + +static int tegra_adma_remove(struct platform_device *pdev) +{ + struct tegra_adma *tdma = platform_get_drvdata(pdev); + int i; + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&tdma->dma_dev); + + for (i = 0; i < tdma->nr_channels; ++i) + irq_dispose_mapping(tdma->channels[i].irq); + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static const struct dev_pm_ops tegra_adma_dev_pm_ops = { + SET_RUNTIME_PM_OPS(tegra_adma_runtime_suspend, + tegra_adma_runtime_resume, NULL) + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + +static struct platform_driver tegra_admac_driver = { + .driver = { + .name = "tegra-adma", + .pm = &tegra_adma_dev_pm_ops, + .of_match_table = tegra_adma_of_match, + }, + .probe = tegra_adma_probe, + .remove = tegra_adma_remove, +}; + +module_platform_driver(tegra_admac_driver); + +MODULE_ALIAS("platform:tegra210-adma"); +MODULE_DESCRIPTION("NVIDIA Tegra ADMA driver"); +MODULE_AUTHOR("Dara Ramesh "); +MODULE_AUTHOR("Jon Hunter "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig new file mode 100644 index 0000000000..2adc2cca10 --- /dev/null +++ b/drivers/dma/ti/Kconfig @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Texas Instruments DMA drivers +# + +config TI_CPPI41 + tristate "Texas Instruments CPPI 4.1 DMA support" + depends on (ARCH_OMAP || ARCH_DAVINCI_DA8XX) + select DMA_ENGINE + help + The Communications Port Programming Interface (CPPI) 4.1 DMA engine + is currently used by the USB driver on AM335x and DA8xx platforms. + +config TI_EDMA + tristate "Texas Instruments EDMA support" + depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if (ARCH_OMAP || COMPILE_TEST) + default y + help + Enable support for the TI EDMA (Enhanced DMA) controller. This DMA + engine is found on TI DaVinci, AM33xx, AM43xx, DRA7xx and Keystone 2 + parts. + +config DMA_OMAP + tristate "Texas Instruments sDMA (omap-dma) support" + depends on ARCH_OMAP || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if (SOC_DRA7XX || COMPILE_TEST) + default y + help + Enable support for the TI sDMA (System DMA or DMA4) controller. This + DMA engine is found on OMAP and DRA7xx parts. + +config TI_K3_UDMA + tristate "Texas Instruments UDMA support" + depends on ARCH_K3 + depends on TI_SCI_PROTOCOL + depends on TI_SCI_INTA_IRQCHIP + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select TI_K3_RINGACC + select TI_K3_PSIL + help + Enable support for the TI UDMA (Unified DMA) controller. This + DMA engine is used in AM65x and j721e. + +config TI_K3_UDMA_GLUE_LAYER + tristate "Texas Instruments UDMA Glue layer for non DMAengine users" + depends on ARCH_K3 + depends on TI_K3_UDMA + help + Say y here to support the K3 NAVSS DMA glue interface + If unsure, say N. + +config TI_K3_PSIL + tristate + default TI_K3_UDMA + +config TI_DMA_CROSSBAR + bool diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile new file mode 100644 index 0000000000..acc950bf60 --- /dev/null +++ b/drivers/dma/ti/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_TI_CPPI41) += cppi41.o +obj-$(CONFIG_TI_EDMA) += edma.o +obj-$(CONFIG_DMA_OMAP) += omap-dma.o +obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o +obj-$(CONFIG_TI_K3_UDMA_GLUE_LAYER) += k3-udma-glue.o +k3-psil-lib-objs := k3-psil.o \ + k3-psil-am654.o \ + k3-psil-j721e.o \ + k3-psil-j7200.o \ + k3-psil-am64.o \ + k3-psil-j721s2.o \ + k3-psil-am62.o \ + k3-psil-am62a.o \ + k3-psil-j784s4.o +obj-$(CONFIG_TI_K3_PSIL) += k3-psil-lib.o +obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o diff --git a/drivers/dma/ti/cppi41.c b/drivers/dma/ti/cppi41.c new file mode 100644 index 0000000000..c3555cfb06 --- /dev/null +++ b/drivers/dma/ti/cppi41.c @@ -0,0 +1,1257 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../dmaengine.h" + +#define DESC_TYPE 27 +#define DESC_TYPE_HOST 0x10 +#define DESC_TYPE_TEARD 0x13 + +#define TD_DESC_IS_RX (1 << 16) +#define TD_DESC_DMA_NUM 10 + +#define DESC_LENGTH_BITS_NUM 21 + +#define DESC_TYPE_USB (5 << 26) +#define DESC_PD_COMPLETE (1 << 31) + +/* DMA engine */ +#define DMA_TDFDQ 4 +#define DMA_TXGCR(x) (0x800 + (x) * 0x20) +#define DMA_RXGCR(x) (0x808 + (x) * 0x20) +#define RXHPCRA0 4 + +#define GCR_CHAN_ENABLE (1 << 31) +#define GCR_TEARDOWN (1 << 30) +#define GCR_STARV_RETRY (1 << 24) +#define GCR_DESC_TYPE_HOST (1 << 14) + +/* DMA scheduler */ +#define DMA_SCHED_CTRL 0 +#define DMA_SCHED_CTRL_EN (1 << 31) +#define DMA_SCHED_WORD(x) ((x) * 4 + 0x800) + +#define SCHED_ENTRY0_CHAN(x) ((x) << 0) +#define SCHED_ENTRY0_IS_RX (1 << 7) + +#define SCHED_ENTRY1_CHAN(x) ((x) << 8) +#define SCHED_ENTRY1_IS_RX (1 << 15) + +#define SCHED_ENTRY2_CHAN(x) ((x) << 16) +#define SCHED_ENTRY2_IS_RX (1 << 23) + +#define SCHED_ENTRY3_CHAN(x) ((x) << 24) +#define SCHED_ENTRY3_IS_RX (1 << 31) + +/* Queue manager */ +/* 4 KiB of memory for descriptors, 2 for each endpoint */ +#define ALLOC_DECS_NUM 128 +#define DESCS_AREAS 1 +#define TOTAL_DESCS_NUM (ALLOC_DECS_NUM * DESCS_AREAS) +#define QMGR_SCRATCH_SIZE (TOTAL_DESCS_NUM * 4) + +#define QMGR_LRAM0_BASE 0x80 +#define QMGR_LRAM_SIZE 0x84 +#define QMGR_LRAM1_BASE 0x88 +#define QMGR_MEMBASE(x) (0x1000 + (x) * 0x10) +#define QMGR_MEMCTRL(x) (0x1004 + (x) * 0x10) +#define QMGR_MEMCTRL_IDX_SH 16 +#define QMGR_MEMCTRL_DESC_SH 8 + +#define QMGR_PEND(x) (0x90 + (x) * 4) + +#define QMGR_PENDING_SLOT_Q(x) (x / 32) +#define QMGR_PENDING_BIT_Q(x) (x % 32) + +#define QMGR_QUEUE_A(n) (0x2000 + (n) * 0x10) +#define QMGR_QUEUE_B(n) (0x2004 + (n) * 0x10) +#define QMGR_QUEUE_C(n) (0x2008 + (n) * 0x10) +#define QMGR_QUEUE_D(n) (0x200c + (n) * 0x10) + +/* Packet Descriptor */ +#define PD2_ZERO_LENGTH (1 << 19) + +struct cppi41_channel { + struct dma_chan chan; + struct dma_async_tx_descriptor txd; + struct cppi41_dd *cdd; + struct cppi41_desc *desc; + dma_addr_t desc_phys; + void __iomem *gcr_reg; + int is_tx; + u32 residue; + + unsigned int q_num; + unsigned int q_comp_num; + unsigned int port_num; + + unsigned td_retry; + unsigned td_queued:1; + unsigned td_seen:1; + unsigned td_desc_seen:1; + + struct list_head node; /* Node for pending list */ +}; + +struct cppi41_desc { + u32 pd0; + u32 pd1; + u32 pd2; + u32 pd3; + u32 pd4; + u32 pd5; + u32 pd6; + u32 pd7; +} __aligned(32); + +struct chan_queues { + u16 submit; + u16 complete; +}; + +struct cppi41_dd { + struct dma_device ddev; + + void *qmgr_scratch; + dma_addr_t scratch_phys; + + struct cppi41_desc *cd; + dma_addr_t descs_phys; + u32 first_td_desc; + struct cppi41_channel *chan_busy[ALLOC_DECS_NUM]; + + void __iomem *ctrl_mem; + void __iomem *sched_mem; + void __iomem *qmgr_mem; + unsigned int irq; + const struct chan_queues *queues_rx; + const struct chan_queues *queues_tx; + struct chan_queues td_queue; + u16 first_completion_queue; + u16 qmgr_num_pend; + u32 n_chans; + u8 platform; + + struct list_head pending; /* Pending queued transfers */ + spinlock_t lock; /* Lock for pending list */ + + /* context for suspend/resume */ + unsigned int dma_tdfdq; + + bool is_suspended; +}; + +static struct chan_queues am335x_usb_queues_tx[] = { + /* USB0 ENDP 1 */ + [ 0] = { .submit = 32, .complete = 93}, + [ 1] = { .submit = 34, .complete = 94}, + [ 2] = { .submit = 36, .complete = 95}, + [ 3] = { .submit = 38, .complete = 96}, + [ 4] = { .submit = 40, .complete = 97}, + [ 5] = { .submit = 42, .complete = 98}, + [ 6] = { .submit = 44, .complete = 99}, + [ 7] = { .submit = 46, .complete = 100}, + [ 8] = { .submit = 48, .complete = 101}, + [ 9] = { .submit = 50, .complete = 102}, + [10] = { .submit = 52, .complete = 103}, + [11] = { .submit = 54, .complete = 104}, + [12] = { .submit = 56, .complete = 105}, + [13] = { .submit = 58, .complete = 106}, + [14] = { .submit = 60, .complete = 107}, + + /* USB1 ENDP1 */ + [15] = { .submit = 62, .complete = 125}, + [16] = { .submit = 64, .complete = 126}, + [17] = { .submit = 66, .complete = 127}, + [18] = { .submit = 68, .complete = 128}, + [19] = { .submit = 70, .complete = 129}, + [20] = { .submit = 72, .complete = 130}, + [21] = { .submit = 74, .complete = 131}, + [22] = { .submit = 76, .complete = 132}, + [23] = { .submit = 78, .complete = 133}, + [24] = { .submit = 80, .complete = 134}, + [25] = { .submit = 82, .complete = 135}, + [26] = { .submit = 84, .complete = 136}, + [27] = { .submit = 86, .complete = 137}, + [28] = { .submit = 88, .complete = 138}, + [29] = { .submit = 90, .complete = 139}, +}; + +static const struct chan_queues am335x_usb_queues_rx[] = { + /* USB0 ENDP 1 */ + [ 0] = { .submit = 1, .complete = 109}, + [ 1] = { .submit = 2, .complete = 110}, + [ 2] = { .submit = 3, .complete = 111}, + [ 3] = { .submit = 4, .complete = 112}, + [ 4] = { .submit = 5, .complete = 113}, + [ 5] = { .submit = 6, .complete = 114}, + [ 6] = { .submit = 7, .complete = 115}, + [ 7] = { .submit = 8, .complete = 116}, + [ 8] = { .submit = 9, .complete = 117}, + [ 9] = { .submit = 10, .complete = 118}, + [10] = { .submit = 11, .complete = 119}, + [11] = { .submit = 12, .complete = 120}, + [12] = { .submit = 13, .complete = 121}, + [13] = { .submit = 14, .complete = 122}, + [14] = { .submit = 15, .complete = 123}, + + /* USB1 ENDP 1 */ + [15] = { .submit = 16, .complete = 141}, + [16] = { .submit = 17, .complete = 142}, + [17] = { .submit = 18, .complete = 143}, + [18] = { .submit = 19, .complete = 144}, + [19] = { .submit = 20, .complete = 145}, + [20] = { .submit = 21, .complete = 146}, + [21] = { .submit = 22, .complete = 147}, + [22] = { .submit = 23, .complete = 148}, + [23] = { .submit = 24, .complete = 149}, + [24] = { .submit = 25, .complete = 150}, + [25] = { .submit = 26, .complete = 151}, + [26] = { .submit = 27, .complete = 152}, + [27] = { .submit = 28, .complete = 153}, + [28] = { .submit = 29, .complete = 154}, + [29] = { .submit = 30, .complete = 155}, +}; + +static const struct chan_queues da8xx_usb_queues_tx[] = { + [0] = { .submit = 16, .complete = 24}, + [1] = { .submit = 18, .complete = 24}, + [2] = { .submit = 20, .complete = 24}, + [3] = { .submit = 22, .complete = 24}, +}; + +static const struct chan_queues da8xx_usb_queues_rx[] = { + [0] = { .submit = 1, .complete = 26}, + [1] = { .submit = 3, .complete = 26}, + [2] = { .submit = 5, .complete = 26}, + [3] = { .submit = 7, .complete = 26}, +}; + +struct cppi_glue_infos { + const struct chan_queues *queues_rx; + const struct chan_queues *queues_tx; + struct chan_queues td_queue; + u16 first_completion_queue; + u16 qmgr_num_pend; +}; + +static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c) +{ + return container_of(c, struct cppi41_channel, chan); +} + +static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc) +{ + struct cppi41_channel *c; + u32 descs_size; + u32 desc_num; + + descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM; + + if (!((desc >= cdd->descs_phys) && + (desc < (cdd->descs_phys + descs_size)))) { + return NULL; + } + + desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc); + BUG_ON(desc_num >= ALLOC_DECS_NUM); + c = cdd->chan_busy[desc_num]; + cdd->chan_busy[desc_num] = NULL; + + /* Usecount for chan_busy[], paired with push_desc_queue() */ + pm_runtime_put(cdd->ddev.dev); + + return c; +} + +static void cppi_writel(u32 val, void *__iomem *mem) +{ + __raw_writel(val, mem); +} + +static u32 cppi_readl(void *__iomem *mem) +{ + return __raw_readl(mem); +} + +static u32 pd_trans_len(u32 val) +{ + return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1); +} + +static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num) +{ + u32 desc; + + desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num)); + desc &= ~0x1f; + return desc; +} + +static irqreturn_t cppi41_irq(int irq, void *data) +{ + struct cppi41_dd *cdd = data; + u16 first_completion_queue = cdd->first_completion_queue; + u16 qmgr_num_pend = cdd->qmgr_num_pend; + struct cppi41_channel *c; + int i; + + for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend; + i++) { + u32 val; + u32 q_num; + + val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i)); + if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) { + u32 mask; + /* set corresponding bit for completion Q 93 */ + mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue); + /* not set all bits for queues less than Q 93 */ + mask--; + /* now invert and keep only Q 93+ set */ + val &= ~mask; + } + + if (val) + __iormb(); + + while (val) { + u32 desc, len; + + /* + * This should never trigger, see the comments in + * push_desc_queue() + */ + WARN_ON(cdd->is_suspended); + + q_num = __fls(val); + val &= ~(1 << q_num); + q_num += 32 * i; + desc = cppi41_pop_desc(cdd, q_num); + c = desc_to_chan(cdd, desc); + if (WARN_ON(!c)) { + pr_err("%s() q %d desc %08x\n", __func__, + q_num, desc); + continue; + } + + if (c->desc->pd2 & PD2_ZERO_LENGTH) + len = 0; + else + len = pd_trans_len(c->desc->pd0); + + c->residue = pd_trans_len(c->desc->pd6) - len; + dma_cookie_complete(&c->txd); + dmaengine_desc_get_callback_invoke(&c->txd, NULL); + } + } + return IRQ_HANDLED; +} + +static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx) +{ + dma_cookie_t cookie; + + cookie = dma_cookie_assign(tx); + + return cookie; +} + +static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct cppi41_channel *c = to_cpp41_chan(chan); + struct cppi41_dd *cdd = c->cdd; + int error; + + error = pm_runtime_get_sync(cdd->ddev.dev); + if (error < 0) { + dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", + __func__, error); + pm_runtime_put_noidle(cdd->ddev.dev); + + return error; + } + + dma_cookie_init(chan); + dma_async_tx_descriptor_init(&c->txd, chan); + c->txd.tx_submit = cppi41_tx_submit; + + if (!c->is_tx) + cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0); + + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); + + return 0; +} + +static void cppi41_dma_free_chan_resources(struct dma_chan *chan) +{ + struct cppi41_channel *c = to_cpp41_chan(chan); + struct cppi41_dd *cdd = c->cdd; + int error; + + error = pm_runtime_get_sync(cdd->ddev.dev); + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + + return; + } + + WARN_ON(!list_empty(&cdd->pending)); + + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); +} + +static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct cppi41_channel *c = to_cpp41_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + + dma_set_residue(txstate, c->residue); + + return ret; +} + +static void push_desc_queue(struct cppi41_channel *c) +{ + struct cppi41_dd *cdd = c->cdd; + u32 desc_num; + u32 desc_phys; + u32 reg; + + c->residue = 0; + + reg = GCR_CHAN_ENABLE; + if (!c->is_tx) { + reg |= GCR_STARV_RETRY; + reg |= GCR_DESC_TYPE_HOST; + reg |= c->q_comp_num; + } + + cppi_writel(reg, c->gcr_reg); + + /* + * We don't use writel() but __raw_writel() so we have to make sure + * that the DMA descriptor in coherent memory made to the main memory + * before starting the dma engine. + */ + __iowmb(); + + /* + * DMA transfers can take at least 200ms to complete with USB mass + * storage connected. To prevent autosuspend timeouts, we must use + * pm_runtime_get/put() when chan_busy[] is modified. This will get + * cleared in desc_to_chan() or cppi41_stop_chan() depending on the + * outcome of the transfer. + */ + pm_runtime_get(cdd->ddev.dev); + + desc_phys = lower_32_bits(c->desc_phys); + desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); + WARN_ON(cdd->chan_busy[desc_num]); + cdd->chan_busy[desc_num] = c; + + reg = (sizeof(struct cppi41_desc) - 24) / 4; + reg |= desc_phys; + cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num)); +} + +/* + * Caller must hold cdd->lock to prevent push_desc_queue() + * getting called out of order. We have both cppi41_dma_issue_pending() + * and cppi41_runtime_resume() call this function. + */ +static void cppi41_run_queue(struct cppi41_dd *cdd) +{ + struct cppi41_channel *c, *_c; + + list_for_each_entry_safe(c, _c, &cdd->pending, node) { + push_desc_queue(c); + list_del(&c->node); + } +} + +static void cppi41_dma_issue_pending(struct dma_chan *chan) +{ + struct cppi41_channel *c = to_cpp41_chan(chan); + struct cppi41_dd *cdd = c->cdd; + unsigned long flags; + int error; + + error = pm_runtime_get(cdd->ddev.dev); + if ((error != -EINPROGRESS) && error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", + error); + + return; + } + + spin_lock_irqsave(&cdd->lock, flags); + list_add_tail(&c->node, &cdd->pending); + if (!cdd->is_suspended) + cppi41_run_queue(cdd); + spin_unlock_irqrestore(&cdd->lock, flags); + + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); +} + +static u32 get_host_pd0(u32 length) +{ + u32 reg; + + reg = DESC_TYPE_HOST << DESC_TYPE; + reg |= length; + + return reg; +} + +static u32 get_host_pd1(struct cppi41_channel *c) +{ + u32 reg; + + reg = 0; + + return reg; +} + +static u32 get_host_pd2(struct cppi41_channel *c) +{ + u32 reg; + + reg = DESC_TYPE_USB; + reg |= c->q_comp_num; + + return reg; +} + +static u32 get_host_pd3(u32 length) +{ + u32 reg; + + /* PD3 = packet size */ + reg = length; + + return reg; +} + +static u32 get_host_pd6(u32 length) +{ + u32 reg; + + /* PD6 buffer size */ + reg = DESC_PD_COMPLETE; + reg |= length; + + return reg; +} + +static u32 get_host_pd4_or_7(u32 addr) +{ + u32 reg; + + reg = addr; + + return reg; +} + +static u32 get_host_pd5(void) +{ + u32 reg; + + reg = 0; + + return reg; +} + +static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len, + enum dma_transfer_direction dir, unsigned long tx_flags, void *context) +{ + struct cppi41_channel *c = to_cpp41_chan(chan); + struct dma_async_tx_descriptor *txd = NULL; + struct cppi41_dd *cdd = c->cdd; + struct cppi41_desc *d; + struct scatterlist *sg; + unsigned int i; + int error; + + error = pm_runtime_get(cdd->ddev.dev); + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + + return NULL; + } + + if (cdd->is_suspended) + goto err_out_not_ready; + + d = c->desc; + for_each_sg(sgl, sg, sg_len, i) { + u32 addr; + u32 len; + + /* We need to use more than one desc once musb supports sg */ + addr = lower_32_bits(sg_dma_address(sg)); + len = sg_dma_len(sg); + + d->pd0 = get_host_pd0(len); + d->pd1 = get_host_pd1(c); + d->pd2 = get_host_pd2(c); + d->pd3 = get_host_pd3(len); + d->pd4 = get_host_pd4_or_7(addr); + d->pd5 = get_host_pd5(); + d->pd6 = get_host_pd6(len); + d->pd7 = get_host_pd4_or_7(addr); + + d++; + } + + txd = &c->txd; + +err_out_not_ready: + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); + + return txd; +} + +static void cppi41_compute_td_desc(struct cppi41_desc *d) +{ + d->pd0 = DESC_TYPE_TEARD << DESC_TYPE; +} + +static int cppi41_tear_down_chan(struct cppi41_channel *c) +{ + struct dmaengine_result abort_result; + struct cppi41_dd *cdd = c->cdd; + struct cppi41_desc *td; + u32 reg; + u32 desc_phys; + u32 td_desc_phys; + + td = cdd->cd; + td += cdd->first_td_desc; + + td_desc_phys = cdd->descs_phys; + td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc); + + if (!c->td_queued) { + cppi41_compute_td_desc(td); + __iowmb(); + + reg = (sizeof(struct cppi41_desc) - 24) / 4; + reg |= td_desc_phys; + cppi_writel(reg, cdd->qmgr_mem + + QMGR_QUEUE_D(cdd->td_queue.submit)); + + reg = GCR_CHAN_ENABLE; + if (!c->is_tx) { + reg |= GCR_STARV_RETRY; + reg |= GCR_DESC_TYPE_HOST; + reg |= cdd->td_queue.complete; + } + reg |= GCR_TEARDOWN; + cppi_writel(reg, c->gcr_reg); + c->td_queued = 1; + c->td_retry = 500; + } + + if (!c->td_seen || !c->td_desc_seen) { + + desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete); + if (!desc_phys && c->is_tx) + desc_phys = cppi41_pop_desc(cdd, c->q_comp_num); + + if (desc_phys == c->desc_phys) { + c->td_desc_seen = 1; + + } else if (desc_phys == td_desc_phys) { + u32 pd0; + + __iormb(); + pd0 = td->pd0; + WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD); + WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX)); + WARN_ON((pd0 & 0x1f) != c->port_num); + c->td_seen = 1; + } else if (desc_phys) { + WARN_ON_ONCE(1); + } + } + c->td_retry--; + /* + * If the TX descriptor / channel is in use, the caller needs to poke + * his TD bit multiple times. After that he hardware releases the + * transfer descriptor followed by TD descriptor. Waiting seems not to + * cause any difference. + * RX seems to be thrown out right away. However once the TearDown + * descriptor gets through we are done. If we have seen the transfer + * descriptor before the TD we fetch it from enqueue, it has to be + * there waiting for us. + */ + if (!c->td_seen && c->td_retry) { + udelay(1); + return -EAGAIN; + } + WARN_ON(!c->td_retry); + + if (!c->td_desc_seen) { + desc_phys = cppi41_pop_desc(cdd, c->q_num); + if (!desc_phys) + desc_phys = cppi41_pop_desc(cdd, c->q_comp_num); + WARN_ON(!desc_phys); + } + + c->td_queued = 0; + c->td_seen = 0; + c->td_desc_seen = 0; + cppi_writel(0, c->gcr_reg); + + /* Invoke the callback to do the necessary clean-up */ + abort_result.result = DMA_TRANS_ABORTED; + dma_cookie_complete(&c->txd); + dmaengine_desc_get_callback_invoke(&c->txd, &abort_result); + + return 0; +} + +static int cppi41_stop_chan(struct dma_chan *chan) +{ + struct cppi41_channel *c = to_cpp41_chan(chan); + struct cppi41_dd *cdd = c->cdd; + u32 desc_num; + u32 desc_phys; + int ret; + + desc_phys = lower_32_bits(c->desc_phys); + desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); + if (!cdd->chan_busy[desc_num]) { + struct cppi41_channel *cc, *_ct; + + /* + * channels might still be in the pending list if + * cppi41_dma_issue_pending() is called after + * cppi41_runtime_suspend() is called + */ + list_for_each_entry_safe(cc, _ct, &cdd->pending, node) { + if (cc != c) + continue; + list_del(&cc->node); + break; + } + return 0; + } + + ret = cppi41_tear_down_chan(c); + if (ret) + return ret; + + WARN_ON(!cdd->chan_busy[desc_num]); + cdd->chan_busy[desc_num] = NULL; + + /* Usecount for chan_busy[], paired with push_desc_queue() */ + pm_runtime_put(cdd->ddev.dev); + + return 0; +} + +static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd) +{ + struct cppi41_channel *cchan, *chans; + int i; + u32 n_chans = cdd->n_chans; + + /* + * The channels can only be used as TX or as RX. So we add twice + * that much dma channels because USB can only do RX or TX. + */ + n_chans *= 2; + + chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL); + if (!chans) + return -ENOMEM; + + for (i = 0; i < n_chans; i++) { + cchan = &chans[i]; + + cchan->cdd = cdd; + if (i & 1) { + cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1); + cchan->is_tx = 1; + } else { + cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1); + cchan->is_tx = 0; + } + cchan->port_num = i >> 1; + cchan->desc = &cdd->cd[i]; + cchan->desc_phys = cdd->descs_phys; + cchan->desc_phys += i * sizeof(struct cppi41_desc); + cchan->chan.device = &cdd->ddev; + list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels); + } + cdd->first_td_desc = n_chans; + + return 0; +} + +static void purge_descs(struct device *dev, struct cppi41_dd *cdd) +{ + unsigned int mem_decs; + int i; + + mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc); + + for (i = 0; i < DESCS_AREAS; i++) { + + cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i)); + cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i)); + + dma_free_coherent(dev, mem_decs, cdd->cd, + cdd->descs_phys); + } +} + +static void disable_sched(struct cppi41_dd *cdd) +{ + cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL); +} + +static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd) +{ + disable_sched(cdd); + + purge_descs(dev, cdd); + + cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE); + cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE); + dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch, + cdd->scratch_phys); +} + +static int init_descs(struct device *dev, struct cppi41_dd *cdd) +{ + unsigned int desc_size; + unsigned int mem_decs; + int i; + u32 reg; + u32 idx; + + BUILD_BUG_ON(sizeof(struct cppi41_desc) & + (sizeof(struct cppi41_desc) - 1)); + BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32); + BUILD_BUG_ON(ALLOC_DECS_NUM < 32); + + desc_size = sizeof(struct cppi41_desc); + mem_decs = ALLOC_DECS_NUM * desc_size; + + idx = 0; + for (i = 0; i < DESCS_AREAS; i++) { + + reg = idx << QMGR_MEMCTRL_IDX_SH; + reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH; + reg |= ilog2(ALLOC_DECS_NUM) - 5; + + BUILD_BUG_ON(DESCS_AREAS != 1); + cdd->cd = dma_alloc_coherent(dev, mem_decs, + &cdd->descs_phys, GFP_KERNEL); + if (!cdd->cd) + return -ENOMEM; + + cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i)); + cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i)); + + idx += ALLOC_DECS_NUM; + } + return 0; +} + +static void init_sched(struct cppi41_dd *cdd) +{ + unsigned ch; + unsigned word; + u32 reg; + + word = 0; + cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL); + for (ch = 0; ch < cdd->n_chans; ch += 2) { + + reg = SCHED_ENTRY0_CHAN(ch); + reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX; + + reg |= SCHED_ENTRY2_CHAN(ch + 1); + reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX; + cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word)); + word++; + } + reg = cdd->n_chans * 2 - 1; + reg |= DMA_SCHED_CTRL_EN; + cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL); +} + +static int init_cppi41(struct device *dev, struct cppi41_dd *cdd) +{ + int ret; + + BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1)); + cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE, + &cdd->scratch_phys, GFP_KERNEL); + if (!cdd->qmgr_scratch) + return -ENOMEM; + + cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE); + cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE); + cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE); + + ret = init_descs(dev, cdd); + if (ret) + goto err_td; + + cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ); + init_sched(cdd); + + return 0; +err_td: + deinit_cppi41(dev, cdd); + return ret; +} + +static struct platform_driver cpp41_dma_driver; +/* + * The param format is: + * X Y + * X: Port + * Y: 0 = RX else TX + */ +#define INFO_PORT 0 +#define INFO_IS_TX 1 + +static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param) +{ + struct cppi41_channel *cchan; + struct cppi41_dd *cdd; + const struct chan_queues *queues; + u32 *num = param; + + if (chan->device->dev->driver != &cpp41_dma_driver.driver) + return false; + + cchan = to_cpp41_chan(chan); + + if (cchan->port_num != num[INFO_PORT]) + return false; + + if (cchan->is_tx && !num[INFO_IS_TX]) + return false; + cdd = cchan->cdd; + if (cchan->is_tx) + queues = cdd->queues_tx; + else + queues = cdd->queues_rx; + + BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) != + ARRAY_SIZE(am335x_usb_queues_tx)); + if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx))) + return false; + + cchan->q_num = queues[cchan->port_num].submit; + cchan->q_comp_num = queues[cchan->port_num].complete; + return true; +} + +static struct of_dma_filter_info cpp41_dma_info = { + .filter_fn = cpp41_dma_filter_fn, +}; + +static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + int count = dma_spec->args_count; + struct of_dma_filter_info *info = ofdma->of_dma_data; + + if (!info || !info->filter_fn) + return NULL; + + if (count != 2) + return NULL; + + return dma_request_channel(info->dma_cap, info->filter_fn, + &dma_spec->args[0]); +} + +static const struct cppi_glue_infos am335x_usb_infos = { + .queues_rx = am335x_usb_queues_rx, + .queues_tx = am335x_usb_queues_tx, + .td_queue = { .submit = 31, .complete = 0 }, + .first_completion_queue = 93, + .qmgr_num_pend = 5, +}; + +static const struct cppi_glue_infos da8xx_usb_infos = { + .queues_rx = da8xx_usb_queues_rx, + .queues_tx = da8xx_usb_queues_tx, + .td_queue = { .submit = 31, .complete = 0 }, + .first_completion_queue = 24, + .qmgr_num_pend = 2, +}; + +static const struct of_device_id cppi41_dma_ids[] = { + { .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos}, + { .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos}, + {}, +}; +MODULE_DEVICE_TABLE(of, cppi41_dma_ids); + +static const struct cppi_glue_infos *get_glue_info(struct device *dev) +{ + const struct of_device_id *of_id; + + of_id = of_match_node(cppi41_dma_ids, dev->of_node); + if (!of_id) + return NULL; + return of_id->data; +} + +#define CPPI41_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +static int cppi41_dma_probe(struct platform_device *pdev) +{ + struct cppi41_dd *cdd; + struct device *dev = &pdev->dev; + const struct cppi_glue_infos *glue_info; + int index; + int irq; + int ret; + + glue_info = get_glue_info(dev); + if (!glue_info) + return -EINVAL; + + cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL); + if (!cdd) + return -ENOMEM; + + dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask); + cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources; + cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources; + cdd->ddev.device_tx_status = cppi41_dma_tx_status; + cdd->ddev.device_issue_pending = cppi41_dma_issue_pending; + cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg; + cdd->ddev.device_terminate_all = cppi41_stop_chan; + cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS; + cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS; + cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + cdd->ddev.dev = dev; + INIT_LIST_HEAD(&cdd->ddev.channels); + cpp41_dma_info.dma_cap = cdd->ddev.cap_mask; + + index = of_property_match_string(dev->of_node, + "reg-names", "controller"); + if (index < 0) + return index; + + cdd->ctrl_mem = devm_platform_ioremap_resource(pdev, index); + if (IS_ERR(cdd->ctrl_mem)) + return PTR_ERR(cdd->ctrl_mem); + + cdd->sched_mem = devm_platform_ioremap_resource(pdev, index + 1); + if (IS_ERR(cdd->sched_mem)) + return PTR_ERR(cdd->sched_mem); + + cdd->qmgr_mem = devm_platform_ioremap_resource(pdev, index + 2); + if (IS_ERR(cdd->qmgr_mem)) + return PTR_ERR(cdd->qmgr_mem); + + spin_lock_init(&cdd->lock); + INIT_LIST_HEAD(&cdd->pending); + + platform_set_drvdata(pdev, cdd); + + pm_runtime_enable(dev); + pm_runtime_set_autosuspend_delay(dev, 100); + pm_runtime_use_autosuspend(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) + goto err_get_sync; + + cdd->queues_rx = glue_info->queues_rx; + cdd->queues_tx = glue_info->queues_tx; + cdd->td_queue = glue_info->td_queue; + cdd->qmgr_num_pend = glue_info->qmgr_num_pend; + cdd->first_completion_queue = glue_info->first_completion_queue; + + /* Parse new and deprecated dma-channels properties */ + ret = of_property_read_u32(dev->of_node, + "dma-channels", &cdd->n_chans); + if (ret) + ret = of_property_read_u32(dev->of_node, + "#dma-channels", &cdd->n_chans); + if (ret) + goto err_get_n_chans; + + ret = init_cppi41(dev, cdd); + if (ret) + goto err_init_cppi; + + ret = cppi41_add_chans(dev, cdd); + if (ret) + goto err_chans; + + irq = irq_of_parse_and_map(dev->of_node, 0); + if (!irq) { + ret = -EINVAL; + goto err_chans; + } + + ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED, + dev_name(dev), cdd); + if (ret) + goto err_chans; + cdd->irq = irq; + + ret = dma_async_device_register(&cdd->ddev); + if (ret) + goto err_chans; + + ret = of_dma_controller_register(dev->of_node, + cppi41_dma_xlate, &cpp41_dma_info); + if (ret) + goto err_of; + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return 0; +err_of: + dma_async_device_unregister(&cdd->ddev); +err_chans: + deinit_cppi41(dev, cdd); +err_init_cppi: + pm_runtime_dont_use_autosuspend(dev); +err_get_n_chans: +err_get_sync: + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + return ret; +} + +static int cppi41_dma_remove(struct platform_device *pdev) +{ + struct cppi41_dd *cdd = platform_get_drvdata(pdev); + int error; + + error = pm_runtime_get_sync(&pdev->dev); + if (error < 0) + dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n", + __func__, error); + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&cdd->ddev); + + devm_free_irq(&pdev->dev, cdd->irq, cdd); + deinit_cppi41(&pdev->dev, cdd); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; +} + +static int __maybe_unused cppi41_suspend(struct device *dev) +{ + struct cppi41_dd *cdd = dev_get_drvdata(dev); + + cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ); + disable_sched(cdd); + + return 0; +} + +static int __maybe_unused cppi41_resume(struct device *dev) +{ + struct cppi41_dd *cdd = dev_get_drvdata(dev); + struct cppi41_channel *c; + int i; + + for (i = 0; i < DESCS_AREAS; i++) + cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i)); + + list_for_each_entry(c, &cdd->ddev.channels, chan.device_node) + if (!c->is_tx) + cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0); + + init_sched(cdd); + + cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ); + cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE); + cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE); + cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE); + + return 0; +} + +static int __maybe_unused cppi41_runtime_suspend(struct device *dev) +{ + struct cppi41_dd *cdd = dev_get_drvdata(dev); + unsigned long flags; + + spin_lock_irqsave(&cdd->lock, flags); + cdd->is_suspended = true; + WARN_ON(!list_empty(&cdd->pending)); + spin_unlock_irqrestore(&cdd->lock, flags); + + return 0; +} + +static int __maybe_unused cppi41_runtime_resume(struct device *dev) +{ + struct cppi41_dd *cdd = dev_get_drvdata(dev); + unsigned long flags; + + spin_lock_irqsave(&cdd->lock, flags); + cdd->is_suspended = false; + cppi41_run_queue(cdd); + spin_unlock_irqrestore(&cdd->lock, flags); + + return 0; +} + +static const struct dev_pm_ops cppi41_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume) + SET_RUNTIME_PM_OPS(cppi41_runtime_suspend, + cppi41_runtime_resume, + NULL) +}; + +static struct platform_driver cpp41_dma_driver = { + .probe = cppi41_dma_probe, + .remove = cppi41_dma_remove, + .driver = { + .name = "cppi41-dma-engine", + .pm = &cppi41_pm_ops, + .of_match_table = of_match_ptr(cppi41_dma_ids), + }, +}; + +module_platform_driver(cpp41_dma_driver); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sebastian Andrzej Siewior "); diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c new file mode 100644 index 0000000000..7f17ee87a6 --- /dev/null +++ b/drivers/dma/ti/dma-crossbar.c @@ -0,0 +1,478 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TI_XBAR_DRA7 0 +#define TI_XBAR_AM335X 1 +static const u32 ti_xbar_type[] = { + [TI_XBAR_DRA7] = TI_XBAR_DRA7, + [TI_XBAR_AM335X] = TI_XBAR_AM335X, +}; + +static const struct of_device_id ti_dma_xbar_match[] = { + { + .compatible = "ti,dra7-dma-crossbar", + .data = &ti_xbar_type[TI_XBAR_DRA7], + }, + { + .compatible = "ti,am335x-edma-crossbar", + .data = &ti_xbar_type[TI_XBAR_AM335X], + }, + {}, +}; + +/* Crossbar on AM335x/AM437x family */ +#define TI_AM335X_XBAR_LINES 64 + +struct ti_am335x_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + + u32 xbar_events; /* maximum number of events to select in xbar */ + u32 dma_requests; /* number of DMA requests on eDMA */ +}; + +struct ti_am335x_xbar_map { + u16 dma_line; + u8 mux_val; +}; + +static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u8 val) +{ + /* + * TPCC_EVT_MUX_60_63 register layout is different than the + * rest, in the sense, that event 63 is mapped to lowest byte + * and event 60 is mapped to highest, handle it separately. + */ + if (event >= 60 && event <= 63) + writeb_relaxed(val, iomem + (63 - event % 4)); + else + writeb_relaxed(val, iomem + event); +} + +static void ti_am335x_xbar_free(struct device *dev, void *route_data) +{ + struct ti_am335x_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_am335x_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR event %u on channel %u\n", + map->mux_val, map->dma_line); + + ti_am335x_xbar_write(xbar->iomem, map->dma_line, 0); + kfree(map); +} + +static void *ti_am335x_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_am335x_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_am335x_xbar_map *map; + + if (dma_spec->args_count != 3) + return ERR_PTR(-EINVAL); + + if (dma_spec->args[2] >= xbar->xbar_events) { + dev_err(&pdev->dev, "Invalid XBAR event number: %d\n", + dma_spec->args[2]); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[0] >= xbar->dma_requests) { + dev_err(&pdev->dev, "Invalid DMA request line number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + return ERR_PTR(-ENOMEM); + } + + map->dma_line = (u16)dma_spec->args[0]; + map->mux_val = (u8)dma_spec->args[2]; + + dma_spec->args[2] = 0; + dma_spec->args_count = 2; + + dev_dbg(&pdev->dev, "Mapping XBAR event%u to DMA%u\n", + map->mux_val, map->dma_line); + + ti_am335x_xbar_write(xbar->iomem, map->dma_line, map->mux_val); + + return map; +} + +static const struct of_device_id ti_am335x_master_match[] __maybe_unused = { + { .compatible = "ti,edma3-tpcc", }, + {}, +}; + +static int ti_am335x_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; + struct device_node *dma_node; + struct ti_am335x_xbar_data *xbar; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + match = of_match_node(ti_am335x_master_match, dma_node); + if (!match) { + dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); + return -EINVAL; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_AM335X_XBAR_LINES); + xbar->dma_requests = TI_AM335X_XBAR_LINES; + } + of_node_put(dma_node); + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_events)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_AM335X_XBAR_LINES); + xbar->xbar_events = TI_AM335X_XBAR_LINES; + } + + iomem = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_am335x_xbar_free; + + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_am335x_xbar_write(xbar->iomem, i, 0); + + ret = of_dma_router_register(node, ti_am335x_xbar_route_allocate, + &xbar->dmarouter); + + return ret; +} + +/* Crossbar on DRA7xx family */ +#define TI_DRA7_XBAR_OUTPUTS 127 +#define TI_DRA7_XBAR_INPUTS 256 + +struct ti_dra7_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + struct mutex mutex; + unsigned long *dma_inuse; + + u16 safe_val; /* Value to rest the crossbar lines */ + u32 xbar_requests; /* number of DMA requests connected to XBAR */ + u32 dma_requests; /* number of DMA requests forwarded to DMA */ + u32 dma_offset; +}; + +struct ti_dra7_xbar_map { + u16 xbar_in; + int xbar_out; +}; + +static inline void ti_dra7_xbar_write(void __iomem *iomem, int xbar, u16 val) +{ + writew_relaxed(val, iomem + (xbar * 2)); +} + +static void ti_dra7_xbar_free(struct device *dev, void *route_data) +{ + struct ti_dra7_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_dra7_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n", + map->xbar_in, map->xbar_out); + + ti_dra7_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val); + mutex_lock(&xbar->mutex); + clear_bit(map->xbar_out, xbar->dma_inuse); + mutex_unlock(&xbar->mutex); + kfree(map); +} + +static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_dra7_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_dra7_xbar_map *map; + + if (dma_spec->args[0] >= xbar->xbar_requests) { + dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", + dma_spec->args[0]); + put_device(&pdev->dev); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + put_device(&pdev->dev); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + put_device(&pdev->dev); + return ERR_PTR(-ENOMEM); + } + + mutex_lock(&xbar->mutex); + map->xbar_out = find_first_zero_bit(xbar->dma_inuse, + xbar->dma_requests); + if (map->xbar_out == xbar->dma_requests) { + mutex_unlock(&xbar->mutex); + dev_err(&pdev->dev, "Run out of free DMA requests\n"); + kfree(map); + of_node_put(dma_spec->np); + put_device(&pdev->dev); + return ERR_PTR(-ENOMEM); + } + set_bit(map->xbar_out, xbar->dma_inuse); + mutex_unlock(&xbar->mutex); + + map->xbar_in = (u16)dma_spec->args[0]; + + dma_spec->args[0] = map->xbar_out + xbar->dma_offset; + + dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n", + map->xbar_in, map->xbar_out); + + ti_dra7_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in); + + return map; +} + +#define TI_XBAR_EDMA_OFFSET 0 +#define TI_XBAR_SDMA_OFFSET 1 +static const u32 ti_dma_offset[] = { + [TI_XBAR_EDMA_OFFSET] = 0, + [TI_XBAR_SDMA_OFFSET] = 1, +}; + +static const struct of_device_id ti_dra7_master_match[] __maybe_unused = { + { + .compatible = "ti,omap4430-sdma", + .data = &ti_dma_offset[TI_XBAR_SDMA_OFFSET], + }, + { + .compatible = "ti,edma3", + .data = &ti_dma_offset[TI_XBAR_EDMA_OFFSET], + }, + { + .compatible = "ti,edma3-tpcc", + .data = &ti_dma_offset[TI_XBAR_EDMA_OFFSET], + }, + {}, +}; + +static inline void ti_dra7_xbar_reserve(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + set_bit(offset + (len - 1), p); +} + +static int ti_dra7_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; + struct device_node *dma_node; + struct ti_dra7_xbar_data *xbar; + struct property *prop; + u32 safe_val; + int sz; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + match = of_match_node(ti_dra7_master_match, dma_node); + if (!match) { + dev_err(&pdev->dev, "DMA master is not supported\n"); + of_node_put(dma_node); + return -EINVAL; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_DRA7_XBAR_OUTPUTS); + xbar->dma_requests = TI_DRA7_XBAR_OUTPUTS; + } + of_node_put(dma_node); + + xbar->dma_inuse = devm_kcalloc(&pdev->dev, + BITS_TO_LONGS(xbar->dma_requests), + sizeof(unsigned long), GFP_KERNEL); + if (!xbar->dma_inuse) + return -ENOMEM; + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_DRA7_XBAR_INPUTS); + xbar->xbar_requests = TI_DRA7_XBAR_INPUTS; + } + + if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val)) + xbar->safe_val = (u16)safe_val; + + + prop = of_find_property(node, "ti,reserved-dma-request-ranges", &sz); + if (prop) { + const char pname[] = "ti,reserved-dma-request-ranges"; + u32 (*rsv_events)[2]; + size_t nelm = sz / sizeof(*rsv_events); + int i; + + if (!nelm) + return -EINVAL; + + rsv_events = kcalloc(nelm, sizeof(*rsv_events), GFP_KERNEL); + if (!rsv_events) + return -ENOMEM; + + ret = of_property_read_u32_array(node, pname, (u32 *)rsv_events, + nelm * 2); + if (ret) { + kfree(rsv_events); + return ret; + } + + for (i = 0; i < nelm; i++) { + ti_dra7_xbar_reserve(rsv_events[i][0], rsv_events[i][1], + xbar->dma_inuse); + } + kfree(rsv_events); + } + + iomem = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_dra7_xbar_free; + xbar->dma_offset = *(u32 *)match->data; + + mutex_init(&xbar->mutex); + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) { + if (!test_bit(i, xbar->dma_inuse)) + ti_dra7_xbar_write(xbar->iomem, i, xbar->safe_val); + } + + ret = of_dma_router_register(node, ti_dra7_xbar_route_allocate, + &xbar->dmarouter); + if (ret) { + /* Restore the defaults for the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) { + if (!test_bit(i, xbar->dma_inuse)) + ti_dra7_xbar_write(xbar->iomem, i, i); + } + } + + return ret; +} + +static int ti_dma_xbar_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + int ret; + + match = of_match_node(ti_dma_xbar_match, pdev->dev.of_node); + if (unlikely(!match)) + return -EINVAL; + + switch (*(u32 *)match->data) { + case TI_XBAR_DRA7: + ret = ti_dra7_xbar_probe(pdev); + break; + case TI_XBAR_AM335X: + ret = ti_am335x_xbar_probe(pdev); + break; + default: + dev_err(&pdev->dev, "Unsupported crossbar\n"); + ret = -ENODEV; + break; + } + + return ret; +} + +static struct platform_driver ti_dma_xbar_driver = { + .driver = { + .name = "ti-dma-crossbar", + .of_match_table = ti_dma_xbar_match, + }, + .probe = ti_dma_xbar_probe, +}; + +static int omap_dmaxbar_init(void) +{ + return platform_driver_register(&ti_dma_xbar_driver); +} +arch_initcall(omap_dmaxbar_init); diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c new file mode 100644 index 0000000000..33d6d931b3 --- /dev/null +++ b/drivers/dma/ti/edma.c @@ -0,0 +1,2690 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * TI EDMA DMA engine driver + * + * Copyright 2012 Texas Instruments + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../dmaengine.h" +#include "../virt-dma.h" + +/* Offsets matching "struct edmacc_param" */ +#define PARM_OPT 0x00 +#define PARM_SRC 0x04 +#define PARM_A_B_CNT 0x08 +#define PARM_DST 0x0c +#define PARM_SRC_DST_BIDX 0x10 +#define PARM_LINK_BCNTRLD 0x14 +#define PARM_SRC_DST_CIDX 0x18 +#define PARM_CCNT 0x1c + +#define PARM_SIZE 0x20 + +/* Offsets for EDMA CC global channel registers and their shadows */ +#define SH_ER 0x00 /* 64 bits */ +#define SH_ECR 0x08 /* 64 bits */ +#define SH_ESR 0x10 /* 64 bits */ +#define SH_CER 0x18 /* 64 bits */ +#define SH_EER 0x20 /* 64 bits */ +#define SH_EECR 0x28 /* 64 bits */ +#define SH_EESR 0x30 /* 64 bits */ +#define SH_SER 0x38 /* 64 bits */ +#define SH_SECR 0x40 /* 64 bits */ +#define SH_IER 0x50 /* 64 bits */ +#define SH_IECR 0x58 /* 64 bits */ +#define SH_IESR 0x60 /* 64 bits */ +#define SH_IPR 0x68 /* 64 bits */ +#define SH_ICR 0x70 /* 64 bits */ +#define SH_IEVAL 0x78 +#define SH_QER 0x80 +#define SH_QEER 0x84 +#define SH_QEECR 0x88 +#define SH_QEESR 0x8c +#define SH_QSER 0x90 +#define SH_QSECR 0x94 +#define SH_SIZE 0x200 + +/* Offsets for EDMA CC global registers */ +#define EDMA_REV 0x0000 +#define EDMA_CCCFG 0x0004 +#define EDMA_QCHMAP 0x0200 /* 8 registers */ +#define EDMA_DMAQNUM 0x0240 /* 8 registers (4 on OMAP-L1xx) */ +#define EDMA_QDMAQNUM 0x0260 +#define EDMA_QUETCMAP 0x0280 +#define EDMA_QUEPRI 0x0284 +#define EDMA_EMR 0x0300 /* 64 bits */ +#define EDMA_EMCR 0x0308 /* 64 bits */ +#define EDMA_QEMR 0x0310 +#define EDMA_QEMCR 0x0314 +#define EDMA_CCERR 0x0318 +#define EDMA_CCERRCLR 0x031c +#define EDMA_EEVAL 0x0320 +#define EDMA_DRAE 0x0340 /* 4 x 64 bits*/ +#define EDMA_QRAE 0x0380 /* 4 registers */ +#define EDMA_QUEEVTENTRY 0x0400 /* 2 x 16 registers */ +#define EDMA_QSTAT 0x0600 /* 2 registers */ +#define EDMA_QWMTHRA 0x0620 +#define EDMA_QWMTHRB 0x0624 +#define EDMA_CCSTAT 0x0640 + +#define EDMA_M 0x1000 /* global channel registers */ +#define EDMA_ECR 0x1008 +#define EDMA_ECRH 0x100C +#define EDMA_SHADOW0 0x2000 /* 4 shadow regions */ +#define EDMA_PARM 0x4000 /* PaRAM entries */ + +#define PARM_OFFSET(param_no) (EDMA_PARM + ((param_no) << 5)) + +#define EDMA_DCHMAP 0x0100 /* 64 registers */ + +/* CCCFG register */ +#define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */ +#define GET_NUM_QDMACH(x) ((x & 0x70) >> 4) /* bits 4-6 */ +#define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */ +#define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */ +#define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ +#define CHMAP_EXIST BIT(24) + +/* CCSTAT register */ +#define EDMA_CCSTAT_ACTV BIT(4) + +/* + * Max of 20 segments per channel to conserve PaRAM slots + * Also note that MAX_NR_SG should be at least the no.of periods + * that are required for ASoC, otherwise DMA prep calls will + * fail. Today davinci-pcm is the only user of this driver and + * requires at least 17 slots, so we setup the default to 20. + */ +#define MAX_NR_SG 20 +#define EDMA_MAX_SLOTS MAX_NR_SG +#define EDMA_DESCRIPTORS 16 + +#define EDMA_CHANNEL_ANY -1 /* for edma_alloc_channel() */ +#define EDMA_SLOT_ANY -1 /* for edma_alloc_slot() */ +#define EDMA_CONT_PARAMS_ANY 1001 +#define EDMA_CONT_PARAMS_FIXED_EXACT 1002 +#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003 + +/* + * 64bit array registers are split into two 32bit registers: + * reg0: channel/event 0-31 + * reg1: channel/event 32-63 + * + * bit 5 in the channel number tells the array index (0/1) + * bit 0-4 (0x1f) is the bit offset within the register + */ +#define EDMA_REG_ARRAY_INDEX(channel) ((channel) >> 5) +#define EDMA_CHANNEL_BIT(channel) (BIT((channel) & 0x1f)) + +/* PaRAM slots are laid out like this */ +struct edmacc_param { + u32 opt; + u32 src; + u32 a_b_cnt; + u32 dst; + u32 src_dst_bidx; + u32 link_bcntrld; + u32 src_dst_cidx; + u32 ccnt; +} __packed; + +/* fields in edmacc_param.opt */ +#define SAM BIT(0) +#define DAM BIT(1) +#define SYNCDIM BIT(2) +#define STATIC BIT(3) +#define EDMA_FWID (0x07 << 8) +#define TCCMODE BIT(11) +#define EDMA_TCC(t) ((t) << 12) +#define TCINTEN BIT(20) +#define ITCINTEN BIT(21) +#define TCCHEN BIT(22) +#define ITCCHEN BIT(23) + +struct edma_pset { + u32 len; + dma_addr_t addr; + struct edmacc_param param; +}; + +struct edma_desc { + struct virt_dma_desc vdesc; + struct list_head node; + enum dma_transfer_direction direction; + int cyclic; + bool polled; + int absync; + int pset_nr; + struct edma_chan *echan; + int processed; + + /* + * The following 4 elements are used for residue accounting. + * + * - processed_stat: the number of SG elements we have traversed + * so far to cover accounting. This is updated directly to processed + * during edma_callback and is always <= processed, because processed + * refers to the number of pending transfer (programmed to EDMA + * controller), where as processed_stat tracks number of transfers + * accounted for so far. + * + * - residue: The amount of bytes we have left to transfer for this desc + * + * - residue_stat: The residue in bytes of data we have covered + * so far for accounting. This is updated directly to residue + * during callbacks to keep it current. + * + * - sg_len: Tracks the length of the current intermediate transfer, + * this is required to update the residue during intermediate transfer + * completion callback. + */ + int processed_stat; + u32 sg_len; + u32 residue; + u32 residue_stat; + + struct edma_pset pset[]; +}; + +struct edma_cc; + +struct edma_tc { + struct device_node *node; + u16 id; +}; + +struct edma_chan { + struct virt_dma_chan vchan; + struct list_head node; + struct edma_desc *edesc; + struct edma_cc *ecc; + struct edma_tc *tc; + int ch_num; + bool alloced; + bool hw_triggered; + int slot[EDMA_MAX_SLOTS]; + int missed; + struct dma_slave_config cfg; +}; + +struct edma_cc { + struct device *dev; + struct edma_soc_info *info; + void __iomem *base; + int id; + bool legacy_mode; + + /* eDMA3 resource information */ + unsigned num_channels; + unsigned num_qchannels; + unsigned num_region; + unsigned num_slots; + unsigned num_tc; + bool chmap_exist; + enum dma_event_q default_queue; + + unsigned int ccint; + unsigned int ccerrint; + + /* + * The slot_inuse bit for each PaRAM slot is clear unless the slot is + * in use by Linux or if it is allocated to be used by DSP. + */ + unsigned long *slot_inuse; + + /* + * For tracking reserved channels used by DSP. + * If the bit is cleared, the channel is allocated to be used by DSP + * and Linux must not touch it. + */ + unsigned long *channels_mask; + + struct dma_device dma_slave; + struct dma_device *dma_memcpy; + struct edma_chan *slave_chans; + struct edma_tc *tc_list; + int dummy_slot; +}; + +/* dummy param set used to (re)initialize parameter RAM slots */ +static const struct edmacc_param dummy_paramset = { + .link_bcntrld = 0xffff, + .ccnt = 1, +}; + +#define EDMA_BINDING_LEGACY 0 +#define EDMA_BINDING_TPCC 1 +static const u32 edma_binding_type[] = { + [EDMA_BINDING_LEGACY] = EDMA_BINDING_LEGACY, + [EDMA_BINDING_TPCC] = EDMA_BINDING_TPCC, +}; + +static const struct of_device_id edma_of_ids[] = { + { + .compatible = "ti,edma3", + .data = &edma_binding_type[EDMA_BINDING_LEGACY], + }, + { + .compatible = "ti,edma3-tpcc", + .data = &edma_binding_type[EDMA_BINDING_TPCC], + }, + {} +}; +MODULE_DEVICE_TABLE(of, edma_of_ids); + +static const struct of_device_id edma_tptc_of_ids[] = { + { .compatible = "ti,edma3-tptc", }, + {} +}; +MODULE_DEVICE_TABLE(of, edma_tptc_of_ids); + +static inline unsigned int edma_read(struct edma_cc *ecc, int offset) +{ + return (unsigned int)__raw_readl(ecc->base + offset); +} + +static inline void edma_write(struct edma_cc *ecc, int offset, int val) +{ + __raw_writel(val, ecc->base + offset); +} + +static inline void edma_modify(struct edma_cc *ecc, int offset, unsigned and, + unsigned or) +{ + unsigned val = edma_read(ecc, offset); + + val &= and; + val |= or; + edma_write(ecc, offset, val); +} + +static inline void edma_or(struct edma_cc *ecc, int offset, unsigned or) +{ + unsigned val = edma_read(ecc, offset); + + val |= or; + edma_write(ecc, offset, val); +} + +static inline unsigned int edma_read_array(struct edma_cc *ecc, int offset, + int i) +{ + return edma_read(ecc, offset + (i << 2)); +} + +static inline void edma_write_array(struct edma_cc *ecc, int offset, int i, + unsigned val) +{ + edma_write(ecc, offset + (i << 2), val); +} + +static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i, + unsigned and, unsigned or) +{ + edma_modify(ecc, offset + (i << 2), and, or); +} + +static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j, + unsigned or) +{ + edma_or(ecc, offset + ((i * 2 + j) << 2), or); +} + +static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i, + int j, unsigned val) +{ + edma_write(ecc, offset + ((i * 2 + j) << 2), val); +} + +static inline unsigned int edma_shadow0_read_array(struct edma_cc *ecc, + int offset, int i) +{ + return edma_read(ecc, EDMA_SHADOW0 + offset + (i << 2)); +} + +static inline void edma_shadow0_write(struct edma_cc *ecc, int offset, + unsigned val) +{ + edma_write(ecc, EDMA_SHADOW0 + offset, val); +} + +static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset, + int i, unsigned val) +{ + edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val); +} + +static inline void edma_param_modify(struct edma_cc *ecc, int offset, + int param_no, unsigned and, unsigned or) +{ + edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or); +} + +static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no, + int priority) +{ + int bit = queue_no * 4; + + edma_modify(ecc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit)); +} + +static void edma_set_chmap(struct edma_chan *echan, int slot) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + if (ecc->chmap_exist) { + slot = EDMA_CHAN_SLOT(slot); + edma_write_array(ecc, EDMA_DCHMAP, channel, (slot << 5)); + } +} + +static void edma_setup_interrupt(struct edma_chan *echan, bool enable) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int idx = EDMA_REG_ARRAY_INDEX(channel); + int ch_bit = EDMA_CHANNEL_BIT(channel); + + if (enable) { + edma_shadow0_write_array(ecc, SH_ICR, idx, ch_bit); + edma_shadow0_write_array(ecc, SH_IESR, idx, ch_bit); + } else { + edma_shadow0_write_array(ecc, SH_IECR, idx, ch_bit); + } +} + +/* + * paRAM slot management functions + */ +static void edma_write_slot(struct edma_cc *ecc, unsigned slot, + const struct edmacc_param *param) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + memcpy_toio(ecc->base + PARM_OFFSET(slot), param, PARM_SIZE); +} + +static int edma_read_slot(struct edma_cc *ecc, unsigned slot, + struct edmacc_param *param) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return -EINVAL; + memcpy_fromio(param, ecc->base + PARM_OFFSET(slot), PARM_SIZE); + + return 0; +} + +/** + * edma_alloc_slot - allocate DMA parameter RAM + * @ecc: pointer to edma_cc struct + * @slot: specific slot to allocate; negative for "any unused slot" + * + * This allocates a parameter RAM slot, initializing it to hold a + * dummy transfer. Slots allocated using this routine have not been + * mapped to a hardware DMA channel, and will normally be used by + * linking to them from a slot associated with a DMA channel. + * + * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific + * slots may be allocated on behalf of DSP firmware. + * + * Returns the number of the slot, else negative errno. + */ +static int edma_alloc_slot(struct edma_cc *ecc, int slot) +{ + if (slot >= 0) { + slot = EDMA_CHAN_SLOT(slot); + /* Requesting entry paRAM slot for a HW triggered channel. */ + if (ecc->chmap_exist && slot < ecc->num_channels) + slot = EDMA_SLOT_ANY; + } + + if (slot < 0) { + if (ecc->chmap_exist) + slot = 0; + else + slot = ecc->num_channels; + for (;;) { + slot = find_next_zero_bit(ecc->slot_inuse, + ecc->num_slots, + slot); + if (slot == ecc->num_slots) + return -ENOMEM; + if (!test_and_set_bit(slot, ecc->slot_inuse)) + break; + } + } else if (slot >= ecc->num_slots) { + return -EINVAL; + } else if (test_and_set_bit(slot, ecc->slot_inuse)) { + return -EBUSY; + } + + edma_write_slot(ecc, slot, &dummy_paramset); + + return EDMA_CTLR_CHAN(ecc->id, slot); +} + +static void edma_free_slot(struct edma_cc *ecc, unsigned slot) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + + edma_write_slot(ecc, slot, &dummy_paramset); + clear_bit(slot, ecc->slot_inuse); +} + +/** + * edma_link - link one parameter RAM slot to another + * @ecc: pointer to edma_cc struct + * @from: parameter RAM slot originating the link + * @to: parameter RAM slot which is the link target + * + * The originating slot should not be part of any active DMA transfer. + */ +static void edma_link(struct edma_cc *ecc, unsigned from, unsigned to) +{ + if (unlikely(EDMA_CTLR(from) != EDMA_CTLR(to))) + dev_warn(ecc->dev, "Ignoring eDMA instance for linking\n"); + + from = EDMA_CHAN_SLOT(from); + to = EDMA_CHAN_SLOT(to); + if (from >= ecc->num_slots || to >= ecc->num_slots) + return; + + edma_param_modify(ecc, PARM_LINK_BCNTRLD, from, 0xffff0000, + PARM_OFFSET(to)); +} + +/** + * edma_get_position - returns the current transfer point + * @ecc: pointer to edma_cc struct + * @slot: parameter RAM slot being examined + * @dst: true selects the dest position, false the source + * + * Returns the position of the current active slot + */ +static dma_addr_t edma_get_position(struct edma_cc *ecc, unsigned slot, + bool dst) +{ + u32 offs; + + slot = EDMA_CHAN_SLOT(slot); + offs = PARM_OFFSET(slot); + offs += dst ? PARM_DST : PARM_SRC; + + return edma_read(ecc, offs); +} + +/* + * Channels with event associations will be triggered by their hardware + * events, and channels without such associations will be triggered by + * software. (At this writing there is no interface for using software + * triggers except with channels that don't support hardware triggers.) + */ +static void edma_start(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int idx = EDMA_REG_ARRAY_INDEX(channel); + int ch_bit = EDMA_CHANNEL_BIT(channel); + + if (!echan->hw_triggered) { + /* EDMA channels without event association */ + dev_dbg(ecc->dev, "ESR%d %08x\n", idx, + edma_shadow0_read_array(ecc, SH_ESR, idx)); + edma_shadow0_write_array(ecc, SH_ESR, idx, ch_bit); + } else { + /* EDMA channel with event association */ + dev_dbg(ecc->dev, "ER%d %08x\n", idx, + edma_shadow0_read_array(ecc, SH_ER, idx)); + /* Clear any pending event or error */ + edma_write_array(ecc, EDMA_ECR, idx, ch_bit); + edma_write_array(ecc, EDMA_EMCR, idx, ch_bit); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, idx, ch_bit); + edma_shadow0_write_array(ecc, SH_EESR, idx, ch_bit); + dev_dbg(ecc->dev, "EER%d %08x\n", idx, + edma_shadow0_read_array(ecc, SH_EER, idx)); + } +} + +static void edma_stop(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int idx = EDMA_REG_ARRAY_INDEX(channel); + int ch_bit = EDMA_CHANNEL_BIT(channel); + + edma_shadow0_write_array(ecc, SH_EECR, idx, ch_bit); + edma_shadow0_write_array(ecc, SH_ECR, idx, ch_bit); + edma_shadow0_write_array(ecc, SH_SECR, idx, ch_bit); + edma_write_array(ecc, EDMA_EMCR, idx, ch_bit); + + /* clear possibly pending completion interrupt */ + edma_shadow0_write_array(ecc, SH_ICR, idx, ch_bit); + + dev_dbg(ecc->dev, "EER%d %08x\n", idx, + edma_shadow0_read_array(ecc, SH_EER, idx)); + + /* REVISIT: consider guarding against inappropriate event + * chaining by overwriting with dummy_paramset. + */ +} + +/* + * Temporarily disable EDMA hardware events on the specified channel, + * preventing them from triggering new transfers + */ +static void edma_pause(struct edma_chan *echan) +{ + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + edma_shadow0_write_array(echan->ecc, SH_EECR, + EDMA_REG_ARRAY_INDEX(channel), + EDMA_CHANNEL_BIT(channel)); +} + +/* Re-enable EDMA hardware events on the specified channel. */ +static void edma_resume(struct edma_chan *echan) +{ + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + edma_shadow0_write_array(echan->ecc, SH_EESR, + EDMA_REG_ARRAY_INDEX(channel), + EDMA_CHANNEL_BIT(channel)); +} + +static void edma_trigger_channel(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int idx = EDMA_REG_ARRAY_INDEX(channel); + int ch_bit = EDMA_CHANNEL_BIT(channel); + + edma_shadow0_write_array(ecc, SH_ESR, idx, ch_bit); + + dev_dbg(ecc->dev, "ESR%d %08x\n", idx, + edma_shadow0_read_array(ecc, SH_ESR, idx)); +} + +static void edma_clean_channel(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int idx = EDMA_REG_ARRAY_INDEX(channel); + int ch_bit = EDMA_CHANNEL_BIT(channel); + + dev_dbg(ecc->dev, "EMR%d %08x\n", idx, + edma_read_array(ecc, EDMA_EMR, idx)); + edma_shadow0_write_array(ecc, SH_ECR, idx, ch_bit); + /* Clear the corresponding EMR bits */ + edma_write_array(ecc, EDMA_EMCR, idx, ch_bit); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, idx, ch_bit); + edma_write(ecc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0)); +} + +/* Move channel to a specific event queue */ +static void edma_assign_channel_eventq(struct edma_chan *echan, + enum dma_event_q eventq_no) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int bit = (channel & 0x7) * 4; + + /* default to low priority queue */ + if (eventq_no == EVENTQ_DEFAULT) + eventq_no = ecc->default_queue; + if (eventq_no >= ecc->num_tc) + return; + + eventq_no &= 7; + edma_modify_array(ecc, EDMA_DMAQNUM, (channel >> 3), ~(0x7 << bit), + eventq_no << bit); +} + +static int edma_alloc_channel(struct edma_chan *echan, + enum dma_event_q eventq_no) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + if (!test_bit(echan->ch_num, ecc->channels_mask)) { + dev_err(ecc->dev, "Channel%d is reserved, can not be used!\n", + echan->ch_num); + return -EINVAL; + } + + /* ensure access through shadow region 0 */ + edma_or_array2(ecc, EDMA_DRAE, 0, EDMA_REG_ARRAY_INDEX(channel), + EDMA_CHANNEL_BIT(channel)); + + /* ensure no events are pending */ + edma_stop(echan); + + edma_setup_interrupt(echan, true); + + edma_assign_channel_eventq(echan, eventq_no); + + return 0; +} + +static void edma_free_channel(struct edma_chan *echan) +{ + /* ensure no events are pending */ + edma_stop(echan); + /* REVISIT should probably take out of shadow region 0 */ + edma_setup_interrupt(echan, false); +} + +static inline struct edma_chan *to_edma_chan(struct dma_chan *c) +{ + return container_of(c, struct edma_chan, vchan.chan); +} + +static inline struct edma_desc *to_edma_desc(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct edma_desc, vdesc.tx); +} + +static void edma_desc_free(struct virt_dma_desc *vdesc) +{ + kfree(container_of(vdesc, struct edma_desc, vdesc)); +} + +/* Dispatch a queued descriptor to the controller (caller holds lock) */ +static void edma_execute(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + struct virt_dma_desc *vdesc; + struct edma_desc *edesc; + struct device *dev = echan->vchan.chan.device->dev; + int i, j, left, nslots; + + if (!echan->edesc) { + /* Setup is needed for the first transfer */ + vdesc = vchan_next_desc(&echan->vchan); + if (!vdesc) + return; + list_del(&vdesc->node); + echan->edesc = to_edma_desc(&vdesc->tx); + } + + edesc = echan->edesc; + + /* Find out how many left */ + left = edesc->pset_nr - edesc->processed; + nslots = min(MAX_NR_SG, left); + edesc->sg_len = 0; + + /* Write descriptor PaRAM set(s) */ + for (i = 0; i < nslots; i++) { + j = i + edesc->processed; + edma_write_slot(ecc, echan->slot[i], &edesc->pset[j].param); + edesc->sg_len += edesc->pset[j].len; + dev_vdbg(dev, + "\n pset[%d]:\n" + " chnum\t%d\n" + " slot\t%d\n" + " opt\t%08x\n" + " src\t%08x\n" + " dst\t%08x\n" + " abcnt\t%08x\n" + " ccnt\t%08x\n" + " bidx\t%08x\n" + " cidx\t%08x\n" + " lkrld\t%08x\n", + j, echan->ch_num, echan->slot[i], + edesc->pset[j].param.opt, + edesc->pset[j].param.src, + edesc->pset[j].param.dst, + edesc->pset[j].param.a_b_cnt, + edesc->pset[j].param.ccnt, + edesc->pset[j].param.src_dst_bidx, + edesc->pset[j].param.src_dst_cidx, + edesc->pset[j].param.link_bcntrld); + /* Link to the previous slot if not the last set */ + if (i != (nslots - 1)) + edma_link(ecc, echan->slot[i], echan->slot[i + 1]); + } + + edesc->processed += nslots; + + /* + * If this is either the last set in a set of SG-list transactions + * then setup a link to the dummy slot, this results in all future + * events being absorbed and that's OK because we're done + */ + if (edesc->processed == edesc->pset_nr) { + if (edesc->cyclic) + edma_link(ecc, echan->slot[nslots - 1], echan->slot[1]); + else + edma_link(ecc, echan->slot[nslots - 1], + echan->ecc->dummy_slot); + } + + if (echan->missed) { + /* + * This happens due to setup times between intermediate + * transfers in long SG lists which have to be broken up into + * transfers of MAX_NR_SG + */ + dev_dbg(dev, "missed event on channel %d\n", echan->ch_num); + edma_clean_channel(echan); + edma_stop(echan); + edma_start(echan); + edma_trigger_channel(echan); + echan->missed = 0; + } else if (edesc->processed <= MAX_NR_SG) { + dev_dbg(dev, "first transfer starting on channel %d\n", + echan->ch_num); + edma_start(echan); + } else { + dev_dbg(dev, "chan: %d: completed %d elements, resuming\n", + echan->ch_num, edesc->processed); + edma_resume(echan); + } +} + +static int edma_terminate_all(struct dma_chan *chan) +{ + struct edma_chan *echan = to_edma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&echan->vchan.lock, flags); + + /* + * Stop DMA activity: we assume the callback will not be called + * after edma_dma() returns (even if it does, it will see + * echan->edesc is NULL and exit.) + */ + if (echan->edesc) { + edma_stop(echan); + /* Move the cyclic channel back to default queue */ + if (!echan->tc && echan->edesc->cyclic) + edma_assign_channel_eventq(echan, EVENTQ_DEFAULT); + + vchan_terminate_vdesc(&echan->edesc->vdesc); + echan->edesc = NULL; + } + + vchan_get_all_descriptors(&echan->vchan, &head); + spin_unlock_irqrestore(&echan->vchan.lock, flags); + vchan_dma_desc_free_list(&echan->vchan, &head); + + return 0; +} + +static void edma_synchronize(struct dma_chan *chan) +{ + struct edma_chan *echan = to_edma_chan(chan); + + vchan_synchronize(&echan->vchan); +} + +static int edma_slave_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct edma_chan *echan = to_edma_chan(chan); + + if (cfg->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES || + cfg->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) + return -EINVAL; + + if (cfg->src_maxburst > chan->device->max_burst || + cfg->dst_maxburst > chan->device->max_burst) + return -EINVAL; + + memcpy(&echan->cfg, cfg, sizeof(echan->cfg)); + + return 0; +} + +static int edma_dma_pause(struct dma_chan *chan) +{ + struct edma_chan *echan = to_edma_chan(chan); + + if (!echan->edesc) + return -EINVAL; + + edma_pause(echan); + return 0; +} + +static int edma_dma_resume(struct dma_chan *chan) +{ + struct edma_chan *echan = to_edma_chan(chan); + + edma_resume(echan); + return 0; +} + +/* + * A PaRAM set configuration abstraction used by other modes + * @chan: Channel who's PaRAM set we're configuring + * @pset: PaRAM set to initialize and setup. + * @src_addr: Source address of the DMA + * @dst_addr: Destination address of the DMA + * @burst: In units of dev_width, how much to send + * @dev_width: How much is the dev_width + * @dma_length: Total length of the DMA transfer + * @direction: Direction of the transfer + */ +static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset, + dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst, + unsigned int acnt, unsigned int dma_length, + enum dma_transfer_direction direction) +{ + struct edma_chan *echan = to_edma_chan(chan); + struct device *dev = chan->device->dev; + struct edmacc_param *param = &epset->param; + int bcnt, ccnt, cidx; + int src_bidx, dst_bidx, src_cidx, dst_cidx; + int absync; + + /* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */ + if (!burst) + burst = 1; + /* + * If the maxburst is equal to the fifo width, use + * A-synced transfers. This allows for large contiguous + * buffer transfers using only one PaRAM set. + */ + if (burst == 1) { + /* + * For the A-sync case, bcnt and ccnt are the remainder + * and quotient respectively of the division of: + * (dma_length / acnt) by (SZ_64K -1). This is so + * that in case bcnt over flows, we have ccnt to use. + * Note: In A-sync transfer only, bcntrld is used, but it + * only applies for sg_dma_len(sg) >= SZ_64K. + * In this case, the best way adopted is- bccnt for the + * first frame will be the remainder below. Then for + * every successive frame, bcnt will be SZ_64K-1. This + * is assured as bcntrld = 0xffff in end of function. + */ + absync = false; + ccnt = dma_length / acnt / (SZ_64K - 1); + bcnt = dma_length / acnt - ccnt * (SZ_64K - 1); + /* + * If bcnt is non-zero, we have a remainder and hence an + * extra frame to transfer, so increment ccnt. + */ + if (bcnt) + ccnt++; + else + bcnt = SZ_64K - 1; + cidx = acnt; + } else { + /* + * If maxburst is greater than the fifo address_width, + * use AB-synced transfers where A count is the fifo + * address_width and B count is the maxburst. In this + * case, we are limited to transfers of C count frames + * of (address_width * maxburst) where C count is limited + * to SZ_64K-1. This places an upper bound on the length + * of an SG segment that can be handled. + */ + absync = true; + bcnt = burst; + ccnt = dma_length / (acnt * bcnt); + if (ccnt > (SZ_64K - 1)) { + dev_err(dev, "Exceeded max SG segment size\n"); + return -EINVAL; + } + cidx = acnt * bcnt; + } + + epset->len = dma_length; + + if (direction == DMA_MEM_TO_DEV) { + src_bidx = acnt; + src_cidx = cidx; + dst_bidx = 0; + dst_cidx = 0; + epset->addr = src_addr; + } else if (direction == DMA_DEV_TO_MEM) { + src_bidx = 0; + src_cidx = 0; + dst_bidx = acnt; + dst_cidx = cidx; + epset->addr = dst_addr; + } else if (direction == DMA_MEM_TO_MEM) { + src_bidx = acnt; + src_cidx = cidx; + dst_bidx = acnt; + dst_cidx = cidx; + epset->addr = src_addr; + } else { + dev_err(dev, "%s: direction not implemented yet\n", __func__); + return -EINVAL; + } + + param->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num)); + /* Configure A or AB synchronized transfers */ + if (absync) + param->opt |= SYNCDIM; + + param->src = src_addr; + param->dst = dst_addr; + + param->src_dst_bidx = (dst_bidx << 16) | src_bidx; + param->src_dst_cidx = (dst_cidx << 16) | src_cidx; + + param->a_b_cnt = bcnt << 16 | acnt; + param->ccnt = ccnt; + /* + * Only time when (bcntrld) auto reload is required is for + * A-sync case, and in this case, a requirement of reload value + * of SZ_64K-1 only is assured. 'link' is initially set to NULL + * and then later will be populated by edma_execute. + */ + param->link_bcntrld = 0xffffffff; + return absync; +} + +static struct dma_async_tx_descriptor *edma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long tx_flags, void *context) +{ + struct edma_chan *echan = to_edma_chan(chan); + struct device *dev = chan->device->dev; + struct edma_desc *edesc; + dma_addr_t src_addr = 0, dst_addr = 0; + enum dma_slave_buswidth dev_width; + u32 burst; + struct scatterlist *sg; + int i, nslots, ret; + + if (unlikely(!echan || !sgl || !sg_len)) + return NULL; + + if (direction == DMA_DEV_TO_MEM) { + src_addr = echan->cfg.src_addr; + dev_width = echan->cfg.src_addr_width; + burst = echan->cfg.src_maxburst; + } else if (direction == DMA_MEM_TO_DEV) { + dst_addr = echan->cfg.dst_addr; + dev_width = echan->cfg.dst_addr_width; + burst = echan->cfg.dst_maxburst; + } else { + dev_err(dev, "%s: bad direction: %d\n", __func__, direction); + return NULL; + } + + if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) { + dev_err(dev, "%s: Undefined slave buswidth\n", __func__); + return NULL; + } + + edesc = kzalloc(struct_size(edesc, pset, sg_len), GFP_ATOMIC); + if (!edesc) + return NULL; + + edesc->pset_nr = sg_len; + edesc->residue = 0; + edesc->direction = direction; + edesc->echan = echan; + + /* Allocate a PaRAM slot, if needed */ + nslots = min_t(unsigned, MAX_NR_SG, sg_len); + + for (i = 0; i < nslots; i++) { + if (echan->slot[i] < 0) { + echan->slot[i] = + edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY); + if (echan->slot[i] < 0) { + kfree(edesc); + dev_err(dev, "%s: Failed to allocate slot\n", + __func__); + return NULL; + } + } + } + + /* Configure PaRAM sets for each SG */ + for_each_sg(sgl, sg, sg_len, i) { + /* Get address for each SG */ + if (direction == DMA_DEV_TO_MEM) + dst_addr = sg_dma_address(sg); + else + src_addr = sg_dma_address(sg); + + ret = edma_config_pset(chan, &edesc->pset[i], src_addr, + dst_addr, burst, dev_width, + sg_dma_len(sg), direction); + if (ret < 0) { + kfree(edesc); + return NULL; + } + + edesc->absync = ret; + edesc->residue += sg_dma_len(sg); + + if (i == sg_len - 1) + /* Enable completion interrupt */ + edesc->pset[i].param.opt |= TCINTEN; + else if (!((i+1) % MAX_NR_SG)) + /* + * Enable early completion interrupt for the + * intermediateset. In this case the driver will be + * notified when the paRAM set is submitted to TC. This + * will allow more time to set up the next set of slots. + */ + edesc->pset[i].param.opt |= (TCINTEN | TCCMODE); + } + edesc->residue_stat = edesc->residue; + + return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); +} + +static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long tx_flags) +{ + int ret, nslots; + struct edma_desc *edesc; + struct device *dev = chan->device->dev; + struct edma_chan *echan = to_edma_chan(chan); + unsigned int width, pset_len, array_size; + + if (unlikely(!echan || !len)) + return NULL; + + /* Align the array size (acnt block) with the transfer properties */ + switch (__ffs((src | dest | len))) { + case 0: + array_size = SZ_32K - 1; + break; + case 1: + array_size = SZ_32K - 2; + break; + default: + array_size = SZ_32K - 4; + break; + } + + if (len < SZ_64K) { + /* + * Transfer size less than 64K can be handled with one paRAM + * slot and with one burst. + * ACNT = length + */ + width = len; + pset_len = len; + nslots = 1; + } else { + /* + * Transfer size bigger than 64K will be handled with maximum of + * two paRAM slots. + * slot1: (full_length / 32767) times 32767 bytes bursts. + * ACNT = 32767, length1: (full_length / 32767) * 32767 + * slot2: the remaining amount of data after slot1. + * ACNT = full_length - length1, length2 = ACNT + * + * When the full_length is a multiple of 32767 one slot can be + * used to complete the transfer. + */ + width = array_size; + pset_len = rounddown(len, width); + /* One slot is enough for lengths multiple of (SZ_32K -1) */ + if (unlikely(pset_len == len)) + nslots = 1; + else + nslots = 2; + } + + edesc = kzalloc(struct_size(edesc, pset, nslots), GFP_ATOMIC); + if (!edesc) + return NULL; + + edesc->pset_nr = nslots; + edesc->residue = edesc->residue_stat = len; + edesc->direction = DMA_MEM_TO_MEM; + edesc->echan = echan; + + ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1, + width, pset_len, DMA_MEM_TO_MEM); + if (ret < 0) { + kfree(edesc); + return NULL; + } + + edesc->absync = ret; + + edesc->pset[0].param.opt |= ITCCHEN; + if (nslots == 1) { + /* Enable transfer complete interrupt if requested */ + if (tx_flags & DMA_PREP_INTERRUPT) + edesc->pset[0].param.opt |= TCINTEN; + } else { + /* Enable transfer complete chaining for the first slot */ + edesc->pset[0].param.opt |= TCCHEN; + + if (echan->slot[1] < 0) { + echan->slot[1] = edma_alloc_slot(echan->ecc, + EDMA_SLOT_ANY); + if (echan->slot[1] < 0) { + kfree(edesc); + dev_err(dev, "%s: Failed to allocate slot\n", + __func__); + return NULL; + } + } + dest += pset_len; + src += pset_len; + pset_len = width = len % array_size; + + ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1, + width, pset_len, DMA_MEM_TO_MEM); + if (ret < 0) { + kfree(edesc); + return NULL; + } + + edesc->pset[1].param.opt |= ITCCHEN; + /* Enable transfer complete interrupt if requested */ + if (tx_flags & DMA_PREP_INTERRUPT) + edesc->pset[1].param.opt |= TCINTEN; + } + + if (!(tx_flags & DMA_PREP_INTERRUPT)) + edesc->polled = true; + + return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); +} + +static struct dma_async_tx_descriptor * +edma_prep_dma_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long tx_flags) +{ + struct device *dev = chan->device->dev; + struct edma_chan *echan = to_edma_chan(chan); + struct edmacc_param *param; + struct edma_desc *edesc; + size_t src_icg, dst_icg; + int src_bidx, dst_bidx; + + /* Slave mode is not supported */ + if (is_slave_direction(xt->dir)) + return NULL; + + if (xt->frame_size != 1 || xt->numf == 0) + return NULL; + + if (xt->sgl[0].size > SZ_64K || xt->numf > SZ_64K) + return NULL; + + src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]); + if (src_icg) { + src_bidx = src_icg + xt->sgl[0].size; + } else if (xt->src_inc) { + src_bidx = xt->sgl[0].size; + } else { + dev_err(dev, "%s: SRC constant addressing is not supported\n", + __func__); + return NULL; + } + + dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]); + if (dst_icg) { + dst_bidx = dst_icg + xt->sgl[0].size; + } else if (xt->dst_inc) { + dst_bidx = xt->sgl[0].size; + } else { + dev_err(dev, "%s: DST constant addressing is not supported\n", + __func__); + return NULL; + } + + if (src_bidx > SZ_64K || dst_bidx > SZ_64K) + return NULL; + + edesc = kzalloc(struct_size(edesc, pset, 1), GFP_ATOMIC); + if (!edesc) + return NULL; + + edesc->direction = DMA_MEM_TO_MEM; + edesc->echan = echan; + edesc->pset_nr = 1; + + param = &edesc->pset[0].param; + + param->src = xt->src_start; + param->dst = xt->dst_start; + param->a_b_cnt = xt->numf << 16 | xt->sgl[0].size; + param->ccnt = 1; + param->src_dst_bidx = (dst_bidx << 16) | src_bidx; + param->src_dst_cidx = 0; + + param->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num)); + param->opt |= ITCCHEN; + /* Enable transfer complete interrupt if requested */ + if (tx_flags & DMA_PREP_INTERRUPT) + param->opt |= TCINTEN; + else + edesc->polled = true; + + return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); +} + +static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long tx_flags) +{ + struct edma_chan *echan = to_edma_chan(chan); + struct device *dev = chan->device->dev; + struct edma_desc *edesc; + dma_addr_t src_addr, dst_addr; + enum dma_slave_buswidth dev_width; + bool use_intermediate = false; + u32 burst; + int i, ret, nslots; + + if (unlikely(!echan || !buf_len || !period_len)) + return NULL; + + if (direction == DMA_DEV_TO_MEM) { + src_addr = echan->cfg.src_addr; + dst_addr = buf_addr; + dev_width = echan->cfg.src_addr_width; + burst = echan->cfg.src_maxburst; + } else if (direction == DMA_MEM_TO_DEV) { + src_addr = buf_addr; + dst_addr = echan->cfg.dst_addr; + dev_width = echan->cfg.dst_addr_width; + burst = echan->cfg.dst_maxburst; + } else { + dev_err(dev, "%s: bad direction: %d\n", __func__, direction); + return NULL; + } + + if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) { + dev_err(dev, "%s: Undefined slave buswidth\n", __func__); + return NULL; + } + + if (unlikely(buf_len % period_len)) { + dev_err(dev, "Period should be multiple of Buffer length\n"); + return NULL; + } + + nslots = (buf_len / period_len) + 1; + + /* + * Cyclic DMA users such as audio cannot tolerate delays introduced + * by cases where the number of periods is more than the maximum + * number of SGs the EDMA driver can handle at a time. For DMA types + * such as Slave SGs, such delays are tolerable and synchronized, + * but the synchronization is difficult to achieve with Cyclic and + * cannot be guaranteed, so we error out early. + */ + if (nslots > MAX_NR_SG) { + /* + * If the burst and period sizes are the same, we can put + * the full buffer into a single period and activate + * intermediate interrupts. This will produce interrupts + * after each burst, which is also after each desired period. + */ + if (burst == period_len) { + period_len = buf_len; + nslots = 2; + use_intermediate = true; + } else { + return NULL; + } + } + + edesc = kzalloc(struct_size(edesc, pset, nslots), GFP_ATOMIC); + if (!edesc) + return NULL; + + edesc->cyclic = 1; + edesc->pset_nr = nslots; + edesc->residue = edesc->residue_stat = buf_len; + edesc->direction = direction; + edesc->echan = echan; + + dev_dbg(dev, "%s: channel=%d nslots=%d period_len=%zu buf_len=%zu\n", + __func__, echan->ch_num, nslots, period_len, buf_len); + + for (i = 0; i < nslots; i++) { + /* Allocate a PaRAM slot, if needed */ + if (echan->slot[i] < 0) { + echan->slot[i] = + edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY); + if (echan->slot[i] < 0) { + kfree(edesc); + dev_err(dev, "%s: Failed to allocate slot\n", + __func__); + return NULL; + } + } + + if (i == nslots - 1) { + memcpy(&edesc->pset[i], &edesc->pset[0], + sizeof(edesc->pset[0])); + break; + } + + ret = edma_config_pset(chan, &edesc->pset[i], src_addr, + dst_addr, burst, dev_width, period_len, + direction); + if (ret < 0) { + kfree(edesc); + return NULL; + } + + if (direction == DMA_DEV_TO_MEM) + dst_addr += period_len; + else + src_addr += period_len; + + dev_vdbg(dev, "%s: Configure period %d of buf:\n", __func__, i); + dev_vdbg(dev, + "\n pset[%d]:\n" + " chnum\t%d\n" + " slot\t%d\n" + " opt\t%08x\n" + " src\t%08x\n" + " dst\t%08x\n" + " abcnt\t%08x\n" + " ccnt\t%08x\n" + " bidx\t%08x\n" + " cidx\t%08x\n" + " lkrld\t%08x\n", + i, echan->ch_num, echan->slot[i], + edesc->pset[i].param.opt, + edesc->pset[i].param.src, + edesc->pset[i].param.dst, + edesc->pset[i].param.a_b_cnt, + edesc->pset[i].param.ccnt, + edesc->pset[i].param.src_dst_bidx, + edesc->pset[i].param.src_dst_cidx, + edesc->pset[i].param.link_bcntrld); + + edesc->absync = ret; + + /* + * Enable period interrupt only if it is requested + */ + if (tx_flags & DMA_PREP_INTERRUPT) { + edesc->pset[i].param.opt |= TCINTEN; + + /* Also enable intermediate interrupts if necessary */ + if (use_intermediate) + edesc->pset[i].param.opt |= ITCINTEN; + } + } + + /* Place the cyclic channel to highest priority queue */ + if (!echan->tc) + edma_assign_channel_eventq(echan, EVENTQ_0); + + return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); +} + +static void edma_completion_handler(struct edma_chan *echan) +{ + struct device *dev = echan->vchan.chan.device->dev; + struct edma_desc *edesc; + + spin_lock(&echan->vchan.lock); + edesc = echan->edesc; + if (edesc) { + if (edesc->cyclic) { + vchan_cyclic_callback(&edesc->vdesc); + spin_unlock(&echan->vchan.lock); + return; + } else if (edesc->processed == edesc->pset_nr) { + edesc->residue = 0; + edma_stop(echan); + vchan_cookie_complete(&edesc->vdesc); + echan->edesc = NULL; + + dev_dbg(dev, "Transfer completed on channel %d\n", + echan->ch_num); + } else { + dev_dbg(dev, "Sub transfer completed on channel %d\n", + echan->ch_num); + + edma_pause(echan); + + /* Update statistics for tx_status */ + edesc->residue -= edesc->sg_len; + edesc->residue_stat = edesc->residue; + edesc->processed_stat = edesc->processed; + } + edma_execute(echan); + } + + spin_unlock(&echan->vchan.lock); +} + +/* eDMA interrupt handler */ +static irqreturn_t dma_irq_handler(int irq, void *data) +{ + struct edma_cc *ecc = data; + int ctlr; + u32 sh_ier; + u32 sh_ipr; + u32 bank; + + ctlr = ecc->id; + if (ctlr < 0) + return IRQ_NONE; + + dev_vdbg(ecc->dev, "dma_irq_handler\n"); + + sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 0); + if (!sh_ipr) { + sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 1); + if (!sh_ipr) + return IRQ_NONE; + sh_ier = edma_shadow0_read_array(ecc, SH_IER, 1); + bank = 1; + } else { + sh_ier = edma_shadow0_read_array(ecc, SH_IER, 0); + bank = 0; + } + + do { + u32 slot; + u32 channel; + + slot = __ffs(sh_ipr); + sh_ipr &= ~(BIT(slot)); + + if (sh_ier & BIT(slot)) { + channel = (bank << 5) | slot; + /* Clear the corresponding IPR bits */ + edma_shadow0_write_array(ecc, SH_ICR, bank, BIT(slot)); + edma_completion_handler(&ecc->slave_chans[channel]); + } + } while (sh_ipr); + + edma_shadow0_write(ecc, SH_IEVAL, 1); + return IRQ_HANDLED; +} + +static void edma_error_handler(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + struct device *dev = echan->vchan.chan.device->dev; + struct edmacc_param p; + int err; + + if (!echan->edesc) + return; + + spin_lock(&echan->vchan.lock); + + err = edma_read_slot(ecc, echan->slot[0], &p); + + /* + * Issue later based on missed flag which will be sure + * to happen as: + * (1) we finished transmitting an intermediate slot and + * edma_execute is coming up. + * (2) or we finished current transfer and issue will + * call edma_execute. + * + * Important note: issuing can be dangerous here and + * lead to some nasty recursion when we are in a NULL + * slot. So we avoid doing so and set the missed flag. + */ + if (err || (p.a_b_cnt == 0 && p.ccnt == 0)) { + dev_dbg(dev, "Error on null slot, setting miss\n"); + echan->missed = 1; + } else { + /* + * The slot is already programmed but the event got + * missed, so its safe to issue it here. + */ + dev_dbg(dev, "Missed event, TRIGGERING\n"); + edma_clean_channel(echan); + edma_stop(echan); + edma_start(echan); + edma_trigger_channel(echan); + } + spin_unlock(&echan->vchan.lock); +} + +static inline bool edma_error_pending(struct edma_cc *ecc) +{ + if (edma_read_array(ecc, EDMA_EMR, 0) || + edma_read_array(ecc, EDMA_EMR, 1) || + edma_read(ecc, EDMA_QEMR) || edma_read(ecc, EDMA_CCERR)) + return true; + + return false; +} + +/* eDMA error interrupt handler */ +static irqreturn_t dma_ccerr_handler(int irq, void *data) +{ + struct edma_cc *ecc = data; + int i, j; + int ctlr; + unsigned int cnt = 0; + unsigned int val; + + ctlr = ecc->id; + if (ctlr < 0) + return IRQ_NONE; + + dev_vdbg(ecc->dev, "dma_ccerr_handler\n"); + + if (!edma_error_pending(ecc)) { + /* + * The registers indicate no pending error event but the irq + * handler has been called. + * Ask eDMA to re-evaluate the error registers. + */ + dev_err(ecc->dev, "%s: Error interrupt without error event!\n", + __func__); + edma_write(ecc, EDMA_EEVAL, 1); + return IRQ_NONE; + } + + while (1) { + /* Event missed register(s) */ + for (j = 0; j < 2; j++) { + unsigned long emr; + + val = edma_read_array(ecc, EDMA_EMR, j); + if (!val) + continue; + + dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val); + emr = val; + for_each_set_bit(i, &emr, 32) { + int k = (j << 5) + i; + + /* Clear the corresponding EMR bits */ + edma_write_array(ecc, EDMA_EMCR, j, BIT(i)); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, + BIT(i)); + edma_error_handler(&ecc->slave_chans[k]); + } + } + + val = edma_read(ecc, EDMA_QEMR); + if (val) { + dev_dbg(ecc->dev, "QEMR 0x%02x\n", val); + /* Not reported, just clear the interrupt reason. */ + edma_write(ecc, EDMA_QEMCR, val); + edma_shadow0_write(ecc, SH_QSECR, val); + } + + val = edma_read(ecc, EDMA_CCERR); + if (val) { + dev_warn(ecc->dev, "CCERR 0x%08x\n", val); + /* Not reported, just clear the interrupt reason. */ + edma_write(ecc, EDMA_CCERRCLR, val); + } + + if (!edma_error_pending(ecc)) + break; + cnt++; + if (cnt > 10) + break; + } + edma_write(ecc, EDMA_EEVAL, 1); + return IRQ_HANDLED; +} + +/* Alloc channel resources */ +static int edma_alloc_chan_resources(struct dma_chan *chan) +{ + struct edma_chan *echan = to_edma_chan(chan); + struct edma_cc *ecc = echan->ecc; + struct device *dev = ecc->dev; + enum dma_event_q eventq_no = EVENTQ_DEFAULT; + int ret; + + if (echan->tc) { + eventq_no = echan->tc->id; + } else if (ecc->tc_list) { + /* memcpy channel */ + echan->tc = &ecc->tc_list[ecc->info->default_queue]; + eventq_no = echan->tc->id; + } + + ret = edma_alloc_channel(echan, eventq_no); + if (ret) + return ret; + + echan->slot[0] = edma_alloc_slot(ecc, echan->ch_num); + if (echan->slot[0] < 0) { + dev_err(dev, "Entry slot allocation failed for channel %u\n", + EDMA_CHAN_SLOT(echan->ch_num)); + ret = echan->slot[0]; + goto err_slot; + } + + /* Set up channel -> slot mapping for the entry slot */ + edma_set_chmap(echan, echan->slot[0]); + echan->alloced = true; + + dev_dbg(dev, "Got eDMA channel %d for virt channel %d (%s trigger)\n", + EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id, + echan->hw_triggered ? "HW" : "SW"); + + return 0; + +err_slot: + edma_free_channel(echan); + return ret; +} + +/* Free channel resources */ +static void edma_free_chan_resources(struct dma_chan *chan) +{ + struct edma_chan *echan = to_edma_chan(chan); + struct device *dev = echan->ecc->dev; + int i; + + /* Terminate transfers */ + edma_stop(echan); + + vchan_free_chan_resources(&echan->vchan); + + /* Free EDMA PaRAM slots */ + for (i = 0; i < EDMA_MAX_SLOTS; i++) { + if (echan->slot[i] >= 0) { + edma_free_slot(echan->ecc, echan->slot[i]); + echan->slot[i] = -1; + } + } + + /* Set entry slot to the dummy slot */ + edma_set_chmap(echan, echan->ecc->dummy_slot); + + /* Free EDMA channel */ + if (echan->alloced) { + edma_free_channel(echan); + echan->alloced = false; + } + + echan->tc = NULL; + echan->hw_triggered = false; + + dev_dbg(dev, "Free eDMA channel %d for virt channel %d\n", + EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id); +} + +/* Send pending descriptor to hardware */ +static void edma_issue_pending(struct dma_chan *chan) +{ + struct edma_chan *echan = to_edma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&echan->vchan.lock, flags); + if (vchan_issue_pending(&echan->vchan) && !echan->edesc) + edma_execute(echan); + spin_unlock_irqrestore(&echan->vchan.lock, flags); +} + +/* + * This limit exists to avoid a possible infinite loop when waiting for proof + * that a particular transfer is completed. This limit can be hit if there + * are large bursts to/from slow devices or the CPU is never able to catch + * the DMA hardware idle. On an AM335x transferring 48 bytes from the UART + * RX-FIFO, as many as 55 loops have been seen. + */ +#define EDMA_MAX_TR_WAIT_LOOPS 1000 + +static u32 edma_residue(struct edma_desc *edesc) +{ + bool dst = edesc->direction == DMA_DEV_TO_MEM; + int loop_count = EDMA_MAX_TR_WAIT_LOOPS; + struct edma_chan *echan = edesc->echan; + struct edma_pset *pset = edesc->pset; + dma_addr_t done, pos, pos_old; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int idx = EDMA_REG_ARRAY_INDEX(channel); + int ch_bit = EDMA_CHANNEL_BIT(channel); + int event_reg; + int i; + + /* + * We always read the dst/src position from the first RamPar + * pset. That's the one which is active now. + */ + pos = edma_get_position(echan->ecc, echan->slot[0], dst); + + /* + * "pos" may represent a transfer request that is still being + * processed by the EDMACC or EDMATC. We will busy wait until + * any one of the situations occurs: + * 1. while and event is pending for the channel + * 2. a position updated + * 3. we hit the loop limit + */ + if (is_slave_direction(edesc->direction)) + event_reg = SH_ER; + else + event_reg = SH_ESR; + + pos_old = pos; + while (edma_shadow0_read_array(echan->ecc, event_reg, idx) & ch_bit) { + pos = edma_get_position(echan->ecc, echan->slot[0], dst); + if (pos != pos_old) + break; + + if (!--loop_count) { + dev_dbg_ratelimited(echan->vchan.chan.device->dev, + "%s: timeout waiting for PaRAM update\n", + __func__); + break; + } + + cpu_relax(); + } + + /* + * Cyclic is simple. Just subtract pset[0].addr from pos. + * + * We never update edesc->residue in the cyclic case, so we + * can tell the remaining room to the end of the circular + * buffer. + */ + if (edesc->cyclic) { + done = pos - pset->addr; + edesc->residue_stat = edesc->residue - done; + return edesc->residue_stat; + } + + /* + * If the position is 0, then EDMA loaded the closing dummy slot, the + * transfer is completed + */ + if (!pos) + return 0; + /* + * For SG operation we catch up with the last processed + * status. + */ + pset += edesc->processed_stat; + + for (i = edesc->processed_stat; i < edesc->processed; i++, pset++) { + /* + * If we are inside this pset address range, we know + * this is the active one. Get the current delta and + * stop walking the psets. + */ + if (pos >= pset->addr && pos < pset->addr + pset->len) + return edesc->residue_stat - (pos - pset->addr); + + /* Otherwise mark it done and update residue_stat. */ + edesc->processed_stat++; + edesc->residue_stat -= pset->len; + } + return edesc->residue_stat; +} + +/* Check request completion status */ +static enum dma_status edma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct edma_chan *echan = to_edma_chan(chan); + struct dma_tx_state txstate_tmp; + enum dma_status ret; + unsigned long flags; + + ret = dma_cookie_status(chan, cookie, txstate); + + if (ret == DMA_COMPLETE) + return ret; + + /* Provide a dummy dma_tx_state for completion checking */ + if (!txstate) + txstate = &txstate_tmp; + + spin_lock_irqsave(&echan->vchan.lock, flags); + if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie) { + txstate->residue = edma_residue(echan->edesc); + } else { + struct virt_dma_desc *vdesc = vchan_find_desc(&echan->vchan, + cookie); + + if (vdesc) + txstate->residue = to_edma_desc(&vdesc->tx)->residue; + else + txstate->residue = 0; + } + + /* + * Mark the cookie completed if the residue is 0 for non cyclic + * transfers + */ + if (ret != DMA_COMPLETE && !txstate->residue && + echan->edesc && echan->edesc->polled && + echan->edesc->vdesc.tx.cookie == cookie) { + edma_stop(echan); + vchan_cookie_complete(&echan->edesc->vdesc); + echan->edesc = NULL; + edma_execute(echan); + ret = DMA_COMPLETE; + } + + spin_unlock_irqrestore(&echan->vchan.lock, flags); + + return ret; +} + +static bool edma_is_memcpy_channel(int ch_num, s32 *memcpy_channels) +{ + if (!memcpy_channels) + return false; + while (*memcpy_channels != -1) { + if (*memcpy_channels == ch_num) + return true; + memcpy_channels++; + } + return false; +} + +#define EDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode) +{ + struct dma_device *s_ddev = &ecc->dma_slave; + struct dma_device *m_ddev = NULL; + s32 *memcpy_channels = ecc->info->memcpy_channels; + int i, j; + + dma_cap_zero(s_ddev->cap_mask); + dma_cap_set(DMA_SLAVE, s_ddev->cap_mask); + dma_cap_set(DMA_CYCLIC, s_ddev->cap_mask); + if (ecc->legacy_mode && !memcpy_channels) { + dev_warn(ecc->dev, + "Legacy memcpy is enabled, things might not work\n"); + + dma_cap_set(DMA_MEMCPY, s_ddev->cap_mask); + dma_cap_set(DMA_INTERLEAVE, s_ddev->cap_mask); + s_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; + s_ddev->device_prep_interleaved_dma = edma_prep_dma_interleaved; + s_ddev->directions = BIT(DMA_MEM_TO_MEM); + } + + s_ddev->device_prep_slave_sg = edma_prep_slave_sg; + s_ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic; + s_ddev->device_alloc_chan_resources = edma_alloc_chan_resources; + s_ddev->device_free_chan_resources = edma_free_chan_resources; + s_ddev->device_issue_pending = edma_issue_pending; + s_ddev->device_tx_status = edma_tx_status; + s_ddev->device_config = edma_slave_config; + s_ddev->device_pause = edma_dma_pause; + s_ddev->device_resume = edma_dma_resume; + s_ddev->device_terminate_all = edma_terminate_all; + s_ddev->device_synchronize = edma_synchronize; + + s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; + s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; + s_ddev->directions |= (BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV)); + s_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + s_ddev->max_burst = SZ_32K - 1; /* CIDX: 16bit signed */ + + s_ddev->dev = ecc->dev; + INIT_LIST_HEAD(&s_ddev->channels); + + if (memcpy_channels) { + m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL); + if (!m_ddev) { + dev_warn(ecc->dev, "memcpy is disabled due to OoM\n"); + memcpy_channels = NULL; + goto ch_setup; + } + ecc->dma_memcpy = m_ddev; + + dma_cap_zero(m_ddev->cap_mask); + dma_cap_set(DMA_MEMCPY, m_ddev->cap_mask); + dma_cap_set(DMA_INTERLEAVE, m_ddev->cap_mask); + + m_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; + m_ddev->device_prep_interleaved_dma = edma_prep_dma_interleaved; + m_ddev->device_alloc_chan_resources = edma_alloc_chan_resources; + m_ddev->device_free_chan_resources = edma_free_chan_resources; + m_ddev->device_issue_pending = edma_issue_pending; + m_ddev->device_tx_status = edma_tx_status; + m_ddev->device_config = edma_slave_config; + m_ddev->device_pause = edma_dma_pause; + m_ddev->device_resume = edma_dma_resume; + m_ddev->device_terminate_all = edma_terminate_all; + m_ddev->device_synchronize = edma_synchronize; + + m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; + m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; + m_ddev->directions = BIT(DMA_MEM_TO_MEM); + m_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + m_ddev->dev = ecc->dev; + INIT_LIST_HEAD(&m_ddev->channels); + } else if (!ecc->legacy_mode) { + dev_info(ecc->dev, "memcpy is disabled\n"); + } + +ch_setup: + for (i = 0; i < ecc->num_channels; i++) { + struct edma_chan *echan = &ecc->slave_chans[i]; + echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i); + echan->ecc = ecc; + echan->vchan.desc_free = edma_desc_free; + + if (m_ddev && edma_is_memcpy_channel(i, memcpy_channels)) + vchan_init(&echan->vchan, m_ddev); + else + vchan_init(&echan->vchan, s_ddev); + + INIT_LIST_HEAD(&echan->node); + for (j = 0; j < EDMA_MAX_SLOTS; j++) + echan->slot[j] = -1; + } +} + +static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, + struct edma_cc *ecc) +{ + int i; + u32 value, cccfg; + s8 (*queue_priority_map)[2]; + + /* Decode the eDMA3 configuration from CCCFG register */ + cccfg = edma_read(ecc, EDMA_CCCFG); + + value = GET_NUM_REGN(cccfg); + ecc->num_region = BIT(value); + + value = GET_NUM_DMACH(cccfg); + ecc->num_channels = BIT(value + 1); + + value = GET_NUM_QDMACH(cccfg); + ecc->num_qchannels = value * 2; + + value = GET_NUM_PAENTRY(cccfg); + ecc->num_slots = BIT(value + 4); + + value = GET_NUM_EVQUE(cccfg); + ecc->num_tc = value + 1; + + ecc->chmap_exist = (cccfg & CHMAP_EXIST) ? true : false; + + dev_dbg(dev, "eDMA3 CC HW configuration (cccfg: 0x%08x):\n", cccfg); + dev_dbg(dev, "num_region: %u\n", ecc->num_region); + dev_dbg(dev, "num_channels: %u\n", ecc->num_channels); + dev_dbg(dev, "num_qchannels: %u\n", ecc->num_qchannels); + dev_dbg(dev, "num_slots: %u\n", ecc->num_slots); + dev_dbg(dev, "num_tc: %u\n", ecc->num_tc); + dev_dbg(dev, "chmap_exist: %s\n", ecc->chmap_exist ? "yes" : "no"); + + /* Nothing need to be done if queue priority is provided */ + if (pdata->queue_priority_mapping) + return 0; + + /* + * Configure TC/queue priority as follows: + * Q0 - priority 0 + * Q1 - priority 1 + * Q2 - priority 2 + * ... + * The meaning of priority numbers: 0 highest priority, 7 lowest + * priority. So Q0 is the highest priority queue and the last queue has + * the lowest priority. + */ + queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1, sizeof(s8), + GFP_KERNEL); + if (!queue_priority_map) + return -ENOMEM; + + for (i = 0; i < ecc->num_tc; i++) { + queue_priority_map[i][0] = i; + queue_priority_map[i][1] = i; + } + queue_priority_map[i][0] = -1; + queue_priority_map[i][1] = -1; + + pdata->queue_priority_mapping = queue_priority_map; + /* Default queue has the lowest priority */ + pdata->default_queue = i - 1; + + return 0; +} + +#if IS_ENABLED(CONFIG_OF) +static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata, + size_t sz) +{ + const char pname[] = "ti,edma-xbar-event-map"; + struct resource res; + void __iomem *xbar; + s16 (*xbar_chans)[2]; + size_t nelm = sz / sizeof(s16); + u32 shift, offset, mux; + int ret, i; + + xbar_chans = devm_kcalloc(dev, nelm + 2, sizeof(s16), GFP_KERNEL); + if (!xbar_chans) + return -ENOMEM; + + ret = of_address_to_resource(dev->of_node, 1, &res); + if (ret) + return -ENOMEM; + + xbar = devm_ioremap(dev, res.start, resource_size(&res)); + if (!xbar) + return -ENOMEM; + + ret = of_property_read_u16_array(dev->of_node, pname, (u16 *)xbar_chans, + nelm); + if (ret) + return -EIO; + + /* Invalidate last entry for the other user of this mess */ + nelm >>= 1; + xbar_chans[nelm][0] = -1; + xbar_chans[nelm][1] = -1; + + for (i = 0; i < nelm; i++) { + shift = (xbar_chans[i][1] & 0x03) << 3; + offset = xbar_chans[i][1] & 0xfffffffc; + mux = readl(xbar + offset); + mux &= ~(0xff << shift); + mux |= xbar_chans[i][0] << shift; + writel(mux, (xbar + offset)); + } + + pdata->xbar_chans = (const s16 (*)[2]) xbar_chans; + return 0; +} + +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + bool legacy_mode) +{ + struct edma_soc_info *info; + struct property *prop; + int sz, ret; + + info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + if (legacy_mode) { + prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map", + &sz); + if (prop) { + ret = edma_xbar_event_map(dev, info, sz); + if (ret) + return ERR_PTR(ret); + } + return info; + } + + /* Get the list of channels allocated to be used for memcpy */ + prop = of_find_property(dev->of_node, "ti,edma-memcpy-channels", &sz); + if (prop) { + const char pname[] = "ti,edma-memcpy-channels"; + size_t nelm = sz / sizeof(s32); + s32 *memcpy_ch; + + memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s32), + GFP_KERNEL); + if (!memcpy_ch) + return ERR_PTR(-ENOMEM); + + ret = of_property_read_u32_array(dev->of_node, pname, + (u32 *)memcpy_ch, nelm); + if (ret) + return ERR_PTR(ret); + + memcpy_ch[nelm] = -1; + info->memcpy_channels = memcpy_ch; + } + + prop = of_find_property(dev->of_node, "ti,edma-reserved-slot-ranges", + &sz); + if (prop) { + const char pname[] = "ti,edma-reserved-slot-ranges"; + u32 (*tmp)[2]; + s16 (*rsv_slots)[2]; + size_t nelm = sz / sizeof(*tmp); + struct edma_rsv_info *rsv_info; + int i; + + if (!nelm) + return info; + + tmp = kcalloc(nelm, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ERR_PTR(-ENOMEM); + + rsv_info = devm_kzalloc(dev, sizeof(*rsv_info), GFP_KERNEL); + if (!rsv_info) { + kfree(tmp); + return ERR_PTR(-ENOMEM); + } + + rsv_slots = devm_kcalloc(dev, nelm + 1, sizeof(*rsv_slots), + GFP_KERNEL); + if (!rsv_slots) { + kfree(tmp); + return ERR_PTR(-ENOMEM); + } + + ret = of_property_read_u32_array(dev->of_node, pname, + (u32 *)tmp, nelm * 2); + if (ret) { + kfree(tmp); + return ERR_PTR(ret); + } + + for (i = 0; i < nelm; i++) { + rsv_slots[i][0] = tmp[i][0]; + rsv_slots[i][1] = tmp[i][1]; + } + rsv_slots[nelm][0] = -1; + rsv_slots[nelm][1] = -1; + + info->rsv = rsv_info; + info->rsv->rsv_slots = (const s16 (*)[2])rsv_slots; + + kfree(tmp); + } + + return info; +} + +static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct edma_cc *ecc = ofdma->of_dma_data; + struct dma_chan *chan = NULL; + struct edma_chan *echan; + int i; + + if (!ecc || dma_spec->args_count < 1) + return NULL; + + for (i = 0; i < ecc->num_channels; i++) { + echan = &ecc->slave_chans[i]; + if (echan->ch_num == dma_spec->args[0]) { + chan = &echan->vchan.chan; + break; + } + } + + if (!chan) + return NULL; + + if (echan->ecc->legacy_mode && dma_spec->args_count == 1) + goto out; + + if (!echan->ecc->legacy_mode && dma_spec->args_count == 2 && + dma_spec->args[1] < echan->ecc->num_tc) { + echan->tc = &echan->ecc->tc_list[dma_spec->args[1]]; + goto out; + } + + return NULL; +out: + /* The channel is going to be used as HW synchronized */ + echan->hw_triggered = true; + return dma_get_slave_channel(chan); +} +#else +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + bool legacy_mode) +{ + return ERR_PTR(-EINVAL); +} + +static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + return NULL; +} +#endif + +static bool edma_filter_fn(struct dma_chan *chan, void *param); + +static int edma_probe(struct platform_device *pdev) +{ + struct edma_soc_info *info = pdev->dev.platform_data; + s8 (*queue_priority_mapping)[2]; + const s16 (*reserved)[2]; + int i, irq; + char *irq_name; + struct resource *mem; + struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct edma_cc *ecc; + bool legacy_mode = true; + int ret; + + if (node) { + const struct of_device_id *match; + + match = of_match_node(edma_of_ids, node); + if (match && (*(u32 *)match->data) == EDMA_BINDING_TPCC) + legacy_mode = false; + + info = edma_setup_info_from_dt(dev, legacy_mode); + if (IS_ERR(info)) { + dev_err(dev, "failed to get DT data\n"); + return PTR_ERR(info); + } + } + + if (!info) + return -ENODEV; + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL); + if (!ecc) + return -ENOMEM; + + ecc->dev = dev; + ecc->id = pdev->id; + ecc->legacy_mode = legacy_mode; + /* When booting with DT the pdev->id is -1 */ + if (ecc->id < 0) + ecc->id = 0; + + mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc"); + if (!mem) { + dev_dbg(dev, "mem resource not found, using index 0\n"); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(dev, "no mem resource?\n"); + return -ENODEV; + } + } + ecc->base = devm_ioremap_resource(dev, mem); + if (IS_ERR(ecc->base)) + return PTR_ERR(ecc->base); + + platform_set_drvdata(pdev, ecc); + + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_get_sync() failed\n"); + pm_runtime_disable(dev); + return ret; + } + + /* Get eDMA3 configuration from IP */ + ret = edma_setup_from_hw(dev, info, ecc); + if (ret) + goto err_disable_pm; + + /* Allocate memory based on the information we got from the IP */ + ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels, + sizeof(*ecc->slave_chans), GFP_KERNEL); + + ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots), + sizeof(unsigned long), GFP_KERNEL); + + ecc->channels_mask = devm_kcalloc(dev, + BITS_TO_LONGS(ecc->num_channels), + sizeof(unsigned long), GFP_KERNEL); + if (!ecc->slave_chans || !ecc->slot_inuse || !ecc->channels_mask) { + ret = -ENOMEM; + goto err_disable_pm; + } + + /* Mark all channels available initially */ + bitmap_fill(ecc->channels_mask, ecc->num_channels); + + ecc->default_queue = info->default_queue; + + if (info->rsv) { + /* Set the reserved slots in inuse list */ + reserved = info->rsv->rsv_slots; + if (reserved) { + for (i = 0; reserved[i][0] != -1; i++) + bitmap_set(ecc->slot_inuse, reserved[i][0], + reserved[i][1]); + } + + /* Clear channels not usable for Linux */ + reserved = info->rsv->rsv_chans; + if (reserved) { + for (i = 0; reserved[i][0] != -1; i++) + bitmap_clear(ecc->channels_mask, reserved[i][0], + reserved[i][1]); + } + } + + for (i = 0; i < ecc->num_slots; i++) { + /* Reset only unused - not reserved - paRAM slots */ + if (!test_bit(i, ecc->slot_inuse)) + edma_write_slot(ecc, i, &dummy_paramset); + } + + irq = platform_get_irq_byname(pdev, "edma3_ccint"); + if (irq < 0 && node) + irq = irq_of_parse_and_map(node, 0); + + if (irq > 0) { + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", + dev_name(dev)); + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, + ecc); + if (ret) { + dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret); + goto err_disable_pm; + } + ecc->ccint = irq; + } + + irq = platform_get_irq_byname(pdev, "edma3_ccerrint"); + if (irq < 0 && node) + irq = irq_of_parse_and_map(node, 2); + + if (irq > 0) { + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", + dev_name(dev)); + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, + ecc); + if (ret) { + dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret); + goto err_disable_pm; + } + ecc->ccerrint = irq; + } + + ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY); + if (ecc->dummy_slot < 0) { + dev_err(dev, "Can't allocate PaRAM dummy slot\n"); + ret = ecc->dummy_slot; + goto err_disable_pm; + } + + queue_priority_mapping = info->queue_priority_mapping; + + if (!ecc->legacy_mode) { + int lowest_priority = 0; + unsigned int array_max; + struct of_phandle_args tc_args; + + ecc->tc_list = devm_kcalloc(dev, ecc->num_tc, + sizeof(*ecc->tc_list), GFP_KERNEL); + if (!ecc->tc_list) { + ret = -ENOMEM; + goto err_reg1; + } + + for (i = 0;; i++) { + ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs", + 1, i, &tc_args); + if (ret || i == ecc->num_tc) + break; + + ecc->tc_list[i].node = tc_args.np; + ecc->tc_list[i].id = i; + queue_priority_mapping[i][1] = tc_args.args[0]; + if (queue_priority_mapping[i][1] > lowest_priority) { + lowest_priority = queue_priority_mapping[i][1]; + info->default_queue = i; + } + } + + /* See if we have optional dma-channel-mask array */ + array_max = DIV_ROUND_UP(ecc->num_channels, BITS_PER_TYPE(u32)); + ret = of_property_read_variable_u32_array(node, + "dma-channel-mask", + (u32 *)ecc->channels_mask, + 1, array_max); + if (ret > 0 && ret != array_max) + dev_warn(dev, "dma-channel-mask is not complete.\n"); + else if (ret == -EOVERFLOW || ret == -ENODATA) + dev_warn(dev, + "dma-channel-mask is out of range or empty\n"); + } + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); + + edma_write_array2(ecc, EDMA_DRAE, 0, 0, 0x0); + edma_write_array2(ecc, EDMA_DRAE, 0, 1, 0x0); + edma_write_array(ecc, EDMA_QRAE, 0, 0x0); + + ecc->info = info; + + /* Init the dma device and channels */ + edma_dma_init(ecc, legacy_mode); + + for (i = 0; i < ecc->num_channels; i++) { + /* Do not touch reserved channels */ + if (!test_bit(i, ecc->channels_mask)) + continue; + + /* Assign all channels to the default queue */ + edma_assign_channel_eventq(&ecc->slave_chans[i], + info->default_queue); + /* Set entry slot to the dummy slot */ + edma_set_chmap(&ecc->slave_chans[i], ecc->dummy_slot); + } + + ecc->dma_slave.filter.map = info->slave_map; + ecc->dma_slave.filter.mapcnt = info->slavecnt; + ecc->dma_slave.filter.fn = edma_filter_fn; + + ret = dma_async_device_register(&ecc->dma_slave); + if (ret) { + dev_err(dev, "slave ddev registration failed (%d)\n", ret); + goto err_reg1; + } + + if (ecc->dma_memcpy) { + ret = dma_async_device_register(ecc->dma_memcpy); + if (ret) { + dev_err(dev, "memcpy ddev registration failed (%d)\n", + ret); + dma_async_device_unregister(&ecc->dma_slave); + goto err_reg1; + } + } + + if (node) + of_dma_controller_register(node, of_edma_xlate, ecc); + + dev_info(dev, "TI EDMA DMA engine driver\n"); + + return 0; + +err_reg1: + edma_free_slot(ecc, ecc->dummy_slot); +err_disable_pm: + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + return ret; +} + +static void edma_cleanupp_vchan(struct dma_device *dmadev) +{ + struct edma_chan *echan, *_echan; + + list_for_each_entry_safe(echan, _echan, + &dmadev->channels, vchan.chan.device_node) { + list_del(&echan->vchan.chan.device_node); + tasklet_kill(&echan->vchan.task); + } +} + +static int edma_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct edma_cc *ecc = dev_get_drvdata(dev); + + devm_free_irq(dev, ecc->ccint, ecc); + devm_free_irq(dev, ecc->ccerrint, ecc); + + edma_cleanupp_vchan(&ecc->dma_slave); + + if (dev->of_node) + of_dma_controller_free(dev->of_node); + dma_async_device_unregister(&ecc->dma_slave); + if (ecc->dma_memcpy) + dma_async_device_unregister(ecc->dma_memcpy); + edma_free_slot(ecc, ecc->dummy_slot); + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int edma_pm_suspend(struct device *dev) +{ + struct edma_cc *ecc = dev_get_drvdata(dev); + struct edma_chan *echan = ecc->slave_chans; + int i; + + for (i = 0; i < ecc->num_channels; i++) { + if (echan[i].alloced) + edma_setup_interrupt(&echan[i], false); + } + + return 0; +} + +static int edma_pm_resume(struct device *dev) +{ + struct edma_cc *ecc = dev_get_drvdata(dev); + struct edma_chan *echan = ecc->slave_chans; + int i; + s8 (*queue_priority_mapping)[2]; + + /* re initialize dummy slot to dummy param set */ + edma_write_slot(ecc, ecc->dummy_slot, &dummy_paramset); + + queue_priority_mapping = ecc->info->queue_priority_mapping; + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); + + for (i = 0; i < ecc->num_channels; i++) { + if (echan[i].alloced) { + /* ensure access through shadow region 0 */ + edma_or_array2(ecc, EDMA_DRAE, 0, + EDMA_REG_ARRAY_INDEX(i), + EDMA_CHANNEL_BIT(i)); + + edma_setup_interrupt(&echan[i], true); + + /* Set up channel -> slot mapping for the entry slot */ + edma_set_chmap(&echan[i], echan[i].slot[0]); + } + } + + return 0; +} +#endif + +static const struct dev_pm_ops edma_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(edma_pm_suspend, edma_pm_resume) +}; + +static struct platform_driver edma_driver = { + .probe = edma_probe, + .remove = edma_remove, + .driver = { + .name = "edma", + .pm = &edma_pm_ops, + .of_match_table = edma_of_ids, + }, +}; + +static int edma_tptc_probe(struct platform_device *pdev) +{ + pm_runtime_enable(&pdev->dev); + return pm_runtime_get_sync(&pdev->dev); +} + +static struct platform_driver edma_tptc_driver = { + .probe = edma_tptc_probe, + .driver = { + .name = "edma3-tptc", + .of_match_table = edma_tptc_of_ids, + }, +}; + +static bool edma_filter_fn(struct dma_chan *chan, void *param) +{ + bool match = false; + + if (chan->device->dev->driver == &edma_driver.driver) { + struct edma_chan *echan = to_edma_chan(chan); + unsigned ch_req = *(unsigned *)param; + if (ch_req == echan->ch_num) { + /* The channel is going to be used as HW synchronized */ + echan->hw_triggered = true; + match = true; + } + } + return match; +} + +static int edma_init(void) +{ + int ret; + + ret = platform_driver_register(&edma_tptc_driver); + if (ret) + return ret; + + return platform_driver_register(&edma_driver); +} +subsys_initcall(edma_init); + +static void __exit edma_exit(void) +{ + platform_driver_unregister(&edma_driver); + platform_driver_unregister(&edma_tptc_driver); +} +module_exit(edma_exit); + +MODULE_AUTHOR("Matt Porter "); +MODULE_DESCRIPTION("TI EDMA DMA engine driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/ti/k3-psil-am62.c b/drivers/dma/ti/k3-psil-am62.c new file mode 100644 index 0000000000..1272b1541f --- /dev/null +++ b/drivers/dma/ti/k3-psil-am62.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .mapped_channel_id = -1, \ + .default_flow_id = -1, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x, ch, flow_base, flow_cnt) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + .mapped_channel_id = ch, \ + .flow_start = flow_base, \ + .flow_num = flow_cnt, \ + .default_flow_id = flow_base, \ + }, \ + } + +#define PSIL_SAUL(x, ch, flow_base, flow_cnt, default_flow, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .mapped_channel_id = ch, \ + .flow_start = flow_base, \ + .flow_num = flow_cnt, \ + .default_flow_id = default_flow, \ + .notdpkt = tx, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_CSI2RX(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep am62_src_ep_map[] = { + /* SAUL */ + PSIL_SAUL(0x7504, 20, 35, 8, 35, 0), + PSIL_SAUL(0x7505, 21, 35, 8, 36, 0), + PSIL_SAUL(0x7506, 22, 43, 8, 43, 0), + PSIL_SAUL(0x7507, 23, 43, 8, 44, 0), + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0x4300), + PSIL_PDMA_XY_PKT(0x4301), + PSIL_PDMA_XY_PKT(0x4302), + PSIL_PDMA_XY_PKT(0x4303), + PSIL_PDMA_XY_PKT(0x4304), + PSIL_PDMA_XY_PKT(0x4305), + PSIL_PDMA_XY_PKT(0x4306), + PSIL_PDMA_XY_PKT(0x4307), + PSIL_PDMA_XY_PKT(0x4308), + PSIL_PDMA_XY_PKT(0x4309), + PSIL_PDMA_XY_PKT(0x430a), + PSIL_PDMA_XY_PKT(0x430b), + /* PDMA_MAIN1 - UART0-6 */ + PSIL_PDMA_XY_PKT(0x4400), + PSIL_PDMA_XY_PKT(0x4401), + PSIL_PDMA_XY_PKT(0x4402), + PSIL_PDMA_XY_PKT(0x4403), + PSIL_PDMA_XY_PKT(0x4404), + PSIL_PDMA_XY_PKT(0x4405), + PSIL_PDMA_XY_PKT(0x4406), + /* PDMA_MAIN2 - MCASP0-2 */ + PSIL_PDMA_MCASP(0x4500), + PSIL_PDMA_MCASP(0x4501), + PSIL_PDMA_MCASP(0x4502), + /* CPSW3G */ + PSIL_ETHERNET(0x4600, 19, 19, 16), + /* CSI2RX */ + PSIL_CSI2RX(0x4700), + PSIL_CSI2RX(0x4701), + PSIL_CSI2RX(0x4702), + PSIL_CSI2RX(0x4703), + PSIL_CSI2RX(0x4704), + PSIL_CSI2RX(0x4705), + PSIL_CSI2RX(0x4706), + PSIL_CSI2RX(0x4707), + PSIL_CSI2RX(0x4708), + PSIL_CSI2RX(0x4709), + PSIL_CSI2RX(0x470a), + PSIL_CSI2RX(0x470b), + PSIL_CSI2RX(0x470c), + PSIL_CSI2RX(0x470d), + PSIL_CSI2RX(0x470e), + PSIL_CSI2RX(0x470f), + PSIL_CSI2RX(0x4710), + PSIL_CSI2RX(0x4711), + PSIL_CSI2RX(0x4712), + PSIL_CSI2RX(0x4713), + PSIL_CSI2RX(0x4714), + PSIL_CSI2RX(0x4715), + PSIL_CSI2RX(0x4716), + PSIL_CSI2RX(0x4717), + PSIL_CSI2RX(0x4718), + PSIL_CSI2RX(0x4719), + PSIL_CSI2RX(0x471a), + PSIL_CSI2RX(0x471b), + PSIL_CSI2RX(0x471c), + PSIL_CSI2RX(0x471d), + PSIL_CSI2RX(0x471e), + PSIL_CSI2RX(0x471f), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep am62_dst_ep_map[] = { + /* SAUL */ + PSIL_SAUL(0xf500, 27, 83, 8, 83, 1), + PSIL_SAUL(0xf501, 28, 91, 8, 91, 1), + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0xc300), + PSIL_PDMA_XY_PKT(0xc301), + PSIL_PDMA_XY_PKT(0xc302), + PSIL_PDMA_XY_PKT(0xc303), + PSIL_PDMA_XY_PKT(0xc304), + PSIL_PDMA_XY_PKT(0xc305), + PSIL_PDMA_XY_PKT(0xc306), + PSIL_PDMA_XY_PKT(0xc307), + PSIL_PDMA_XY_PKT(0xc308), + PSIL_PDMA_XY_PKT(0xc309), + PSIL_PDMA_XY_PKT(0xc30a), + PSIL_PDMA_XY_PKT(0xc30b), + /* PDMA_MAIN1 - UART0-6 */ + PSIL_PDMA_XY_PKT(0xc400), + PSIL_PDMA_XY_PKT(0xc401), + PSIL_PDMA_XY_PKT(0xc402), + PSIL_PDMA_XY_PKT(0xc403), + PSIL_PDMA_XY_PKT(0xc404), + PSIL_PDMA_XY_PKT(0xc405), + PSIL_PDMA_XY_PKT(0xc406), + /* PDMA_MAIN2 - MCASP0-2 */ + PSIL_PDMA_MCASP(0xc500), + PSIL_PDMA_MCASP(0xc501), + PSIL_PDMA_MCASP(0xc502), + /* CPSW3G */ + PSIL_ETHERNET(0xc600, 19, 19, 8), + PSIL_ETHERNET(0xc601, 20, 27, 8), + PSIL_ETHERNET(0xc602, 21, 35, 8), + PSIL_ETHERNET(0xc603, 22, 43, 8), + PSIL_ETHERNET(0xc604, 23, 51, 8), + PSIL_ETHERNET(0xc605, 24, 59, 8), + PSIL_ETHERNET(0xc606, 25, 67, 8), + PSIL_ETHERNET(0xc607, 26, 75, 8), +}; + +struct psil_ep_map am62_ep_map = { + .name = "am62", + .src = am62_src_ep_map, + .src_count = ARRAY_SIZE(am62_src_ep_map), + .dst = am62_dst_ep_map, + .dst_count = ARRAY_SIZE(am62_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-am62a.c b/drivers/dma/ti/k3-psil-am62a.c new file mode 100644 index 0000000000..4cf9123b0e --- /dev/null +++ b/drivers/dma/ti/k3-psil-am62a.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .mapped_channel_id = -1, \ + .default_flow_id = -1, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .mapped_channel_id = -1, \ + .default_flow_id = -1, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x, ch, flow_base, flow_cnt) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + .mapped_channel_id = ch, \ + .flow_start = flow_base, \ + .flow_num = flow_cnt, \ + .default_flow_id = flow_base, \ + }, \ + } + +#define PSIL_SAUL(x, ch, flow_base, flow_cnt, default_flow, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .mapped_channel_id = ch, \ + .flow_start = flow_base, \ + .flow_num = flow_cnt, \ + .default_flow_id = default_flow, \ + .notdpkt = tx, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_CSI2RX(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep am62a_src_ep_map[] = { + /* SAUL */ + PSIL_SAUL(0x7504, 20, 35, 8, 35, 0), + PSIL_SAUL(0x7505, 21, 35, 8, 36, 0), + PSIL_SAUL(0x7506, 22, 43, 8, 43, 0), + PSIL_SAUL(0x7507, 23, 43, 8, 44, 0), + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0x4300), + PSIL_PDMA_XY_PKT(0x4301), + PSIL_PDMA_XY_PKT(0x4302), + PSIL_PDMA_XY_PKT(0x4303), + PSIL_PDMA_XY_PKT(0x4304), + PSIL_PDMA_XY_PKT(0x4305), + PSIL_PDMA_XY_PKT(0x4306), + PSIL_PDMA_XY_PKT(0x4307), + PSIL_PDMA_XY_PKT(0x4308), + PSIL_PDMA_XY_PKT(0x4309), + PSIL_PDMA_XY_PKT(0x430a), + PSIL_PDMA_XY_PKT(0x430b), + /* PDMA_MAIN1 - UART0-6 */ + PSIL_PDMA_XY_PKT(0x4400), + PSIL_PDMA_XY_PKT(0x4401), + PSIL_PDMA_XY_PKT(0x4402), + PSIL_PDMA_XY_PKT(0x4403), + PSIL_PDMA_XY_PKT(0x4404), + PSIL_PDMA_XY_PKT(0x4405), + PSIL_PDMA_XY_PKT(0x4406), + /* PDMA_MAIN2 - MCASP0-2 */ + PSIL_PDMA_MCASP(0x4500), + PSIL_PDMA_MCASP(0x4501), + PSIL_PDMA_MCASP(0x4502), + /* CPSW3G */ + PSIL_ETHERNET(0x4600, 19, 19, 16), + /* CSI2RX */ + PSIL_CSI2RX(0x5000), + PSIL_CSI2RX(0x5001), + PSIL_CSI2RX(0x5002), + PSIL_CSI2RX(0x5003), + PSIL_CSI2RX(0x5004), + PSIL_CSI2RX(0x5005), + PSIL_CSI2RX(0x5006), + PSIL_CSI2RX(0x5007), + PSIL_CSI2RX(0x5008), + PSIL_CSI2RX(0x5009), + PSIL_CSI2RX(0x500a), + PSIL_CSI2RX(0x500b), + PSIL_CSI2RX(0x500c), + PSIL_CSI2RX(0x500d), + PSIL_CSI2RX(0x500e), + PSIL_CSI2RX(0x500f), + PSIL_CSI2RX(0x5010), + PSIL_CSI2RX(0x5011), + PSIL_CSI2RX(0x5012), + PSIL_CSI2RX(0x5013), + PSIL_CSI2RX(0x5014), + PSIL_CSI2RX(0x5015), + PSIL_CSI2RX(0x5016), + PSIL_CSI2RX(0x5017), + PSIL_CSI2RX(0x5018), + PSIL_CSI2RX(0x5019), + PSIL_CSI2RX(0x501a), + PSIL_CSI2RX(0x501b), + PSIL_CSI2RX(0x501c), + PSIL_CSI2RX(0x501d), + PSIL_CSI2RX(0x501e), + PSIL_CSI2RX(0x501f), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep am62a_dst_ep_map[] = { + /* SAUL */ + PSIL_SAUL(0xf500, 27, 83, 8, 83, 1), + PSIL_SAUL(0xf501, 28, 91, 8, 91, 1), + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0xc300), + PSIL_PDMA_XY_PKT(0xc301), + PSIL_PDMA_XY_PKT(0xc302), + PSIL_PDMA_XY_PKT(0xc303), + PSIL_PDMA_XY_PKT(0xc304), + PSIL_PDMA_XY_PKT(0xc305), + PSIL_PDMA_XY_PKT(0xc306), + PSIL_PDMA_XY_PKT(0xc307), + PSIL_PDMA_XY_PKT(0xc308), + PSIL_PDMA_XY_PKT(0xc309), + PSIL_PDMA_XY_PKT(0xc30a), + PSIL_PDMA_XY_PKT(0xc30b), + /* PDMA_MAIN1 - UART0-6 */ + PSIL_PDMA_XY_PKT(0xc400), + PSIL_PDMA_XY_PKT(0xc401), + PSIL_PDMA_XY_PKT(0xc402), + PSIL_PDMA_XY_PKT(0xc403), + PSIL_PDMA_XY_PKT(0xc404), + PSIL_PDMA_XY_PKT(0xc405), + PSIL_PDMA_XY_PKT(0xc406), + /* PDMA_MAIN2 - MCASP0-2 */ + PSIL_PDMA_MCASP(0xc500), + PSIL_PDMA_MCASP(0xc501), + PSIL_PDMA_MCASP(0xc502), + /* CPSW3G */ + PSIL_ETHERNET(0xc600, 19, 19, 8), + PSIL_ETHERNET(0xc601, 20, 27, 8), + PSIL_ETHERNET(0xc602, 21, 35, 8), + PSIL_ETHERNET(0xc603, 22, 43, 8), + PSIL_ETHERNET(0xc604, 23, 51, 8), + PSIL_ETHERNET(0xc605, 24, 59, 8), + PSIL_ETHERNET(0xc606, 25, 67, 8), + PSIL_ETHERNET(0xc607, 26, 75, 8), +}; + +struct psil_ep_map am62a_ep_map = { + .name = "am62a", + .src = am62a_src_ep_map, + .src_count = ARRAY_SIZE(am62a_src_ep_map), + .dst = am62a_dst_ep_map, + .dst_count = ARRAY_SIZE(am62a_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-am64.c b/drivers/dma/ti/k3-psil-am64.c new file mode 100644 index 0000000000..9fdeaa11a4 --- /dev/null +++ b/drivers/dma/ti/k3-psil-am64.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Texas Instruments Incorporated - https://www.ti.com + * Author: Peter Ujfalusi + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .mapped_channel_id = -1, \ + .default_flow_id = -1, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .mapped_channel_id = -1, \ + .default_flow_id = -1, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x, ch, flow_base, flow_cnt) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + .mapped_channel_id = ch, \ + .flow_start = flow_base, \ + .flow_num = flow_cnt, \ + .default_flow_id = flow_base, \ + }, \ + } + +#define PSIL_SAUL(x, ch, flow_base, flow_cnt, default_flow, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .mapped_channel_id = ch, \ + .flow_start = flow_base, \ + .flow_num = flow_cnt, \ + .default_flow_id = default_flow, \ + .notdpkt = tx, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep am64_src_ep_map[] = { + /* SAUL */ + PSIL_SAUL(0x4000, 17, 32, 8, 32, 0), + PSIL_SAUL(0x4001, 18, 32, 8, 33, 0), + PSIL_SAUL(0x4002, 19, 40, 8, 40, 0), + PSIL_SAUL(0x4003, 20, 40, 8, 41, 0), + /* ICSS_G0 */ + PSIL_ETHERNET(0x4100, 21, 48, 16), + PSIL_ETHERNET(0x4101, 22, 64, 16), + PSIL_ETHERNET(0x4102, 23, 80, 16), + PSIL_ETHERNET(0x4103, 24, 96, 16), + /* ICSS_G1 */ + PSIL_ETHERNET(0x4200, 25, 112, 16), + PSIL_ETHERNET(0x4201, 26, 128, 16), + PSIL_ETHERNET(0x4202, 27, 144, 16), + PSIL_ETHERNET(0x4203, 28, 160, 16), + /* PDMA_MAIN0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0x4300), + PSIL_PDMA_XY_PKT(0x4301), + PSIL_PDMA_XY_PKT(0x4302), + PSIL_PDMA_XY_PKT(0x4303), + PSIL_PDMA_XY_PKT(0x4304), + PSIL_PDMA_XY_PKT(0x4305), + PSIL_PDMA_XY_PKT(0x4306), + PSIL_PDMA_XY_PKT(0x4307), + PSIL_PDMA_XY_PKT(0x4308), + PSIL_PDMA_XY_PKT(0x4309), + PSIL_PDMA_XY_PKT(0x430a), + PSIL_PDMA_XY_PKT(0x430b), + PSIL_PDMA_XY_PKT(0x430c), + PSIL_PDMA_XY_PKT(0x430d), + PSIL_PDMA_XY_PKT(0x430e), + PSIL_PDMA_XY_PKT(0x430f), + /* PDMA_MAIN0 - USART0-1 */ + PSIL_PDMA_XY_PKT(0x4310), + PSIL_PDMA_XY_PKT(0x4311), + /* PDMA_MAIN1 - SPI4 */ + PSIL_PDMA_XY_PKT(0x4400), + PSIL_PDMA_XY_PKT(0x4401), + PSIL_PDMA_XY_PKT(0x4402), + PSIL_PDMA_XY_PKT(0x4403), + /* PDMA_MAIN1 - USART2-6 */ + PSIL_PDMA_XY_PKT(0x4404), + PSIL_PDMA_XY_PKT(0x4405), + PSIL_PDMA_XY_PKT(0x4406), + PSIL_PDMA_XY_PKT(0x4407), + PSIL_PDMA_XY_PKT(0x4408), + /* PDMA_MAIN1 - ADCs */ + PSIL_PDMA_XY_TR(0x440f), + PSIL_PDMA_XY_TR(0x4410), + /* CPSW2 */ + PSIL_ETHERNET(0x4500, 16, 16, 16), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep am64_dst_ep_map[] = { + /* SAUL */ + PSIL_SAUL(0xc000, 24, 80, 8, 80, 1), + PSIL_SAUL(0xc001, 25, 88, 8, 88, 1), + /* ICSS_G0 */ + PSIL_ETHERNET(0xc100, 26, 96, 1), + PSIL_ETHERNET(0xc101, 27, 97, 1), + PSIL_ETHERNET(0xc102, 28, 98, 1), + PSIL_ETHERNET(0xc103, 29, 99, 1), + PSIL_ETHERNET(0xc104, 30, 100, 1), + PSIL_ETHERNET(0xc105, 31, 101, 1), + PSIL_ETHERNET(0xc106, 32, 102, 1), + PSIL_ETHERNET(0xc107, 33, 103, 1), + /* ICSS_G1 */ + PSIL_ETHERNET(0xc200, 34, 104, 1), + PSIL_ETHERNET(0xc201, 35, 105, 1), + PSIL_ETHERNET(0xc202, 36, 106, 1), + PSIL_ETHERNET(0xc203, 37, 107, 1), + PSIL_ETHERNET(0xc204, 38, 108, 1), + PSIL_ETHERNET(0xc205, 39, 109, 1), + PSIL_ETHERNET(0xc206, 40, 110, 1), + PSIL_ETHERNET(0xc207, 41, 111, 1), + /* CPSW2 */ + PSIL_ETHERNET(0xc500, 16, 16, 8), + PSIL_ETHERNET(0xc501, 17, 24, 8), + PSIL_ETHERNET(0xc502, 18, 32, 8), + PSIL_ETHERNET(0xc503, 19, 40, 8), + PSIL_ETHERNET(0xc504, 20, 48, 8), + PSIL_ETHERNET(0xc505, 21, 56, 8), + PSIL_ETHERNET(0xc506, 22, 64, 8), + PSIL_ETHERNET(0xc507, 23, 72, 8), +}; + +struct psil_ep_map am64_ep_map = { + .name = "am64", + .src = am64_src_ep_map, + .src_count = ARRAY_SIZE(am64_src_ep_map), + .dst = am64_dst_ep_map, + .dst_count = ARRAY_SIZE(am64_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-am654.c b/drivers/dma/ti/k3-psil-am654.c new file mode 100644 index 0000000000..a896a15908 --- /dev/null +++ b/drivers/dma/ti/k3-psil-am654.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + }, \ + } + +#define PSIL_SA2UL(x, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .notdpkt = tx, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep am654_src_ep_map[] = { + /* SA2UL */ + PSIL_SA2UL(0x4000, 0), + PSIL_SA2UL(0x4001, 0), + PSIL_SA2UL(0x4002, 0), + PSIL_SA2UL(0x4003, 0), + /* PRU_ICSSG0 */ + PSIL_ETHERNET(0x4100), + PSIL_ETHERNET(0x4101), + PSIL_ETHERNET(0x4102), + PSIL_ETHERNET(0x4103), + /* PRU_ICSSG1 */ + PSIL_ETHERNET(0x4200), + PSIL_ETHERNET(0x4201), + PSIL_ETHERNET(0x4202), + PSIL_ETHERNET(0x4203), + /* PRU_ICSSG2 */ + PSIL_ETHERNET(0x4300), + PSIL_ETHERNET(0x4301), + PSIL_ETHERNET(0x4302), + PSIL_ETHERNET(0x4303), + /* PDMA0 - McASPs */ + PSIL_PDMA_XY_TR(0x4400), + PSIL_PDMA_XY_TR(0x4401), + PSIL_PDMA_XY_TR(0x4402), + /* PDMA1 - SPI0-4 */ + PSIL_PDMA_XY_PKT(0x4500), + PSIL_PDMA_XY_PKT(0x4501), + PSIL_PDMA_XY_PKT(0x4502), + PSIL_PDMA_XY_PKT(0x4503), + PSIL_PDMA_XY_PKT(0x4504), + PSIL_PDMA_XY_PKT(0x4505), + PSIL_PDMA_XY_PKT(0x4506), + PSIL_PDMA_XY_PKT(0x4507), + PSIL_PDMA_XY_PKT(0x4508), + PSIL_PDMA_XY_PKT(0x4509), + PSIL_PDMA_XY_PKT(0x450a), + PSIL_PDMA_XY_PKT(0x450b), + PSIL_PDMA_XY_PKT(0x450c), + PSIL_PDMA_XY_PKT(0x450d), + PSIL_PDMA_XY_PKT(0x450e), + PSIL_PDMA_XY_PKT(0x450f), + PSIL_PDMA_XY_PKT(0x4510), + PSIL_PDMA_XY_PKT(0x4511), + PSIL_PDMA_XY_PKT(0x4512), + PSIL_PDMA_XY_PKT(0x4513), + /* PDMA1 - USART0-2 */ + PSIL_PDMA_XY_PKT(0x4514), + PSIL_PDMA_XY_PKT(0x4515), + PSIL_PDMA_XY_PKT(0x4516), + /* CPSW0 */ + PSIL_ETHERNET(0x7000), + /* MCU_PDMA0 - ADCs */ + PSIL_PDMA_XY_TR(0x7100), + PSIL_PDMA_XY_TR(0x7101), + PSIL_PDMA_XY_TR(0x7102), + PSIL_PDMA_XY_TR(0x7103), + /* MCU_PDMA1 - MCU_SPI0-2 */ + PSIL_PDMA_XY_PKT(0x7200), + PSIL_PDMA_XY_PKT(0x7201), + PSIL_PDMA_XY_PKT(0x7202), + PSIL_PDMA_XY_PKT(0x7203), + PSIL_PDMA_XY_PKT(0x7204), + PSIL_PDMA_XY_PKT(0x7205), + PSIL_PDMA_XY_PKT(0x7206), + PSIL_PDMA_XY_PKT(0x7207), + PSIL_PDMA_XY_PKT(0x7208), + PSIL_PDMA_XY_PKT(0x7209), + PSIL_PDMA_XY_PKT(0x720a), + PSIL_PDMA_XY_PKT(0x720b), + /* MCU_PDMA1 - MCU_USART0 */ + PSIL_PDMA_XY_PKT(0x7212), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep am654_dst_ep_map[] = { + /* SA2UL */ + PSIL_SA2UL(0xc000, 1), + PSIL_SA2UL(0xc001, 1), + /* PRU_ICSSG0 */ + PSIL_ETHERNET(0xc100), + PSIL_ETHERNET(0xc101), + PSIL_ETHERNET(0xc102), + PSIL_ETHERNET(0xc103), + PSIL_ETHERNET(0xc104), + PSIL_ETHERNET(0xc105), + PSIL_ETHERNET(0xc106), + PSIL_ETHERNET(0xc107), + /* PRU_ICSSG1 */ + PSIL_ETHERNET(0xc200), + PSIL_ETHERNET(0xc201), + PSIL_ETHERNET(0xc202), + PSIL_ETHERNET(0xc203), + PSIL_ETHERNET(0xc204), + PSIL_ETHERNET(0xc205), + PSIL_ETHERNET(0xc206), + PSIL_ETHERNET(0xc207), + /* PRU_ICSSG2 */ + PSIL_ETHERNET(0xc300), + PSIL_ETHERNET(0xc301), + PSIL_ETHERNET(0xc302), + PSIL_ETHERNET(0xc303), + PSIL_ETHERNET(0xc304), + PSIL_ETHERNET(0xc305), + PSIL_ETHERNET(0xc306), + PSIL_ETHERNET(0xc307), + /* CPSW0 */ + PSIL_ETHERNET(0xf000), + PSIL_ETHERNET(0xf001), + PSIL_ETHERNET(0xf002), + PSIL_ETHERNET(0xf003), + PSIL_ETHERNET(0xf004), + PSIL_ETHERNET(0xf005), + PSIL_ETHERNET(0xf006), + PSIL_ETHERNET(0xf007), +}; + +struct psil_ep_map am654_ep_map = { + .name = "am654", + .src = am654_src_ep_map, + .src_count = ARRAY_SIZE(am654_src_ep_map), + .dst = am654_dst_ep_map, + .dst_count = ARRAY_SIZE(am654_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-j7200.c b/drivers/dma/ti/k3-psil-j7200.c new file mode 100644 index 0000000000..e3feff8699 --- /dev/null +++ b/drivers/dma/ti/k3-psil-j7200.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + }, \ + } + +#define PSIL_SA2UL(x, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .notdpkt = tx, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep j7200_src_ep_map[] = { + /* PDMA_MCASP - McASP0-2 */ + PSIL_PDMA_MCASP(0x4400), + PSIL_PDMA_MCASP(0x4401), + PSIL_PDMA_MCASP(0x4402), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0x4600), + PSIL_PDMA_XY_PKT(0x4601), + PSIL_PDMA_XY_PKT(0x4602), + PSIL_PDMA_XY_PKT(0x4603), + PSIL_PDMA_XY_PKT(0x4604), + PSIL_PDMA_XY_PKT(0x4605), + PSIL_PDMA_XY_PKT(0x4606), + PSIL_PDMA_XY_PKT(0x4607), + PSIL_PDMA_XY_PKT(0x4608), + PSIL_PDMA_XY_PKT(0x4609), + PSIL_PDMA_XY_PKT(0x460a), + PSIL_PDMA_XY_PKT(0x460b), + PSIL_PDMA_XY_PKT(0x460c), + PSIL_PDMA_XY_PKT(0x460d), + PSIL_PDMA_XY_PKT(0x460e), + PSIL_PDMA_XY_PKT(0x460f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0x4610), + PSIL_PDMA_XY_PKT(0x4611), + PSIL_PDMA_XY_PKT(0x4612), + PSIL_PDMA_XY_PKT(0x4613), + PSIL_PDMA_XY_PKT(0x4614), + PSIL_PDMA_XY_PKT(0x4615), + PSIL_PDMA_XY_PKT(0x4616), + PSIL_PDMA_XY_PKT(0x4617), + PSIL_PDMA_XY_PKT(0x4618), + PSIL_PDMA_XY_PKT(0x4619), + PSIL_PDMA_XY_PKT(0x461a), + PSIL_PDMA_XY_PKT(0x461b), + PSIL_PDMA_XY_PKT(0x461c), + PSIL_PDMA_XY_PKT(0x461d), + PSIL_PDMA_XY_PKT(0x461e), + PSIL_PDMA_XY_PKT(0x461f), + /* PDMA_USART_G0 - UART0-1 */ + PSIL_PDMA_XY_PKT(0x4700), + PSIL_PDMA_XY_PKT(0x4701), + /* PDMA_USART_G1 - UART2-3 */ + PSIL_PDMA_XY_PKT(0x4702), + PSIL_PDMA_XY_PKT(0x4703), + /* PDMA_USART_G2 - UART4-9 */ + PSIL_PDMA_XY_PKT(0x4704), + PSIL_PDMA_XY_PKT(0x4705), + PSIL_PDMA_XY_PKT(0x4706), + PSIL_PDMA_XY_PKT(0x4707), + PSIL_PDMA_XY_PKT(0x4708), + PSIL_PDMA_XY_PKT(0x4709), + /* CPSW5 */ + PSIL_ETHERNET(0x4a00), + /* CPSW0 */ + PSIL_ETHERNET(0x7000), + /* MCU_PDMA_MISC_G0 - SPI0 */ + PSIL_PDMA_XY_PKT(0x7100), + PSIL_PDMA_XY_PKT(0x7101), + PSIL_PDMA_XY_PKT(0x7102), + PSIL_PDMA_XY_PKT(0x7103), + /* MCU_PDMA_MISC_G1 - SPI1-2 */ + PSIL_PDMA_XY_PKT(0x7200), + PSIL_PDMA_XY_PKT(0x7201), + PSIL_PDMA_XY_PKT(0x7202), + PSIL_PDMA_XY_PKT(0x7203), + PSIL_PDMA_XY_PKT(0x7204), + PSIL_PDMA_XY_PKT(0x7205), + PSIL_PDMA_XY_PKT(0x7206), + PSIL_PDMA_XY_PKT(0x7207), + /* MCU_PDMA_MISC_G2 - UART0 */ + PSIL_PDMA_XY_PKT(0x7300), + /* MCU_PDMA_ADC - ADC0-1 */ + PSIL_PDMA_XY_TR(0x7400), + PSIL_PDMA_XY_TR(0x7401), + /* SA2UL */ + PSIL_SA2UL(0x7500, 0), + PSIL_SA2UL(0x7501, 0), + PSIL_SA2UL(0x7502, 0), + PSIL_SA2UL(0x7503, 0), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep j7200_dst_ep_map[] = { + /* PDMA_MCASP - McASP0-2 */ + PSIL_PDMA_MCASP(0xc400), + PSIL_PDMA_MCASP(0xc401), + PSIL_PDMA_MCASP(0xc402), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0xc600), + PSIL_PDMA_XY_PKT(0xc601), + PSIL_PDMA_XY_PKT(0xc602), + PSIL_PDMA_XY_PKT(0xc603), + PSIL_PDMA_XY_PKT(0xc604), + PSIL_PDMA_XY_PKT(0xc605), + PSIL_PDMA_XY_PKT(0xc606), + PSIL_PDMA_XY_PKT(0xc607), + PSIL_PDMA_XY_PKT(0xc608), + PSIL_PDMA_XY_PKT(0xc609), + PSIL_PDMA_XY_PKT(0xc60a), + PSIL_PDMA_XY_PKT(0xc60b), + PSIL_PDMA_XY_PKT(0xc60c), + PSIL_PDMA_XY_PKT(0xc60d), + PSIL_PDMA_XY_PKT(0xc60e), + PSIL_PDMA_XY_PKT(0xc60f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0xc610), + PSIL_PDMA_XY_PKT(0xc611), + PSIL_PDMA_XY_PKT(0xc612), + PSIL_PDMA_XY_PKT(0xc613), + PSIL_PDMA_XY_PKT(0xc614), + PSIL_PDMA_XY_PKT(0xc615), + PSIL_PDMA_XY_PKT(0xc616), + PSIL_PDMA_XY_PKT(0xc617), + PSIL_PDMA_XY_PKT(0xc618), + PSIL_PDMA_XY_PKT(0xc619), + PSIL_PDMA_XY_PKT(0xc61a), + PSIL_PDMA_XY_PKT(0xc61b), + PSIL_PDMA_XY_PKT(0xc61c), + PSIL_PDMA_XY_PKT(0xc61d), + PSIL_PDMA_XY_PKT(0xc61e), + PSIL_PDMA_XY_PKT(0xc61f), + /* PDMA_USART_G0 - UART0-1 */ + PSIL_PDMA_XY_PKT(0xc700), + PSIL_PDMA_XY_PKT(0xc701), + /* PDMA_USART_G1 - UART2-3 */ + PSIL_PDMA_XY_PKT(0xc702), + PSIL_PDMA_XY_PKT(0xc703), + /* PDMA_USART_G2 - UART4-9 */ + PSIL_PDMA_XY_PKT(0xc704), + PSIL_PDMA_XY_PKT(0xc705), + PSIL_PDMA_XY_PKT(0xc706), + PSIL_PDMA_XY_PKT(0xc707), + PSIL_PDMA_XY_PKT(0xc708), + PSIL_PDMA_XY_PKT(0xc709), + /* CPSW5 */ + PSIL_ETHERNET(0xca00), + PSIL_ETHERNET(0xca01), + PSIL_ETHERNET(0xca02), + PSIL_ETHERNET(0xca03), + PSIL_ETHERNET(0xca04), + PSIL_ETHERNET(0xca05), + PSIL_ETHERNET(0xca06), + PSIL_ETHERNET(0xca07), + /* CPSW0 */ + PSIL_ETHERNET(0xf000), + PSIL_ETHERNET(0xf001), + PSIL_ETHERNET(0xf002), + PSIL_ETHERNET(0xf003), + PSIL_ETHERNET(0xf004), + PSIL_ETHERNET(0xf005), + PSIL_ETHERNET(0xf006), + PSIL_ETHERNET(0xf007), + /* MCU_PDMA_MISC_G0 - SPI0 */ + PSIL_PDMA_XY_PKT(0xf100), + PSIL_PDMA_XY_PKT(0xf101), + PSIL_PDMA_XY_PKT(0xf102), + PSIL_PDMA_XY_PKT(0xf103), + /* MCU_PDMA_MISC_G1 - SPI1-2 */ + PSIL_PDMA_XY_PKT(0xf200), + PSIL_PDMA_XY_PKT(0xf201), + PSIL_PDMA_XY_PKT(0xf202), + PSIL_PDMA_XY_PKT(0xf203), + PSIL_PDMA_XY_PKT(0xf204), + PSIL_PDMA_XY_PKT(0xf205), + PSIL_PDMA_XY_PKT(0xf206), + PSIL_PDMA_XY_PKT(0xf207), + /* MCU_PDMA_MISC_G2 - UART0 */ + PSIL_PDMA_XY_PKT(0xf300), + /* SA2UL */ + PSIL_SA2UL(0xf500, 1), + PSIL_SA2UL(0xf501, 1), +}; + +struct psil_ep_map j7200_ep_map = { + .name = "j7200", + .src = j7200_src_ep_map, + .src_count = ARRAY_SIZE(j7200_src_ep_map), + .dst = j7200_dst_ep_map, + .dst_count = ARRAY_SIZE(j7200_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-j721e.c b/drivers/dma/ti/k3-psil-j721e.c new file mode 100644 index 0000000000..e7c83d668b --- /dev/null +++ b/drivers/dma/ti/k3-psil-j721e.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + }, \ + } + +#define PSIL_SA2UL(x, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .notdpkt = tx, \ + }, \ + } + +#define PSIL_CSI2RX(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep j721e_src_ep_map[] = { + /* SA2UL */ + PSIL_SA2UL(0x4000, 0), + PSIL_SA2UL(0x4001, 0), + PSIL_SA2UL(0x4002, 0), + PSIL_SA2UL(0x4003, 0), + /* PRU_ICSSG0 */ + PSIL_ETHERNET(0x4100), + PSIL_ETHERNET(0x4101), + PSIL_ETHERNET(0x4102), + PSIL_ETHERNET(0x4103), + /* PRU_ICSSG1 */ + PSIL_ETHERNET(0x4200), + PSIL_ETHERNET(0x4201), + PSIL_ETHERNET(0x4202), + PSIL_ETHERNET(0x4203), + /* PDMA6 (PSIL_PDMA_MCASP_G0) - McASP0-2 */ + PSIL_PDMA_MCASP(0x4400), + PSIL_PDMA_MCASP(0x4401), + PSIL_PDMA_MCASP(0x4402), + /* PDMA7 (PSIL_PDMA_MCASP_G1) - McASP3-11 */ + PSIL_PDMA_MCASP(0x4500), + PSIL_PDMA_MCASP(0x4501), + PSIL_PDMA_MCASP(0x4502), + PSIL_PDMA_MCASP(0x4503), + PSIL_PDMA_MCASP(0x4504), + PSIL_PDMA_MCASP(0x4505), + PSIL_PDMA_MCASP(0x4506), + PSIL_PDMA_MCASP(0x4507), + PSIL_PDMA_MCASP(0x4508), + /* PDMA8 (PDMA_MISC_G0) - SPI0-1 */ + PSIL_PDMA_XY_PKT(0x4600), + PSIL_PDMA_XY_PKT(0x4601), + PSIL_PDMA_XY_PKT(0x4602), + PSIL_PDMA_XY_PKT(0x4603), + PSIL_PDMA_XY_PKT(0x4604), + PSIL_PDMA_XY_PKT(0x4605), + PSIL_PDMA_XY_PKT(0x4606), + PSIL_PDMA_XY_PKT(0x4607), + /* PDMA9 (PDMA_MISC_G1) - SPI2-3 */ + PSIL_PDMA_XY_PKT(0x460c), + PSIL_PDMA_XY_PKT(0x460d), + PSIL_PDMA_XY_PKT(0x460e), + PSIL_PDMA_XY_PKT(0x460f), + PSIL_PDMA_XY_PKT(0x4610), + PSIL_PDMA_XY_PKT(0x4611), + PSIL_PDMA_XY_PKT(0x4612), + PSIL_PDMA_XY_PKT(0x4613), + /* PDMA10 (PDMA_MISC_G2) - SPI4-5 */ + PSIL_PDMA_XY_PKT(0x4618), + PSIL_PDMA_XY_PKT(0x4619), + PSIL_PDMA_XY_PKT(0x461a), + PSIL_PDMA_XY_PKT(0x461b), + PSIL_PDMA_XY_PKT(0x461c), + PSIL_PDMA_XY_PKT(0x461d), + PSIL_PDMA_XY_PKT(0x461e), + PSIL_PDMA_XY_PKT(0x461f), + /* PDMA11 (PDMA_MISC_G3) */ + PSIL_PDMA_XY_PKT(0x4624), + PSIL_PDMA_XY_PKT(0x4625), + PSIL_PDMA_XY_PKT(0x4626), + PSIL_PDMA_XY_PKT(0x4627), + PSIL_PDMA_XY_PKT(0x4628), + PSIL_PDMA_XY_PKT(0x4629), + PSIL_PDMA_XY_PKT(0x4630), + PSIL_PDMA_XY_PKT(0x463a), + /* PDMA13 (PDMA_USART_G0) - UART0-1 */ + PSIL_PDMA_XY_PKT(0x4700), + PSIL_PDMA_XY_PKT(0x4701), + /* PDMA14 (PDMA_USART_G1) - UART2-3 */ + PSIL_PDMA_XY_PKT(0x4702), + PSIL_PDMA_XY_PKT(0x4703), + /* PDMA15 (PDMA_USART_G2) - UART4-9 */ + PSIL_PDMA_XY_PKT(0x4704), + PSIL_PDMA_XY_PKT(0x4705), + PSIL_PDMA_XY_PKT(0x4706), + PSIL_PDMA_XY_PKT(0x4707), + PSIL_PDMA_XY_PKT(0x4708), + PSIL_PDMA_XY_PKT(0x4709), + /* CSI2RX */ + PSIL_CSI2RX(0x4940), + PSIL_CSI2RX(0x4941), + PSIL_CSI2RX(0x4942), + PSIL_CSI2RX(0x4943), + PSIL_CSI2RX(0x4944), + PSIL_CSI2RX(0x4945), + PSIL_CSI2RX(0x4946), + PSIL_CSI2RX(0x4947), + PSIL_CSI2RX(0x4948), + PSIL_CSI2RX(0x4949), + PSIL_CSI2RX(0x494a), + PSIL_CSI2RX(0x494b), + PSIL_CSI2RX(0x494c), + PSIL_CSI2RX(0x494d), + PSIL_CSI2RX(0x494e), + PSIL_CSI2RX(0x494f), + PSIL_CSI2RX(0x4950), + PSIL_CSI2RX(0x4951), + PSIL_CSI2RX(0x4952), + PSIL_CSI2RX(0x4953), + PSIL_CSI2RX(0x4954), + PSIL_CSI2RX(0x4955), + PSIL_CSI2RX(0x4956), + PSIL_CSI2RX(0x4957), + PSIL_CSI2RX(0x4958), + PSIL_CSI2RX(0x4959), + PSIL_CSI2RX(0x495a), + PSIL_CSI2RX(0x495b), + PSIL_CSI2RX(0x495c), + PSIL_CSI2RX(0x495d), + PSIL_CSI2RX(0x495e), + PSIL_CSI2RX(0x495f), + PSIL_CSI2RX(0x4960), + PSIL_CSI2RX(0x4961), + PSIL_CSI2RX(0x4962), + PSIL_CSI2RX(0x4963), + PSIL_CSI2RX(0x4964), + PSIL_CSI2RX(0x4965), + PSIL_CSI2RX(0x4966), + PSIL_CSI2RX(0x4967), + PSIL_CSI2RX(0x4968), + PSIL_CSI2RX(0x4969), + PSIL_CSI2RX(0x496a), + PSIL_CSI2RX(0x496b), + PSIL_CSI2RX(0x496c), + PSIL_CSI2RX(0x496d), + PSIL_CSI2RX(0x496e), + PSIL_CSI2RX(0x496f), + PSIL_CSI2RX(0x4970), + PSIL_CSI2RX(0x4971), + PSIL_CSI2RX(0x4972), + PSIL_CSI2RX(0x4973), + PSIL_CSI2RX(0x4974), + PSIL_CSI2RX(0x4975), + PSIL_CSI2RX(0x4976), + PSIL_CSI2RX(0x4977), + PSIL_CSI2RX(0x4978), + PSIL_CSI2RX(0x4979), + PSIL_CSI2RX(0x497a), + PSIL_CSI2RX(0x497b), + PSIL_CSI2RX(0x497c), + PSIL_CSI2RX(0x497d), + PSIL_CSI2RX(0x497e), + PSIL_CSI2RX(0x497f), + /* CPSW9 */ + PSIL_ETHERNET(0x4a00), + /* CPSW0 */ + PSIL_ETHERNET(0x7000), + /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */ + PSIL_PDMA_XY_PKT(0x7100), + PSIL_PDMA_XY_PKT(0x7101), + PSIL_PDMA_XY_PKT(0x7102), + PSIL_PDMA_XY_PKT(0x7103), + /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */ + PSIL_PDMA_XY_PKT(0x7200), + PSIL_PDMA_XY_PKT(0x7201), + PSIL_PDMA_XY_PKT(0x7202), + PSIL_PDMA_XY_PKT(0x7203), + PSIL_PDMA_XY_PKT(0x7204), + PSIL_PDMA_XY_PKT(0x7205), + PSIL_PDMA_XY_PKT(0x7206), + PSIL_PDMA_XY_PKT(0x7207), + /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */ + PSIL_PDMA_XY_PKT(0x7300), + /* MCU_PDMA_ADC - ADC0-1 */ + PSIL_PDMA_XY_TR(0x7400), + PSIL_PDMA_XY_TR(0x7401), + PSIL_PDMA_XY_TR(0x7402), + PSIL_PDMA_XY_TR(0x7403), + /* SA2UL */ + PSIL_SA2UL(0x7500, 0), + PSIL_SA2UL(0x7501, 0), + PSIL_SA2UL(0x7502, 0), + PSIL_SA2UL(0x7503, 0), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep j721e_dst_ep_map[] = { + /* SA2UL */ + PSIL_SA2UL(0xc000, 1), + PSIL_SA2UL(0xc001, 1), + /* PRU_ICSSG0 */ + PSIL_ETHERNET(0xc100), + PSIL_ETHERNET(0xc101), + PSIL_ETHERNET(0xc102), + PSIL_ETHERNET(0xc103), + PSIL_ETHERNET(0xc104), + PSIL_ETHERNET(0xc105), + PSIL_ETHERNET(0xc106), + PSIL_ETHERNET(0xc107), + /* PRU_ICSSG1 */ + PSIL_ETHERNET(0xc200), + PSIL_ETHERNET(0xc201), + PSIL_ETHERNET(0xc202), + PSIL_ETHERNET(0xc203), + PSIL_ETHERNET(0xc204), + PSIL_ETHERNET(0xc205), + PSIL_ETHERNET(0xc206), + PSIL_ETHERNET(0xc207), + /* PDMA6 (PSIL_PDMA_MCASP_G0) - McASP0-2 */ + PSIL_PDMA_MCASP(0xc400), + PSIL_PDMA_MCASP(0xc401), + PSIL_PDMA_MCASP(0xc402), + /* PDMA7 (PSIL_PDMA_MCASP_G1) - McASP3-11 */ + PSIL_PDMA_MCASP(0xc500), + PSIL_PDMA_MCASP(0xc501), + PSIL_PDMA_MCASP(0xc502), + PSIL_PDMA_MCASP(0xc503), + PSIL_PDMA_MCASP(0xc504), + PSIL_PDMA_MCASP(0xc505), + PSIL_PDMA_MCASP(0xc506), + PSIL_PDMA_MCASP(0xc507), + PSIL_PDMA_MCASP(0xc508), + /* PDMA8 (PDMA_MISC_G0) - SPI0-1 */ + PSIL_PDMA_XY_PKT(0xc600), + PSIL_PDMA_XY_PKT(0xc601), + PSIL_PDMA_XY_PKT(0xc602), + PSIL_PDMA_XY_PKT(0xc603), + PSIL_PDMA_XY_PKT(0xc604), + PSIL_PDMA_XY_PKT(0xc605), + PSIL_PDMA_XY_PKT(0xc606), + PSIL_PDMA_XY_PKT(0xc607), + /* PDMA9 (PDMA_MISC_G1) - SPI2-3 */ + PSIL_PDMA_XY_PKT(0xc60c), + PSIL_PDMA_XY_PKT(0xc60d), + PSIL_PDMA_XY_PKT(0xc60e), + PSIL_PDMA_XY_PKT(0xc60f), + PSIL_PDMA_XY_PKT(0xc610), + PSIL_PDMA_XY_PKT(0xc611), + PSIL_PDMA_XY_PKT(0xc612), + PSIL_PDMA_XY_PKT(0xc613), + /* PDMA10 (PDMA_MISC_G2) - SPI4-5 */ + PSIL_PDMA_XY_PKT(0xc618), + PSIL_PDMA_XY_PKT(0xc619), + PSIL_PDMA_XY_PKT(0xc61a), + PSIL_PDMA_XY_PKT(0xc61b), + PSIL_PDMA_XY_PKT(0xc61c), + PSIL_PDMA_XY_PKT(0xc61d), + PSIL_PDMA_XY_PKT(0xc61e), + PSIL_PDMA_XY_PKT(0xc61f), + /* PDMA11 (PDMA_MISC_G3) */ + PSIL_PDMA_XY_PKT(0xc624), + PSIL_PDMA_XY_PKT(0xc625), + PSIL_PDMA_XY_PKT(0xc626), + PSIL_PDMA_XY_PKT(0xc627), + PSIL_PDMA_XY_PKT(0xc628), + PSIL_PDMA_XY_PKT(0xc629), + PSIL_PDMA_XY_PKT(0xc630), + PSIL_PDMA_XY_PKT(0xc63a), + /* PDMA13 (PDMA_USART_G0) - UART0-1 */ + PSIL_PDMA_XY_PKT(0xc700), + PSIL_PDMA_XY_PKT(0xc701), + /* PDMA14 (PDMA_USART_G1) - UART2-3 */ + PSIL_PDMA_XY_PKT(0xc702), + PSIL_PDMA_XY_PKT(0xc703), + /* PDMA15 (PDMA_USART_G2) - UART4-9 */ + PSIL_PDMA_XY_PKT(0xc704), + PSIL_PDMA_XY_PKT(0xc705), + PSIL_PDMA_XY_PKT(0xc706), + PSIL_PDMA_XY_PKT(0xc707), + PSIL_PDMA_XY_PKT(0xc708), + PSIL_PDMA_XY_PKT(0xc709), + /* CPSW9 */ + PSIL_ETHERNET(0xca00), + PSIL_ETHERNET(0xca01), + PSIL_ETHERNET(0xca02), + PSIL_ETHERNET(0xca03), + PSIL_ETHERNET(0xca04), + PSIL_ETHERNET(0xca05), + PSIL_ETHERNET(0xca06), + PSIL_ETHERNET(0xca07), + /* CPSW0 */ + PSIL_ETHERNET(0xf000), + PSIL_ETHERNET(0xf001), + PSIL_ETHERNET(0xf002), + PSIL_ETHERNET(0xf003), + PSIL_ETHERNET(0xf004), + PSIL_ETHERNET(0xf005), + PSIL_ETHERNET(0xf006), + PSIL_ETHERNET(0xf007), + /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */ + PSIL_PDMA_XY_PKT(0xf100), + PSIL_PDMA_XY_PKT(0xf101), + PSIL_PDMA_XY_PKT(0xf102), + PSIL_PDMA_XY_PKT(0xf103), + /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */ + PSIL_PDMA_XY_PKT(0xf200), + PSIL_PDMA_XY_PKT(0xf201), + PSIL_PDMA_XY_PKT(0xf202), + PSIL_PDMA_XY_PKT(0xf203), + PSIL_PDMA_XY_PKT(0xf204), + PSIL_PDMA_XY_PKT(0xf205), + PSIL_PDMA_XY_PKT(0xf206), + PSIL_PDMA_XY_PKT(0xf207), + /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */ + PSIL_PDMA_XY_PKT(0xf300), + /* SA2UL */ + PSIL_SA2UL(0xf500, 1), + PSIL_SA2UL(0xf501, 1), +}; + +struct psil_ep_map j721e_ep_map = { + .name = "j721e", + .src = j721e_src_ep_map, + .src_count = ARRAY_SIZE(j721e_src_ep_map), + .dst = j721e_dst_ep_map, + .dst_count = ARRAY_SIZE(j721e_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-j721s2.c b/drivers/dma/ti/k3-psil-j721s2.c new file mode 100644 index 0000000000..1d5430fc57 --- /dev/null +++ b/drivers/dma/ti/k3-psil-j721s2.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + }, \ + } + +#define PSIL_SA2UL(x, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .notdpkt = tx, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep j721s2_src_ep_map[] = { + /* PDMA_MCASP - McASP0-4 */ + PSIL_PDMA_MCASP(0x4400), + PSIL_PDMA_MCASP(0x4401), + PSIL_PDMA_MCASP(0x4402), + PSIL_PDMA_MCASP(0x4403), + PSIL_PDMA_MCASP(0x4404), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0x4600), + PSIL_PDMA_XY_PKT(0x4601), + PSIL_PDMA_XY_PKT(0x4602), + PSIL_PDMA_XY_PKT(0x4603), + PSIL_PDMA_XY_PKT(0x4604), + PSIL_PDMA_XY_PKT(0x4605), + PSIL_PDMA_XY_PKT(0x4606), + PSIL_PDMA_XY_PKT(0x4607), + PSIL_PDMA_XY_PKT(0x4608), + PSIL_PDMA_XY_PKT(0x4609), + PSIL_PDMA_XY_PKT(0x460a), + PSIL_PDMA_XY_PKT(0x460b), + PSIL_PDMA_XY_PKT(0x460c), + PSIL_PDMA_XY_PKT(0x460d), + PSIL_PDMA_XY_PKT(0x460e), + PSIL_PDMA_XY_PKT(0x460f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0x4610), + PSIL_PDMA_XY_PKT(0x4611), + PSIL_PDMA_XY_PKT(0x4612), + PSIL_PDMA_XY_PKT(0x4613), + PSIL_PDMA_XY_PKT(0x4614), + PSIL_PDMA_XY_PKT(0x4615), + PSIL_PDMA_XY_PKT(0x4616), + PSIL_PDMA_XY_PKT(0x4617), + PSIL_PDMA_XY_PKT(0x4618), + PSIL_PDMA_XY_PKT(0x4619), + PSIL_PDMA_XY_PKT(0x461a), + PSIL_PDMA_XY_PKT(0x461b), + PSIL_PDMA_XY_PKT(0x461c), + PSIL_PDMA_XY_PKT(0x461d), + PSIL_PDMA_XY_PKT(0x461e), + PSIL_PDMA_XY_PKT(0x461f), + /* MAIN_CPSW2G */ + PSIL_ETHERNET(0x4640), + /* PDMA_USART_G0 - UART0-1 */ + PSIL_PDMA_XY_PKT(0x4700), + PSIL_PDMA_XY_PKT(0x4701), + /* PDMA_USART_G1 - UART2-3 */ + PSIL_PDMA_XY_PKT(0x4702), + PSIL_PDMA_XY_PKT(0x4703), + /* PDMA_USART_G2 - UART4-9 */ + PSIL_PDMA_XY_PKT(0x4704), + PSIL_PDMA_XY_PKT(0x4705), + PSIL_PDMA_XY_PKT(0x4706), + PSIL_PDMA_XY_PKT(0x4707), + PSIL_PDMA_XY_PKT(0x4708), + PSIL_PDMA_XY_PKT(0x4709), + /* MAIN SA2UL */ + PSIL_SA2UL(0x4a40, 0), + PSIL_SA2UL(0x4a41, 0), + PSIL_SA2UL(0x4a42, 0), + PSIL_SA2UL(0x4a43, 0), + /* CPSW0 */ + PSIL_ETHERNET(0x7000), + /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */ + PSIL_PDMA_XY_PKT(0x7100), + PSIL_PDMA_XY_PKT(0x7101), + PSIL_PDMA_XY_PKT(0x7102), + PSIL_PDMA_XY_PKT(0x7103), + /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */ + PSIL_PDMA_XY_PKT(0x7200), + PSIL_PDMA_XY_PKT(0x7201), + PSIL_PDMA_XY_PKT(0x7202), + PSIL_PDMA_XY_PKT(0x7203), + PSIL_PDMA_XY_PKT(0x7204), + PSIL_PDMA_XY_PKT(0x7205), + PSIL_PDMA_XY_PKT(0x7206), + PSIL_PDMA_XY_PKT(0x7207), + /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */ + PSIL_PDMA_XY_PKT(0x7300), + /* MCU_PDMA_ADC - ADC0-1 */ + PSIL_PDMA_XY_TR(0x7400), + PSIL_PDMA_XY_TR(0x7401), + PSIL_PDMA_XY_TR(0x7402), + PSIL_PDMA_XY_TR(0x7403), + /* SA2UL */ + PSIL_SA2UL(0x7500, 0), + PSIL_SA2UL(0x7501, 0), + PSIL_SA2UL(0x7502, 0), + PSIL_SA2UL(0x7503, 0), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep j721s2_dst_ep_map[] = { + /* MAIN SA2UL */ + PSIL_SA2UL(0xca40, 1), + PSIL_SA2UL(0xca41, 1), + /* CPSW0 */ + PSIL_ETHERNET(0xf000), + PSIL_ETHERNET(0xf001), + PSIL_ETHERNET(0xf002), + PSIL_ETHERNET(0xf003), + PSIL_ETHERNET(0xf004), + PSIL_ETHERNET(0xf005), + PSIL_ETHERNET(0xf006), + PSIL_ETHERNET(0xf007), + /* MAIN_CPSW2G */ + PSIL_ETHERNET(0xc640), + PSIL_ETHERNET(0xc641), + PSIL_ETHERNET(0xc642), + PSIL_ETHERNET(0xc643), + PSIL_ETHERNET(0xc644), + PSIL_ETHERNET(0xc645), + PSIL_ETHERNET(0xc646), + PSIL_ETHERNET(0xc647), + /* SA2UL */ + PSIL_SA2UL(0xf500, 1), + PSIL_SA2UL(0xf501, 1), +}; + +struct psil_ep_map j721s2_ep_map = { + .name = "j721s2", + .src = j721s2_src_ep_map, + .src_count = ARRAY_SIZE(j721s2_src_ep_map), + .dst = j721s2_dst_ep_map, + .dst_count = ARRAY_SIZE(j721s2_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-j784s4.c b/drivers/dma/ti/k3-psil-j784s4.c new file mode 100644 index 0000000000..12bfa2478f --- /dev/null +++ b/drivers/dma/ti/k3-psil-j784s4.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + }, \ + } + +#define PSIL_SA2UL(x, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .notdpkt = tx, \ + }, \ + } + +#define PSIL_CSI2RX(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep j784s4_src_ep_map[] = { + /* PDMA_MCASP - McASP0-4 */ + PSIL_PDMA_MCASP(0x4400), + PSIL_PDMA_MCASP(0x4401), + PSIL_PDMA_MCASP(0x4402), + PSIL_PDMA_MCASP(0x4403), + PSIL_PDMA_MCASP(0x4404), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0x4600), + PSIL_PDMA_XY_PKT(0x4601), + PSIL_PDMA_XY_PKT(0x4602), + PSIL_PDMA_XY_PKT(0x4603), + PSIL_PDMA_XY_PKT(0x4604), + PSIL_PDMA_XY_PKT(0x4605), + PSIL_PDMA_XY_PKT(0x4606), + PSIL_PDMA_XY_PKT(0x4607), + PSIL_PDMA_XY_PKT(0x4608), + PSIL_PDMA_XY_PKT(0x4609), + PSIL_PDMA_XY_PKT(0x460a), + PSIL_PDMA_XY_PKT(0x460b), + PSIL_PDMA_XY_PKT(0x460c), + PSIL_PDMA_XY_PKT(0x460d), + PSIL_PDMA_XY_PKT(0x460e), + PSIL_PDMA_XY_PKT(0x460f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0x4620), + PSIL_PDMA_XY_PKT(0x4621), + PSIL_PDMA_XY_PKT(0x4622), + PSIL_PDMA_XY_PKT(0x4623), + PSIL_PDMA_XY_PKT(0x4624), + PSIL_PDMA_XY_PKT(0x4625), + PSIL_PDMA_XY_PKT(0x4626), + PSIL_PDMA_XY_PKT(0x4627), + PSIL_PDMA_XY_PKT(0x4628), + PSIL_PDMA_XY_PKT(0x4629), + PSIL_PDMA_XY_PKT(0x462a), + PSIL_PDMA_XY_PKT(0x462b), + PSIL_PDMA_XY_PKT(0x462c), + PSIL_PDMA_XY_PKT(0x462d), + PSIL_PDMA_XY_PKT(0x462e), + PSIL_PDMA_XY_PKT(0x462f), + /* MAIN_CPSW2G */ + PSIL_ETHERNET(0x4640), + /* PDMA_USART_G0 - UART0-1 */ + PSIL_PDMA_XY_PKT(0x4700), + PSIL_PDMA_XY_PKT(0x4701), + /* PDMA_USART_G1 - UART2-3 */ + PSIL_PDMA_XY_PKT(0x4702), + PSIL_PDMA_XY_PKT(0x4703), + /* PDMA_USART_G2 - UART4-9 */ + PSIL_PDMA_XY_PKT(0x4704), + PSIL_PDMA_XY_PKT(0x4705), + PSIL_PDMA_XY_PKT(0x4706), + PSIL_PDMA_XY_PKT(0x4707), + PSIL_PDMA_XY_PKT(0x4708), + PSIL_PDMA_XY_PKT(0x4709), + /* CSI2RX */ + PSIL_CSI2RX(0x4900), + PSIL_CSI2RX(0x4901), + PSIL_CSI2RX(0x4902), + PSIL_CSI2RX(0x4903), + PSIL_CSI2RX(0x4940), + PSIL_CSI2RX(0x4941), + PSIL_CSI2RX(0x4942), + PSIL_CSI2RX(0x4943), + PSIL_CSI2RX(0x4944), + PSIL_CSI2RX(0x4945), + PSIL_CSI2RX(0x4946), + PSIL_CSI2RX(0x4947), + PSIL_CSI2RX(0x4948), + PSIL_CSI2RX(0x4949), + PSIL_CSI2RX(0x494a), + PSIL_CSI2RX(0x494b), + PSIL_CSI2RX(0x494c), + PSIL_CSI2RX(0x494d), + PSIL_CSI2RX(0x494e), + PSIL_CSI2RX(0x494f), + PSIL_CSI2RX(0x4950), + PSIL_CSI2RX(0x4951), + PSIL_CSI2RX(0x4952), + PSIL_CSI2RX(0x4953), + PSIL_CSI2RX(0x4954), + PSIL_CSI2RX(0x4955), + PSIL_CSI2RX(0x4956), + PSIL_CSI2RX(0x4957), + PSIL_CSI2RX(0x4958), + PSIL_CSI2RX(0x4959), + PSIL_CSI2RX(0x495a), + PSIL_CSI2RX(0x495b), + PSIL_CSI2RX(0x495c), + PSIL_CSI2RX(0x495d), + PSIL_CSI2RX(0x495e), + PSIL_CSI2RX(0x495f), + PSIL_CSI2RX(0x4960), + PSIL_CSI2RX(0x4961), + PSIL_CSI2RX(0x4962), + PSIL_CSI2RX(0x4963), + PSIL_CSI2RX(0x4964), + PSIL_CSI2RX(0x4965), + PSIL_CSI2RX(0x4966), + PSIL_CSI2RX(0x4967), + PSIL_CSI2RX(0x4968), + PSIL_CSI2RX(0x4969), + PSIL_CSI2RX(0x496a), + PSIL_CSI2RX(0x496b), + PSIL_CSI2RX(0x496c), + PSIL_CSI2RX(0x496d), + PSIL_CSI2RX(0x496e), + PSIL_CSI2RX(0x496f), + PSIL_CSI2RX(0x4970), + PSIL_CSI2RX(0x4971), + PSIL_CSI2RX(0x4972), + PSIL_CSI2RX(0x4973), + PSIL_CSI2RX(0x4974), + PSIL_CSI2RX(0x4975), + PSIL_CSI2RX(0x4976), + PSIL_CSI2RX(0x4977), + PSIL_CSI2RX(0x4978), + PSIL_CSI2RX(0x4979), + PSIL_CSI2RX(0x497a), + PSIL_CSI2RX(0x497b), + PSIL_CSI2RX(0x497c), + PSIL_CSI2RX(0x497d), + PSIL_CSI2RX(0x497e), + PSIL_CSI2RX(0x497f), + PSIL_CSI2RX(0x4980), + PSIL_CSI2RX(0x4981), + PSIL_CSI2RX(0x4982), + PSIL_CSI2RX(0x4983), + PSIL_CSI2RX(0x4984), + PSIL_CSI2RX(0x4985), + PSIL_CSI2RX(0x4986), + PSIL_CSI2RX(0x4987), + PSIL_CSI2RX(0x4988), + PSIL_CSI2RX(0x4989), + PSIL_CSI2RX(0x498a), + PSIL_CSI2RX(0x498b), + PSIL_CSI2RX(0x498c), + PSIL_CSI2RX(0x498d), + PSIL_CSI2RX(0x498e), + PSIL_CSI2RX(0x498f), + PSIL_CSI2RX(0x4990), + PSIL_CSI2RX(0x4991), + PSIL_CSI2RX(0x4992), + PSIL_CSI2RX(0x4993), + PSIL_CSI2RX(0x4994), + PSIL_CSI2RX(0x4995), + PSIL_CSI2RX(0x4996), + PSIL_CSI2RX(0x4997), + PSIL_CSI2RX(0x4998), + PSIL_CSI2RX(0x4999), + PSIL_CSI2RX(0x499a), + PSIL_CSI2RX(0x499b), + PSIL_CSI2RX(0x499c), + PSIL_CSI2RX(0x499d), + PSIL_CSI2RX(0x499e), + PSIL_CSI2RX(0x499f), + /* MAIN_CPSW9G */ + PSIL_ETHERNET(0x4a00), + /* MAIN-SA2UL */ + PSIL_SA2UL(0x4a40, 0), + PSIL_SA2UL(0x4a41, 0), + PSIL_SA2UL(0x4a42, 0), + PSIL_SA2UL(0x4a43, 0), + /* MCU_CPSW0 */ + PSIL_ETHERNET(0x7000), + /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */ + PSIL_PDMA_XY_PKT(0x7100), + PSIL_PDMA_XY_PKT(0x7101), + PSIL_PDMA_XY_PKT(0x7102), + PSIL_PDMA_XY_PKT(0x7103), + /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */ + PSIL_PDMA_XY_PKT(0x7200), + PSIL_PDMA_XY_PKT(0x7201), + PSIL_PDMA_XY_PKT(0x7202), + PSIL_PDMA_XY_PKT(0x7203), + PSIL_PDMA_XY_PKT(0x7204), + PSIL_PDMA_XY_PKT(0x7205), + PSIL_PDMA_XY_PKT(0x7206), + PSIL_PDMA_XY_PKT(0x7207), + /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */ + PSIL_PDMA_XY_PKT(0x7300), + /* MCU_PDMA_ADC - ADC0-1 */ + PSIL_PDMA_XY_TR(0x7400), + PSIL_PDMA_XY_TR(0x7401), + PSIL_PDMA_XY_TR(0x7402), + PSIL_PDMA_XY_TR(0x7403), + /* MCU_SA2UL */ + PSIL_SA2UL(0x7500, 0), + PSIL_SA2UL(0x7501, 0), + PSIL_SA2UL(0x7502, 0), + PSIL_SA2UL(0x7503, 0), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep j784s4_dst_ep_map[] = { + /* MAIN_CPSW2G */ + PSIL_ETHERNET(0xc640), + PSIL_ETHERNET(0xc641), + PSIL_ETHERNET(0xc642), + PSIL_ETHERNET(0xc643), + PSIL_ETHERNET(0xc644), + PSIL_ETHERNET(0xc645), + PSIL_ETHERNET(0xc646), + PSIL_ETHERNET(0xc647), + /* MAIN_CPSW9G */ + PSIL_ETHERNET(0xca00), + PSIL_ETHERNET(0xca01), + PSIL_ETHERNET(0xca02), + PSIL_ETHERNET(0xca03), + PSIL_ETHERNET(0xca04), + PSIL_ETHERNET(0xca05), + PSIL_ETHERNET(0xca06), + PSIL_ETHERNET(0xca07), + /* MAIN-SA2UL */ + PSIL_SA2UL(0xca40, 1), + PSIL_SA2UL(0xca41, 1), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0xc600), + PSIL_PDMA_XY_PKT(0xc601), + PSIL_PDMA_XY_PKT(0xc602), + PSIL_PDMA_XY_PKT(0xc603), + PSIL_PDMA_XY_PKT(0xc604), + PSIL_PDMA_XY_PKT(0xc605), + PSIL_PDMA_XY_PKT(0xc606), + PSIL_PDMA_XY_PKT(0xc607), + PSIL_PDMA_XY_PKT(0xc608), + PSIL_PDMA_XY_PKT(0xc609), + PSIL_PDMA_XY_PKT(0xc60a), + PSIL_PDMA_XY_PKT(0xc60b), + PSIL_PDMA_XY_PKT(0xc60c), + PSIL_PDMA_XY_PKT(0xc60d), + PSIL_PDMA_XY_PKT(0xc60e), + PSIL_PDMA_XY_PKT(0xc60f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0xc620), + PSIL_PDMA_XY_PKT(0xc621), + PSIL_PDMA_XY_PKT(0xc622), + PSIL_PDMA_XY_PKT(0xc623), + PSIL_PDMA_XY_PKT(0xc624), + PSIL_PDMA_XY_PKT(0xc625), + PSIL_PDMA_XY_PKT(0xc626), + PSIL_PDMA_XY_PKT(0xc627), + PSIL_PDMA_XY_PKT(0xc628), + PSIL_PDMA_XY_PKT(0xc629), + PSIL_PDMA_XY_PKT(0xc62a), + PSIL_PDMA_XY_PKT(0xc62b), + PSIL_PDMA_XY_PKT(0xc62c), + PSIL_PDMA_XY_PKT(0xc62d), + PSIL_PDMA_XY_PKT(0xc62e), + PSIL_PDMA_XY_PKT(0xc62f), + /* MCU_CPSW0 */ + PSIL_ETHERNET(0xf000), + PSIL_ETHERNET(0xf001), + PSIL_ETHERNET(0xf002), + PSIL_ETHERNET(0xf003), + PSIL_ETHERNET(0xf004), + PSIL_ETHERNET(0xf005), + PSIL_ETHERNET(0xf006), + PSIL_ETHERNET(0xf007), + /* MCU_PDMA_MISC_G0 - SPI0 */ + PSIL_PDMA_XY_PKT(0xf100), + PSIL_PDMA_XY_PKT(0xf101), + PSIL_PDMA_XY_PKT(0xf102), + PSIL_PDMA_XY_PKT(0xf103), + /* MCU_PDMA_MISC_G1 - SPI1-2 */ + PSIL_PDMA_XY_PKT(0xf200), + PSIL_PDMA_XY_PKT(0xf201), + PSIL_PDMA_XY_PKT(0xf202), + PSIL_PDMA_XY_PKT(0xf203), + PSIL_PDMA_XY_PKT(0xf204), + PSIL_PDMA_XY_PKT(0xf205), + PSIL_PDMA_XY_PKT(0xf206), + PSIL_PDMA_XY_PKT(0xf207), + /* MCU_SA2UL */ + PSIL_SA2UL(0xf500, 1), + PSIL_SA2UL(0xf501, 1), +}; + +struct psil_ep_map j784s4_ep_map = { + .name = "j784s4", + .src = j784s4_src_ep_map, + .src_count = ARRAY_SIZE(j784s4_src_ep_map), + .dst = j784s4_dst_ep_map, + .dst_count = ARRAY_SIZE(j784s4_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h new file mode 100644 index 0000000000..c383723d1c --- /dev/null +++ b/drivers/dma/ti/k3-psil-priv.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + */ + +#ifndef K3_PSIL_PRIV_H_ +#define K3_PSIL_PRIV_H_ + +#include + +struct psil_ep { + u32 thread_id; + struct psil_endpoint_config ep_config; +}; + +/** + * struct psil_ep_map - PSI-L thread ID configuration maps + * @name: Name of the map, set it to the name of the SoC + * @src: Array of source PSI-L thread configurations + * @src_count: Number of entries in the src array + * @dst: Array of destination PSI-L thread configurations + * @dst_count: Number of entries in the dst array + * + * In case of symmetric configuration for a matching src/dst thread (for example + * 0x4400 and 0xc400) only the src configuration can be present. If no dst + * configuration found the code will look for (dst_thread_id & ~0x8000) to find + * the symmetric match. + */ +struct psil_ep_map { + char *name; + struct psil_ep *src; + int src_count; + struct psil_ep *dst; + int dst_count; +}; + +struct psil_endpoint_config *psil_get_ep_config(u32 thread_id); + +/* SoC PSI-L endpoint maps */ +extern struct psil_ep_map am654_ep_map; +extern struct psil_ep_map j721e_ep_map; +extern struct psil_ep_map j7200_ep_map; +extern struct psil_ep_map am64_ep_map; +extern struct psil_ep_map j721s2_ep_map; +extern struct psil_ep_map am62_ep_map; +extern struct psil_ep_map am62a_ep_map; +extern struct psil_ep_map j784s4_ep_map; + +#endif /* K3_PSIL_PRIV_H_ */ diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c new file mode 100644 index 0000000000..c11389d67a --- /dev/null +++ b/drivers/dma/ti/k3-psil.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "k3-psil-priv.h" + +static DEFINE_MUTEX(ep_map_mutex); +static const struct psil_ep_map *soc_ep_map; + +static const struct soc_device_attribute k3_soc_devices[] = { + { .family = "AM65X", .data = &am654_ep_map }, + { .family = "J721E", .data = &j721e_ep_map }, + { .family = "J7200", .data = &j7200_ep_map }, + { .family = "AM64X", .data = &am64_ep_map }, + { .family = "J721S2", .data = &j721s2_ep_map }, + { .family = "AM62X", .data = &am62_ep_map }, + { .family = "AM62AX", .data = &am62a_ep_map }, + { .family = "J784S4", .data = &j784s4_ep_map }, + { /* sentinel */ } +}; + +struct psil_endpoint_config *psil_get_ep_config(u32 thread_id) +{ + int i; + + mutex_lock(&ep_map_mutex); + if (!soc_ep_map) { + const struct soc_device_attribute *soc; + + soc = soc_device_match(k3_soc_devices); + if (soc) { + soc_ep_map = soc->data; + } else { + pr_err("PSIL: No compatible machine found for map\n"); + mutex_unlock(&ep_map_mutex); + return ERR_PTR(-ENOTSUPP); + } + pr_debug("%s: Using map for %s\n", __func__, soc_ep_map->name); + } + mutex_unlock(&ep_map_mutex); + + if (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET && soc_ep_map->dst) { + /* check in destination thread map */ + for (i = 0; i < soc_ep_map->dst_count; i++) { + if (soc_ep_map->dst[i].thread_id == thread_id) + return &soc_ep_map->dst[i].ep_config; + } + } + + thread_id &= ~K3_PSIL_DST_THREAD_ID_OFFSET; + if (soc_ep_map->src) { + for (i = 0; i < soc_ep_map->src_count; i++) { + if (soc_ep_map->src[i].thread_id == thread_id) + return &soc_ep_map->src[i].ep_config; + } + } + + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(psil_get_ep_config); + +int psil_set_new_ep_config(struct device *dev, const char *name, + struct psil_endpoint_config *ep_config) +{ + struct psil_endpoint_config *dst_ep_config; + struct of_phandle_args dma_spec; + u32 thread_id; + int index; + + if (!dev || !dev->of_node) + return -EINVAL; + + index = of_property_match_string(dev->of_node, "dma-names", name); + if (index < 0) + return index; + + if (of_parse_phandle_with_args(dev->of_node, "dmas", "#dma-cells", + index, &dma_spec)) + return -ENOENT; + + thread_id = dma_spec.args[0]; + + dst_ep_config = psil_get_ep_config(thread_id); + if (IS_ERR(dst_ep_config)) { + pr_err("PSIL: thread ID 0x%04x not defined in map\n", + thread_id); + of_node_put(dma_spec.np); + return PTR_ERR(dst_ep_config); + } + + memcpy(dst_ep_config, ep_config, sizeof(*dst_ep_config)); + + of_node_put(dma_spec.np); + return 0; +} +EXPORT_SYMBOL_GPL(psil_set_new_ep_config); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c new file mode 100644 index 0000000000..c278d5facf --- /dev/null +++ b/drivers/dma/ti/k3-udma-glue.c @@ -0,0 +1,1445 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * K3 NAVSS DMA glue interface + * + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "k3-udma.h" +#include "k3-psil-priv.h" + +struct k3_udma_glue_common { + struct device *dev; + struct device chan_dev; + struct udma_dev *udmax; + const struct udma_tisci_rm *tisci_rm; + struct k3_ringacc *ringacc; + u32 src_thread; + u32 dst_thread; + + u32 hdesc_size; + bool epib; + u32 psdata_size; + u32 swdata_size; + u32 atype_asel; + struct psil_endpoint_config *ep_config; +}; + +struct k3_udma_glue_tx_channel { + struct k3_udma_glue_common common; + + struct udma_tchan *udma_tchanx; + int udma_tchan_id; + + struct k3_ring *ringtx; + struct k3_ring *ringtxcq; + + bool psil_paired; + + int virq; + + atomic_t free_pkts; + bool tx_pause_on_err; + bool tx_filt_einfo; + bool tx_filt_pswords; + bool tx_supr_tdpkt; + + int udma_tflow_id; +}; + +struct k3_udma_glue_rx_flow { + struct udma_rflow *udma_rflow; + int udma_rflow_id; + struct k3_ring *ringrx; + struct k3_ring *ringrxfdq; + + int virq; +}; + +struct k3_udma_glue_rx_channel { + struct k3_udma_glue_common common; + + struct udma_rchan *udma_rchanx; + int udma_rchan_id; + bool remote; + + bool psil_paired; + + u32 swdata_size; + int flow_id_base; + + struct k3_udma_glue_rx_flow *flows; + u32 flow_num; + u32 flows_ready; +}; + +static void k3_udma_chan_dev_release(struct device *dev) +{ + /* The struct containing the device is devm managed */ +} + +static struct class k3_udma_glue_devclass = { + .name = "k3_udma_glue_chan", + .dev_release = k3_udma_chan_dev_release, +}; + +#define K3_UDMAX_TDOWN_TIMEOUT_US 1000 + +static int of_k3_udma_glue_parse(struct device_node *udmax_np, + struct k3_udma_glue_common *common) +{ + common->udmax = of_xudma_dev_get(udmax_np, NULL); + if (IS_ERR(common->udmax)) + return PTR_ERR(common->udmax); + + common->ringacc = xudma_get_ringacc(common->udmax); + common->tisci_rm = xudma_dev_get_tisci_rm(common->udmax); + + return 0; +} + +static int of_k3_udma_glue_parse_chn(struct device_node *chn_np, + const char *name, struct k3_udma_glue_common *common, + bool tx_chn) +{ + struct of_phandle_args dma_spec; + u32 thread_id; + int ret = 0; + int index; + + if (unlikely(!name)) + return -EINVAL; + + index = of_property_match_string(chn_np, "dma-names", name); + if (index < 0) + return index; + + if (of_parse_phandle_with_args(chn_np, "dmas", "#dma-cells", index, + &dma_spec)) + return -ENOENT; + + ret = of_k3_udma_glue_parse(dma_spec.np, common); + if (ret) + goto out_put_spec; + + thread_id = dma_spec.args[0]; + if (dma_spec.args_count == 2) { + if (dma_spec.args[1] > 2 && !xudma_is_pktdma(common->udmax)) { + dev_err(common->dev, "Invalid channel atype: %u\n", + dma_spec.args[1]); + ret = -EINVAL; + goto out_put_spec; + } + if (dma_spec.args[1] > 15 && xudma_is_pktdma(common->udmax)) { + dev_err(common->dev, "Invalid channel asel: %u\n", + dma_spec.args[1]); + ret = -EINVAL; + goto out_put_spec; + } + + common->atype_asel = dma_spec.args[1]; + } + + if (tx_chn && !(thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) { + ret = -EINVAL; + goto out_put_spec; + } + + if (!tx_chn && (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) { + ret = -EINVAL; + goto out_put_spec; + } + + /* get psil endpoint config */ + common->ep_config = psil_get_ep_config(thread_id); + if (IS_ERR(common->ep_config)) { + dev_err(common->dev, + "No configuration for psi-l thread 0x%04x\n", + thread_id); + ret = PTR_ERR(common->ep_config); + goto out_put_spec; + } + + common->epib = common->ep_config->needs_epib; + common->psdata_size = common->ep_config->psd_size; + + if (tx_chn) + common->dst_thread = thread_id; + else + common->src_thread = thread_id; + +out_put_spec: + of_node_put(dma_spec.np); + return ret; +}; + +static void k3_udma_glue_dump_tx_chn(struct k3_udma_glue_tx_channel *tx_chn) +{ + struct device *dev = tx_chn->common.dev; + + dev_dbg(dev, "dump_tx_chn:\n" + "udma_tchan_id: %d\n" + "src_thread: %08x\n" + "dst_thread: %08x\n", + tx_chn->udma_tchan_id, + tx_chn->common.src_thread, + tx_chn->common.dst_thread); +} + +static void k3_udma_glue_dump_tx_rt_chn(struct k3_udma_glue_tx_channel *chn, + char *mark) +{ + struct device *dev = chn->common.dev; + + dev_dbg(dev, "=== dump ===> %s\n", mark); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_CTL_REG, + xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PEER_RT_EN_REG, + xudma_tchanrt_read(chn->udma_tchanx, + UDMA_CHAN_RT_PEER_RT_EN_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PCNT_REG, + xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_PCNT_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_BCNT_REG, + xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_BCNT_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_SBCNT_REG, + xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_SBCNT_REG)); +} + +static int k3_udma_glue_cfg_tx_chn(struct k3_udma_glue_tx_channel *tx_chn) +{ + const struct udma_tisci_rm *tisci_rm = tx_chn->common.tisci_rm; + struct ti_sci_msg_rm_udmap_tx_ch_cfg req; + + memset(&req, 0, sizeof(req)); + + req.valid_params = TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID; + req.nav_id = tisci_rm->tisci_dev_id; + req.index = tx_chn->udma_tchan_id; + if (tx_chn->tx_pause_on_err) + req.tx_pause_on_err = 1; + if (tx_chn->tx_filt_einfo) + req.tx_filt_einfo = 1; + if (tx_chn->tx_filt_pswords) + req.tx_filt_pswords = 1; + req.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR; + if (tx_chn->tx_supr_tdpkt) + req.tx_supr_tdpkt = 1; + req.tx_fetch_size = tx_chn->common.hdesc_size >> 2; + req.txcq_qnum = k3_ringacc_get_ring_id(tx_chn->ringtxcq); + req.tx_atype = tx_chn->common.atype_asel; + + return tisci_rm->tisci_udmap_ops->tx_ch_cfg(tisci_rm->tisci, &req); +} + +struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev, + const char *name, struct k3_udma_glue_tx_channel_cfg *cfg) +{ + struct k3_udma_glue_tx_channel *tx_chn; + int ret; + + tx_chn = devm_kzalloc(dev, sizeof(*tx_chn), GFP_KERNEL); + if (!tx_chn) + return ERR_PTR(-ENOMEM); + + tx_chn->common.dev = dev; + tx_chn->common.swdata_size = cfg->swdata_size; + tx_chn->tx_pause_on_err = cfg->tx_pause_on_err; + tx_chn->tx_filt_einfo = cfg->tx_filt_einfo; + tx_chn->tx_filt_pswords = cfg->tx_filt_pswords; + tx_chn->tx_supr_tdpkt = cfg->tx_supr_tdpkt; + + /* parse of udmap channel */ + ret = of_k3_udma_glue_parse_chn(dev->of_node, name, + &tx_chn->common, true); + if (ret) + goto err; + + tx_chn->common.hdesc_size = cppi5_hdesc_calc_size(tx_chn->common.epib, + tx_chn->common.psdata_size, + tx_chn->common.swdata_size); + + if (xudma_is_pktdma(tx_chn->common.udmax)) + tx_chn->udma_tchan_id = tx_chn->common.ep_config->mapped_channel_id; + else + tx_chn->udma_tchan_id = -1; + + /* request and cfg UDMAP TX channel */ + tx_chn->udma_tchanx = xudma_tchan_get(tx_chn->common.udmax, + tx_chn->udma_tchan_id); + if (IS_ERR(tx_chn->udma_tchanx)) { + ret = PTR_ERR(tx_chn->udma_tchanx); + dev_err(dev, "UDMAX tchanx get err %d\n", ret); + goto err; + } + tx_chn->udma_tchan_id = xudma_tchan_get_id(tx_chn->udma_tchanx); + + tx_chn->common.chan_dev.class = &k3_udma_glue_devclass; + tx_chn->common.chan_dev.parent = xudma_get_device(tx_chn->common.udmax); + dev_set_name(&tx_chn->common.chan_dev, "tchan%d-0x%04x", + tx_chn->udma_tchan_id, tx_chn->common.dst_thread); + ret = device_register(&tx_chn->common.chan_dev); + if (ret) { + dev_err(dev, "Channel Device registration failed %d\n", ret); + put_device(&tx_chn->common.chan_dev); + tx_chn->common.chan_dev.parent = NULL; + goto err; + } + + if (xudma_is_pktdma(tx_chn->common.udmax)) { + /* prepare the channel device as coherent */ + tx_chn->common.chan_dev.dma_coherent = true; + dma_coerce_mask_and_coherent(&tx_chn->common.chan_dev, + DMA_BIT_MASK(48)); + } + + atomic_set(&tx_chn->free_pkts, cfg->txcq_cfg.size); + + if (xudma_is_pktdma(tx_chn->common.udmax)) + tx_chn->udma_tflow_id = tx_chn->common.ep_config->default_flow_id; + else + tx_chn->udma_tflow_id = tx_chn->udma_tchan_id; + + /* request and cfg rings */ + ret = k3_ringacc_request_rings_pair(tx_chn->common.ringacc, + tx_chn->udma_tflow_id, -1, + &tx_chn->ringtx, + &tx_chn->ringtxcq); + if (ret) { + dev_err(dev, "Failed to get TX/TXCQ rings %d\n", ret); + goto err; + } + + /* Set the dma_dev for the rings to be configured */ + cfg->tx_cfg.dma_dev = k3_udma_glue_tx_get_dma_device(tx_chn); + cfg->txcq_cfg.dma_dev = cfg->tx_cfg.dma_dev; + + /* Set the ASEL value for DMA rings of PKTDMA */ + if (xudma_is_pktdma(tx_chn->common.udmax)) { + cfg->tx_cfg.asel = tx_chn->common.atype_asel; + cfg->txcq_cfg.asel = tx_chn->common.atype_asel; + } + + ret = k3_ringacc_ring_cfg(tx_chn->ringtx, &cfg->tx_cfg); + if (ret) { + dev_err(dev, "Failed to cfg ringtx %d\n", ret); + goto err; + } + + ret = k3_ringacc_ring_cfg(tx_chn->ringtxcq, &cfg->txcq_cfg); + if (ret) { + dev_err(dev, "Failed to cfg ringtx %d\n", ret); + goto err; + } + + /* request and cfg psi-l */ + tx_chn->common.src_thread = + xudma_dev_get_psil_base(tx_chn->common.udmax) + + tx_chn->udma_tchan_id; + + ret = k3_udma_glue_cfg_tx_chn(tx_chn); + if (ret) { + dev_err(dev, "Failed to cfg tchan %d\n", ret); + goto err; + } + + k3_udma_glue_dump_tx_chn(tx_chn); + + return tx_chn; + +err: + k3_udma_glue_release_tx_chn(tx_chn); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_request_tx_chn); + +void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn) +{ + if (tx_chn->psil_paired) { + xudma_navss_psil_unpair(tx_chn->common.udmax, + tx_chn->common.src_thread, + tx_chn->common.dst_thread); + tx_chn->psil_paired = false; + } + + if (!IS_ERR_OR_NULL(tx_chn->udma_tchanx)) + xudma_tchan_put(tx_chn->common.udmax, + tx_chn->udma_tchanx); + + if (tx_chn->ringtxcq) + k3_ringacc_ring_free(tx_chn->ringtxcq); + + if (tx_chn->ringtx) + k3_ringacc_ring_free(tx_chn->ringtx); + + if (tx_chn->common.chan_dev.parent) { + device_unregister(&tx_chn->common.chan_dev); + tx_chn->common.chan_dev.parent = NULL; + } +} +EXPORT_SYMBOL_GPL(k3_udma_glue_release_tx_chn); + +int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, + struct cppi5_host_desc_t *desc_tx, + dma_addr_t desc_dma) +{ + u32 ringtxcq_id; + + if (!atomic_add_unless(&tx_chn->free_pkts, -1, 0)) + return -ENOMEM; + + ringtxcq_id = k3_ringacc_get_ring_id(tx_chn->ringtxcq); + cppi5_desc_set_retpolicy(&desc_tx->hdr, 0, ringtxcq_id); + + return k3_ringacc_ring_push(tx_chn->ringtx, &desc_dma); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_push_tx_chn); + +int k3_udma_glue_pop_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, + dma_addr_t *desc_dma) +{ + int ret; + + ret = k3_ringacc_ring_pop(tx_chn->ringtxcq, desc_dma); + if (!ret) + atomic_inc(&tx_chn->free_pkts); + + return ret; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_pop_tx_chn); + +int k3_udma_glue_enable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn) +{ + int ret; + + ret = xudma_navss_psil_pair(tx_chn->common.udmax, + tx_chn->common.src_thread, + tx_chn->common.dst_thread); + if (ret) { + dev_err(tx_chn->common.dev, "PSI-L request err %d\n", ret); + return ret; + } + + tx_chn->psil_paired = true; + + xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_ENABLE); + + xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN); + + k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn en"); + return 0; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_enable_tx_chn); + +void k3_udma_glue_disable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn) +{ + k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis1"); + + xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG, 0); + + xudma_tchanrt_write(tx_chn->udma_tchanx, + UDMA_CHAN_RT_PEER_RT_EN_REG, 0); + k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis2"); + + if (tx_chn->psil_paired) { + xudma_navss_psil_unpair(tx_chn->common.udmax, + tx_chn->common.src_thread, + tx_chn->common.dst_thread); + tx_chn->psil_paired = false; + } +} +EXPORT_SYMBOL_GPL(k3_udma_glue_disable_tx_chn); + +void k3_udma_glue_tdown_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, + bool sync) +{ + int i = 0; + u32 val; + + k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown1"); + + xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN); + + val = xudma_tchanrt_read(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG); + + while (sync && (val & UDMA_CHAN_RT_CTL_EN)) { + val = xudma_tchanrt_read(tx_chn->udma_tchanx, + UDMA_CHAN_RT_CTL_REG); + udelay(1); + if (i > K3_UDMAX_TDOWN_TIMEOUT_US) { + dev_err(tx_chn->common.dev, "TX tdown timeout\n"); + break; + } + i++; + } + + val = xudma_tchanrt_read(tx_chn->udma_tchanx, + UDMA_CHAN_RT_PEER_RT_EN_REG); + if (sync && (val & UDMA_PEER_RT_EN_ENABLE)) + dev_err(tx_chn->common.dev, "TX tdown peer not stopped\n"); + k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown2"); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_tx_chn); + +void k3_udma_glue_reset_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, + void *data, + void (*cleanup)(void *data, dma_addr_t desc_dma)) +{ + struct device *dev = tx_chn->common.dev; + dma_addr_t desc_dma; + int occ_tx, i, ret; + + /* + * TXQ reset need to be special way as it is input for udma and its + * state cached by udma, so: + * 1) save TXQ occ + * 2) clean up TXQ and call callback .cleanup() for each desc + * 3) reset TXQ in a special way + */ + occ_tx = k3_ringacc_ring_get_occ(tx_chn->ringtx); + dev_dbg(dev, "TX reset occ_tx %u\n", occ_tx); + + for (i = 0; i < occ_tx; i++) { + ret = k3_ringacc_ring_pop(tx_chn->ringtx, &desc_dma); + if (ret) { + if (ret != -ENODATA) + dev_err(dev, "TX reset pop %d\n", ret); + break; + } + cleanup(data, desc_dma); + } + + /* reset TXCQ as it is not input for udma - expected to be empty */ + k3_ringacc_ring_reset(tx_chn->ringtxcq); + k3_ringacc_ring_reset_dma(tx_chn->ringtx, occ_tx); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_reset_tx_chn); + +u32 k3_udma_glue_tx_get_hdesc_size(struct k3_udma_glue_tx_channel *tx_chn) +{ + return tx_chn->common.hdesc_size; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_hdesc_size); + +u32 k3_udma_glue_tx_get_txcq_id(struct k3_udma_glue_tx_channel *tx_chn) +{ + return k3_ringacc_get_ring_id(tx_chn->ringtxcq); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_txcq_id); + +int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn) +{ + if (xudma_is_pktdma(tx_chn->common.udmax)) { + tx_chn->virq = xudma_pktdma_tflow_get_irq(tx_chn->common.udmax, + tx_chn->udma_tflow_id); + } else { + tx_chn->virq = k3_ringacc_get_ring_irq_num(tx_chn->ringtxcq); + } + + if (!tx_chn->virq) + return -ENXIO; + + return tx_chn->virq; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_irq); + +struct device * + k3_udma_glue_tx_get_dma_device(struct k3_udma_glue_tx_channel *tx_chn) +{ + if (xudma_is_pktdma(tx_chn->common.udmax) && + (tx_chn->common.atype_asel == 14 || tx_chn->common.atype_asel == 15)) + return &tx_chn->common.chan_dev; + + return xudma_get_device(tx_chn->common.udmax); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_dma_device); + +void k3_udma_glue_tx_dma_to_cppi5_addr(struct k3_udma_glue_tx_channel *tx_chn, + dma_addr_t *addr) +{ + if (!xudma_is_pktdma(tx_chn->common.udmax) || + !tx_chn->common.atype_asel) + return; + + *addr |= (u64)tx_chn->common.atype_asel << K3_ADDRESS_ASEL_SHIFT; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_tx_dma_to_cppi5_addr); + +void k3_udma_glue_tx_cppi5_to_dma_addr(struct k3_udma_glue_tx_channel *tx_chn, + dma_addr_t *addr) +{ + if (!xudma_is_pktdma(tx_chn->common.udmax) || + !tx_chn->common.atype_asel) + return; + + *addr &= (u64)GENMASK(K3_ADDRESS_ASEL_SHIFT - 1, 0); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_tx_cppi5_to_dma_addr); + +static int k3_udma_glue_cfg_rx_chn(struct k3_udma_glue_rx_channel *rx_chn) +{ + const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm; + struct ti_sci_msg_rm_udmap_rx_ch_cfg req; + int ret; + + memset(&req, 0, sizeof(req)); + + req.valid_params = TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID; + + req.nav_id = tisci_rm->tisci_dev_id; + req.index = rx_chn->udma_rchan_id; + req.rx_fetch_size = rx_chn->common.hdesc_size >> 2; + /* + * TODO: we can't support rxcq_qnum/RCHAN[a]_RCQ cfg with current sysfw + * and udmax impl, so just configure it to invalid value. + * req.rxcq_qnum = k3_ringacc_get_ring_id(rx_chn->flows[0].ringrx); + */ + req.rxcq_qnum = 0xFFFF; + if (!xudma_is_pktdma(rx_chn->common.udmax) && rx_chn->flow_num && + rx_chn->flow_id_base != rx_chn->udma_rchan_id) { + /* Default flow + extra ones */ + req.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID; + req.flowid_start = rx_chn->flow_id_base; + req.flowid_cnt = rx_chn->flow_num; + } + req.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR; + req.rx_atype = rx_chn->common.atype_asel; + + ret = tisci_rm->tisci_udmap_ops->rx_ch_cfg(tisci_rm->tisci, &req); + if (ret) + dev_err(rx_chn->common.dev, "rchan%d cfg failed %d\n", + rx_chn->udma_rchan_id, ret); + + return ret; +} + +static void k3_udma_glue_release_rx_flow(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_num) +{ + struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num]; + + if (IS_ERR_OR_NULL(flow->udma_rflow)) + return; + + if (flow->ringrxfdq) + k3_ringacc_ring_free(flow->ringrxfdq); + + if (flow->ringrx) + k3_ringacc_ring_free(flow->ringrx); + + xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow); + flow->udma_rflow = NULL; + rx_chn->flows_ready--; +} + +static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_idx, + struct k3_udma_glue_rx_flow_cfg *flow_cfg) +{ + struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx]; + const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm; + struct device *dev = rx_chn->common.dev; + struct ti_sci_msg_rm_udmap_flow_cfg req; + int rx_ring_id; + int rx_ringfdq_id; + int ret = 0; + + flow->udma_rflow = xudma_rflow_get(rx_chn->common.udmax, + flow->udma_rflow_id); + if (IS_ERR(flow->udma_rflow)) { + ret = PTR_ERR(flow->udma_rflow); + dev_err(dev, "UDMAX rflow get err %d\n", ret); + return ret; + } + + if (flow->udma_rflow_id != xudma_rflow_get_id(flow->udma_rflow)) { + ret = -ENODEV; + goto err_rflow_put; + } + + if (xudma_is_pktdma(rx_chn->common.udmax)) { + rx_ringfdq_id = flow->udma_rflow_id + + xudma_get_rflow_ring_offset(rx_chn->common.udmax); + rx_ring_id = 0; + } else { + rx_ring_id = flow_cfg->ring_rxq_id; + rx_ringfdq_id = flow_cfg->ring_rxfdq0_id; + } + + /* request and cfg rings */ + ret = k3_ringacc_request_rings_pair(rx_chn->common.ringacc, + rx_ringfdq_id, rx_ring_id, + &flow->ringrxfdq, + &flow->ringrx); + if (ret) { + dev_err(dev, "Failed to get RX/RXFDQ rings %d\n", ret); + goto err_rflow_put; + } + + /* Set the dma_dev for the rings to be configured */ + flow_cfg->rx_cfg.dma_dev = k3_udma_glue_rx_get_dma_device(rx_chn); + flow_cfg->rxfdq_cfg.dma_dev = flow_cfg->rx_cfg.dma_dev; + + /* Set the ASEL value for DMA rings of PKTDMA */ + if (xudma_is_pktdma(rx_chn->common.udmax)) { + flow_cfg->rx_cfg.asel = rx_chn->common.atype_asel; + flow_cfg->rxfdq_cfg.asel = rx_chn->common.atype_asel; + } + + ret = k3_ringacc_ring_cfg(flow->ringrx, &flow_cfg->rx_cfg); + if (ret) { + dev_err(dev, "Failed to cfg ringrx %d\n", ret); + goto err_ringrxfdq_free; + } + + ret = k3_ringacc_ring_cfg(flow->ringrxfdq, &flow_cfg->rxfdq_cfg); + if (ret) { + dev_err(dev, "Failed to cfg ringrxfdq %d\n", ret); + goto err_ringrxfdq_free; + } + + if (rx_chn->remote) { + rx_ring_id = TI_SCI_RESOURCE_NULL; + rx_ringfdq_id = TI_SCI_RESOURCE_NULL; + } else { + rx_ring_id = k3_ringacc_get_ring_id(flow->ringrx); + rx_ringfdq_id = k3_ringacc_get_ring_id(flow->ringrxfdq); + } + + memset(&req, 0, sizeof(req)); + + req.valid_params = + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID; + req.nav_id = tisci_rm->tisci_dev_id; + req.flow_index = flow->udma_rflow_id; + if (rx_chn->common.epib) + req.rx_einfo_present = 1; + if (rx_chn->common.psdata_size) + req.rx_psinfo_present = 1; + if (flow_cfg->rx_error_handling) + req.rx_error_handling = 1; + req.rx_desc_type = 0; + req.rx_dest_qnum = rx_ring_id; + req.rx_src_tag_hi_sel = 0; + req.rx_src_tag_lo_sel = flow_cfg->src_tag_lo_sel; + req.rx_dest_tag_hi_sel = 0; + req.rx_dest_tag_lo_sel = 0; + req.rx_fdq0_sz0_qnum = rx_ringfdq_id; + req.rx_fdq1_qnum = rx_ringfdq_id; + req.rx_fdq2_qnum = rx_ringfdq_id; + req.rx_fdq3_qnum = rx_ringfdq_id; + + ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req); + if (ret) { + dev_err(dev, "flow%d config failed: %d\n", flow->udma_rflow_id, + ret); + goto err_ringrxfdq_free; + } + + rx_chn->flows_ready++; + dev_dbg(dev, "flow%d config done. ready:%d\n", + flow->udma_rflow_id, rx_chn->flows_ready); + + return 0; + +err_ringrxfdq_free: + k3_ringacc_ring_free(flow->ringrxfdq); + k3_ringacc_ring_free(flow->ringrx); + +err_rflow_put: + xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow); + flow->udma_rflow = NULL; + + return ret; +} + +static void k3_udma_glue_dump_rx_chn(struct k3_udma_glue_rx_channel *chn) +{ + struct device *dev = chn->common.dev; + + dev_dbg(dev, "dump_rx_chn:\n" + "udma_rchan_id: %d\n" + "src_thread: %08x\n" + "dst_thread: %08x\n" + "epib: %d\n" + "hdesc_size: %u\n" + "psdata_size: %u\n" + "swdata_size: %u\n" + "flow_id_base: %d\n" + "flow_num: %d\n", + chn->udma_rchan_id, + chn->common.src_thread, + chn->common.dst_thread, + chn->common.epib, + chn->common.hdesc_size, + chn->common.psdata_size, + chn->common.swdata_size, + chn->flow_id_base, + chn->flow_num); +} + +static void k3_udma_glue_dump_rx_rt_chn(struct k3_udma_glue_rx_channel *chn, + char *mark) +{ + struct device *dev = chn->common.dev; + + dev_dbg(dev, "=== dump ===> %s\n", mark); + + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_CTL_REG, + xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PEER_RT_EN_REG, + xudma_rchanrt_read(chn->udma_rchanx, + UDMA_CHAN_RT_PEER_RT_EN_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PCNT_REG, + xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_PCNT_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_BCNT_REG, + xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_BCNT_REG)); + dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_SBCNT_REG, + xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_SBCNT_REG)); +} + +static int +k3_udma_glue_allocate_rx_flows(struct k3_udma_glue_rx_channel *rx_chn, + struct k3_udma_glue_rx_channel_cfg *cfg) +{ + int ret; + + /* default rflow */ + if (cfg->flow_id_use_rxchan_id) + return 0; + + /* not a GP rflows */ + if (rx_chn->flow_id_base != -1 && + !xudma_rflow_is_gp(rx_chn->common.udmax, rx_chn->flow_id_base)) + return 0; + + /* Allocate range of GP rflows */ + ret = xudma_alloc_gp_rflow_range(rx_chn->common.udmax, + rx_chn->flow_id_base, + rx_chn->flow_num); + if (ret < 0) { + dev_err(rx_chn->common.dev, "UDMAX reserve_rflow %d cnt:%d err: %d\n", + rx_chn->flow_id_base, rx_chn->flow_num, ret); + return ret; + } + rx_chn->flow_id_base = ret; + + return 0; +} + +static struct k3_udma_glue_rx_channel * +k3_udma_glue_request_rx_chn_priv(struct device *dev, const char *name, + struct k3_udma_glue_rx_channel_cfg *cfg) +{ + struct k3_udma_glue_rx_channel *rx_chn; + struct psil_endpoint_config *ep_cfg; + int ret, i; + + if (cfg->flow_id_num <= 0) + return ERR_PTR(-EINVAL); + + if (cfg->flow_id_num != 1 && + (cfg->def_flow_cfg || cfg->flow_id_use_rxchan_id)) + return ERR_PTR(-EINVAL); + + rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL); + if (!rx_chn) + return ERR_PTR(-ENOMEM); + + rx_chn->common.dev = dev; + rx_chn->common.swdata_size = cfg->swdata_size; + rx_chn->remote = false; + + /* parse of udmap channel */ + ret = of_k3_udma_glue_parse_chn(dev->of_node, name, + &rx_chn->common, false); + if (ret) + goto err; + + rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib, + rx_chn->common.psdata_size, + rx_chn->common.swdata_size); + + ep_cfg = rx_chn->common.ep_config; + + if (xudma_is_pktdma(rx_chn->common.udmax)) + rx_chn->udma_rchan_id = ep_cfg->mapped_channel_id; + else + rx_chn->udma_rchan_id = -1; + + /* request and cfg UDMAP RX channel */ + rx_chn->udma_rchanx = xudma_rchan_get(rx_chn->common.udmax, + rx_chn->udma_rchan_id); + if (IS_ERR(rx_chn->udma_rchanx)) { + ret = PTR_ERR(rx_chn->udma_rchanx); + dev_err(dev, "UDMAX rchanx get err %d\n", ret); + goto err; + } + rx_chn->udma_rchan_id = xudma_rchan_get_id(rx_chn->udma_rchanx); + + rx_chn->common.chan_dev.class = &k3_udma_glue_devclass; + rx_chn->common.chan_dev.parent = xudma_get_device(rx_chn->common.udmax); + dev_set_name(&rx_chn->common.chan_dev, "rchan%d-0x%04x", + rx_chn->udma_rchan_id, rx_chn->common.src_thread); + ret = device_register(&rx_chn->common.chan_dev); + if (ret) { + dev_err(dev, "Channel Device registration failed %d\n", ret); + put_device(&rx_chn->common.chan_dev); + rx_chn->common.chan_dev.parent = NULL; + goto err; + } + + if (xudma_is_pktdma(rx_chn->common.udmax)) { + /* prepare the channel device as coherent */ + rx_chn->common.chan_dev.dma_coherent = true; + dma_coerce_mask_and_coherent(&rx_chn->common.chan_dev, + DMA_BIT_MASK(48)); + } + + if (xudma_is_pktdma(rx_chn->common.udmax)) { + int flow_start = cfg->flow_id_base; + int flow_end; + + if (flow_start == -1) + flow_start = ep_cfg->flow_start; + + flow_end = flow_start + cfg->flow_id_num - 1; + if (flow_start < ep_cfg->flow_start || + flow_end > (ep_cfg->flow_start + ep_cfg->flow_num - 1)) { + dev_err(dev, "Invalid flow range requested\n"); + ret = -EINVAL; + goto err; + } + rx_chn->flow_id_base = flow_start; + } else { + rx_chn->flow_id_base = cfg->flow_id_base; + + /* Use RX channel id as flow id: target dev can't generate flow_id */ + if (cfg->flow_id_use_rxchan_id) + rx_chn->flow_id_base = rx_chn->udma_rchan_id; + } + + rx_chn->flow_num = cfg->flow_id_num; + + rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num, + sizeof(*rx_chn->flows), GFP_KERNEL); + if (!rx_chn->flows) { + ret = -ENOMEM; + goto err; + } + + ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg); + if (ret) + goto err; + + for (i = 0; i < rx_chn->flow_num; i++) + rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i; + + /* request and cfg psi-l */ + rx_chn->common.dst_thread = + xudma_dev_get_psil_base(rx_chn->common.udmax) + + rx_chn->udma_rchan_id; + + ret = k3_udma_glue_cfg_rx_chn(rx_chn); + if (ret) { + dev_err(dev, "Failed to cfg rchan %d\n", ret); + goto err; + } + + /* init default RX flow only if flow_num = 1 */ + if (cfg->def_flow_cfg) { + ret = k3_udma_glue_cfg_rx_flow(rx_chn, 0, cfg->def_flow_cfg); + if (ret) + goto err; + } + + k3_udma_glue_dump_rx_chn(rx_chn); + + return rx_chn; + +err: + k3_udma_glue_release_rx_chn(rx_chn); + return ERR_PTR(ret); +} + +static struct k3_udma_glue_rx_channel * +k3_udma_glue_request_remote_rx_chn(struct device *dev, const char *name, + struct k3_udma_glue_rx_channel_cfg *cfg) +{ + struct k3_udma_glue_rx_channel *rx_chn; + int ret, i; + + if (cfg->flow_id_num <= 0 || + cfg->flow_id_use_rxchan_id || + cfg->def_flow_cfg || + cfg->flow_id_base < 0) + return ERR_PTR(-EINVAL); + + /* + * Remote RX channel is under control of Remote CPU core, so + * Linux can only request and manipulate by dedicated RX flows + */ + + rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL); + if (!rx_chn) + return ERR_PTR(-ENOMEM); + + rx_chn->common.dev = dev; + rx_chn->common.swdata_size = cfg->swdata_size; + rx_chn->remote = true; + rx_chn->udma_rchan_id = -1; + rx_chn->flow_num = cfg->flow_id_num; + rx_chn->flow_id_base = cfg->flow_id_base; + rx_chn->psil_paired = false; + + /* parse of udmap channel */ + ret = of_k3_udma_glue_parse_chn(dev->of_node, name, + &rx_chn->common, false); + if (ret) + goto err; + + rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib, + rx_chn->common.psdata_size, + rx_chn->common.swdata_size); + + rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num, + sizeof(*rx_chn->flows), GFP_KERNEL); + if (!rx_chn->flows) { + ret = -ENOMEM; + goto err; + } + + rx_chn->common.chan_dev.class = &k3_udma_glue_devclass; + rx_chn->common.chan_dev.parent = xudma_get_device(rx_chn->common.udmax); + dev_set_name(&rx_chn->common.chan_dev, "rchan_remote-0x%04x", + rx_chn->common.src_thread); + ret = device_register(&rx_chn->common.chan_dev); + if (ret) { + dev_err(dev, "Channel Device registration failed %d\n", ret); + put_device(&rx_chn->common.chan_dev); + rx_chn->common.chan_dev.parent = NULL; + goto err; + } + + if (xudma_is_pktdma(rx_chn->common.udmax)) { + /* prepare the channel device as coherent */ + rx_chn->common.chan_dev.dma_coherent = true; + dma_coerce_mask_and_coherent(&rx_chn->common.chan_dev, + DMA_BIT_MASK(48)); + } + + ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg); + if (ret) + goto err; + + for (i = 0; i < rx_chn->flow_num; i++) + rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i; + + k3_udma_glue_dump_rx_chn(rx_chn); + + return rx_chn; + +err: + k3_udma_glue_release_rx_chn(rx_chn); + return ERR_PTR(ret); +} + +struct k3_udma_glue_rx_channel * +k3_udma_glue_request_rx_chn(struct device *dev, const char *name, + struct k3_udma_glue_rx_channel_cfg *cfg) +{ + if (cfg->remote) + return k3_udma_glue_request_remote_rx_chn(dev, name, cfg); + else + return k3_udma_glue_request_rx_chn_priv(dev, name, cfg); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_request_rx_chn); + +void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn) +{ + int i; + + if (IS_ERR_OR_NULL(rx_chn->common.udmax)) + return; + + if (rx_chn->psil_paired) { + xudma_navss_psil_unpair(rx_chn->common.udmax, + rx_chn->common.src_thread, + rx_chn->common.dst_thread); + rx_chn->psil_paired = false; + } + + for (i = 0; i < rx_chn->flow_num; i++) + k3_udma_glue_release_rx_flow(rx_chn, i); + + if (xudma_rflow_is_gp(rx_chn->common.udmax, rx_chn->flow_id_base)) + xudma_free_gp_rflow_range(rx_chn->common.udmax, + rx_chn->flow_id_base, + rx_chn->flow_num); + + if (!IS_ERR_OR_NULL(rx_chn->udma_rchanx)) + xudma_rchan_put(rx_chn->common.udmax, + rx_chn->udma_rchanx); + + if (rx_chn->common.chan_dev.parent) { + device_unregister(&rx_chn->common.chan_dev); + rx_chn->common.chan_dev.parent = NULL; + } +} +EXPORT_SYMBOL_GPL(k3_udma_glue_release_rx_chn); + +int k3_udma_glue_rx_flow_init(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_idx, + struct k3_udma_glue_rx_flow_cfg *flow_cfg) +{ + if (flow_idx >= rx_chn->flow_num) + return -EINVAL; + + return k3_udma_glue_cfg_rx_flow(rx_chn, flow_idx, flow_cfg); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_init); + +u32 k3_udma_glue_rx_flow_get_fdq_id(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_idx) +{ + struct k3_udma_glue_rx_flow *flow; + + if (flow_idx >= rx_chn->flow_num) + return -EINVAL; + + flow = &rx_chn->flows[flow_idx]; + + return k3_ringacc_get_ring_id(flow->ringrxfdq); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_get_fdq_id); + +u32 k3_udma_glue_rx_get_flow_id_base(struct k3_udma_glue_rx_channel *rx_chn) +{ + return rx_chn->flow_id_base; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_flow_id_base); + +int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_idx) +{ + struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx]; + const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm; + struct device *dev = rx_chn->common.dev; + struct ti_sci_msg_rm_udmap_flow_cfg req; + int rx_ring_id; + int rx_ringfdq_id; + int ret = 0; + + if (!rx_chn->remote) + return -EINVAL; + + rx_ring_id = k3_ringacc_get_ring_id(flow->ringrx); + rx_ringfdq_id = k3_ringacc_get_ring_id(flow->ringrxfdq); + + memset(&req, 0, sizeof(req)); + + req.valid_params = + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID; + req.nav_id = tisci_rm->tisci_dev_id; + req.flow_index = flow->udma_rflow_id; + req.rx_dest_qnum = rx_ring_id; + req.rx_fdq0_sz0_qnum = rx_ringfdq_id; + req.rx_fdq1_qnum = rx_ringfdq_id; + req.rx_fdq2_qnum = rx_ringfdq_id; + req.rx_fdq3_qnum = rx_ringfdq_id; + + ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req); + if (ret) { + dev_err(dev, "flow%d enable failed: %d\n", flow->udma_rflow_id, + ret); + } + + return ret; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_enable); + +int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_idx) +{ + struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx]; + const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm; + struct device *dev = rx_chn->common.dev; + struct ti_sci_msg_rm_udmap_flow_cfg req; + int ret = 0; + + if (!rx_chn->remote) + return -EINVAL; + + memset(&req, 0, sizeof(req)); + req.valid_params = + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID; + req.nav_id = tisci_rm->tisci_dev_id; + req.flow_index = flow->udma_rflow_id; + req.rx_dest_qnum = TI_SCI_RESOURCE_NULL; + req.rx_fdq0_sz0_qnum = TI_SCI_RESOURCE_NULL; + req.rx_fdq1_qnum = TI_SCI_RESOURCE_NULL; + req.rx_fdq2_qnum = TI_SCI_RESOURCE_NULL; + req.rx_fdq3_qnum = TI_SCI_RESOURCE_NULL; + + ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req); + if (ret) { + dev_err(dev, "flow%d disable failed: %d\n", flow->udma_rflow_id, + ret); + } + + return ret; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_disable); + +int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn) +{ + int ret; + + if (rx_chn->remote) + return -EINVAL; + + if (rx_chn->flows_ready < rx_chn->flow_num) + return -EINVAL; + + ret = xudma_navss_psil_pair(rx_chn->common.udmax, + rx_chn->common.src_thread, + rx_chn->common.dst_thread); + if (ret) { + dev_err(rx_chn->common.dev, "PSI-L request err %d\n", ret); + return ret; + } + + rx_chn->psil_paired = true; + + xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN); + + xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_ENABLE); + + k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt en"); + return 0; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_enable_rx_chn); + +void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn) +{ + k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis1"); + + xudma_rchanrt_write(rx_chn->udma_rchanx, + UDMA_CHAN_RT_PEER_RT_EN_REG, 0); + xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG, 0); + + k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis2"); + + if (rx_chn->psil_paired) { + xudma_navss_psil_unpair(rx_chn->common.udmax, + rx_chn->common.src_thread, + rx_chn->common.dst_thread); + rx_chn->psil_paired = false; + } +} +EXPORT_SYMBOL_GPL(k3_udma_glue_disable_rx_chn); + +void k3_udma_glue_tdown_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, + bool sync) +{ + int i = 0; + u32 val; + + if (rx_chn->remote) + return; + + k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown1"); + + xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_TEARDOWN); + + val = xudma_rchanrt_read(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG); + + while (sync && (val & UDMA_CHAN_RT_CTL_EN)) { + val = xudma_rchanrt_read(rx_chn->udma_rchanx, + UDMA_CHAN_RT_CTL_REG); + udelay(1); + if (i > K3_UDMAX_TDOWN_TIMEOUT_US) { + dev_err(rx_chn->common.dev, "RX tdown timeout\n"); + break; + } + i++; + } + + val = xudma_rchanrt_read(rx_chn->udma_rchanx, + UDMA_CHAN_RT_PEER_RT_EN_REG); + if (sync && (val & UDMA_PEER_RT_EN_ENABLE)) + dev_err(rx_chn->common.dev, "TX tdown peer not stopped\n"); + k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown2"); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_rx_chn); + +void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_num, void *data, + void (*cleanup)(void *data, dma_addr_t desc_dma), bool skip_fdq) +{ + struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num]; + struct device *dev = rx_chn->common.dev; + dma_addr_t desc_dma; + int occ_rx, i, ret; + + /* reset RXCQ as it is not input for udma - expected to be empty */ + occ_rx = k3_ringacc_ring_get_occ(flow->ringrx); + dev_dbg(dev, "RX reset flow %u occ_rx %u\n", flow_num, occ_rx); + + /* Skip RX FDQ in case one FDQ is used for the set of flows */ + if (skip_fdq) + goto do_reset; + + /* + * RX FDQ reset need to be special way as it is input for udma and its + * state cached by udma, so: + * 1) save RX FDQ occ + * 2) clean up RX FDQ and call callback .cleanup() for each desc + * 3) reset RX FDQ in a special way + */ + occ_rx = k3_ringacc_ring_get_occ(flow->ringrxfdq); + dev_dbg(dev, "RX reset flow %u occ_rx_fdq %u\n", flow_num, occ_rx); + + for (i = 0; i < occ_rx; i++) { + ret = k3_ringacc_ring_pop(flow->ringrxfdq, &desc_dma); + if (ret) { + if (ret != -ENODATA) + dev_err(dev, "RX reset pop %d\n", ret); + break; + } + cleanup(data, desc_dma); + } + + k3_ringacc_ring_reset_dma(flow->ringrxfdq, occ_rx); + +do_reset: + k3_ringacc_ring_reset(flow->ringrx); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_reset_rx_chn); + +int k3_udma_glue_push_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_num, struct cppi5_host_desc_t *desc_rx, + dma_addr_t desc_dma) +{ + struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num]; + + return k3_ringacc_ring_push(flow->ringrxfdq, &desc_dma); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_push_rx_chn); + +int k3_udma_glue_pop_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_num, dma_addr_t *desc_dma) +{ + struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num]; + + return k3_ringacc_ring_pop(flow->ringrx, desc_dma); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_pop_rx_chn); + +int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn, + u32 flow_num) +{ + struct k3_udma_glue_rx_flow *flow; + + flow = &rx_chn->flows[flow_num]; + + if (xudma_is_pktdma(rx_chn->common.udmax)) { + flow->virq = xudma_pktdma_rflow_get_irq(rx_chn->common.udmax, + flow->udma_rflow_id); + } else { + flow->virq = k3_ringacc_get_ring_irq_num(flow->ringrx); + } + + return flow->virq; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_irq); + +struct device * + k3_udma_glue_rx_get_dma_device(struct k3_udma_glue_rx_channel *rx_chn) +{ + if (xudma_is_pktdma(rx_chn->common.udmax) && + (rx_chn->common.atype_asel == 14 || rx_chn->common.atype_asel == 15)) + return &rx_chn->common.chan_dev; + + return xudma_get_device(rx_chn->common.udmax); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_dma_device); + +void k3_udma_glue_rx_dma_to_cppi5_addr(struct k3_udma_glue_rx_channel *rx_chn, + dma_addr_t *addr) +{ + if (!xudma_is_pktdma(rx_chn->common.udmax) || + !rx_chn->common.atype_asel) + return; + + *addr |= (u64)rx_chn->common.atype_asel << K3_ADDRESS_ASEL_SHIFT; +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_dma_to_cppi5_addr); + +void k3_udma_glue_rx_cppi5_to_dma_addr(struct k3_udma_glue_rx_channel *rx_chn, + dma_addr_t *addr) +{ + if (!xudma_is_pktdma(rx_chn->common.udmax) || + !rx_chn->common.atype_asel) + return; + + *addr &= (u64)GENMASK(K3_ADDRESS_ASEL_SHIFT - 1, 0); +} +EXPORT_SYMBOL_GPL(k3_udma_glue_rx_cppi5_to_dma_addr); + +static int __init k3_udma_glue_class_init(void) +{ + return class_register(&k3_udma_glue_devclass); +} + +module_init(k3_udma_glue_class_init); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c new file mode 100644 index 0000000000..05228bf000 --- /dev/null +++ b/drivers/dma/ti/k3-udma-private.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi + */ +#include +#include + +int xudma_navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread) +{ + return navss_psil_pair(ud, src_thread, dst_thread); +} +EXPORT_SYMBOL(xudma_navss_psil_pair); + +int xudma_navss_psil_unpair(struct udma_dev *ud, u32 src_thread, u32 dst_thread) +{ + return navss_psil_unpair(ud, src_thread, dst_thread); +} +EXPORT_SYMBOL(xudma_navss_psil_unpair); + +struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property) +{ + struct device_node *udma_node = np; + struct platform_device *pdev; + struct udma_dev *ud; + + if (property) { + udma_node = of_parse_phandle(np, property, 0); + if (!udma_node) { + pr_err("UDMA node is not found\n"); + return ERR_PTR(-ENODEV); + } + } + + pdev = of_find_device_by_node(udma_node); + if (np != udma_node) + of_node_put(udma_node); + + if (!pdev) { + pr_debug("UDMA device not found\n"); + return ERR_PTR(-EPROBE_DEFER); + } + + ud = platform_get_drvdata(pdev); + if (!ud) { + pr_debug("UDMA has not been probed\n"); + put_device(&pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } + + return ud; +} +EXPORT_SYMBOL(of_xudma_dev_get); + +struct device *xudma_get_device(struct udma_dev *ud) +{ + return ud->dev; +} +EXPORT_SYMBOL(xudma_get_device); + +struct k3_ringacc *xudma_get_ringacc(struct udma_dev *ud) +{ + return ud->ringacc; +} +EXPORT_SYMBOL(xudma_get_ringacc); + +u32 xudma_dev_get_psil_base(struct udma_dev *ud) +{ + return ud->psil_base; +} +EXPORT_SYMBOL(xudma_dev_get_psil_base); + +struct udma_tisci_rm *xudma_dev_get_tisci_rm(struct udma_dev *ud) +{ + return &ud->tisci_rm; +} +EXPORT_SYMBOL(xudma_dev_get_tisci_rm); + +int xudma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt) +{ + return __udma_alloc_gp_rflow_range(ud, from, cnt); +} +EXPORT_SYMBOL(xudma_alloc_gp_rflow_range); + +int xudma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt) +{ + return __udma_free_gp_rflow_range(ud, from, cnt); +} +EXPORT_SYMBOL(xudma_free_gp_rflow_range); + +bool xudma_rflow_is_gp(struct udma_dev *ud, int id) +{ + if (!ud->rflow_gp_map) + return false; + + return !test_bit(id, ud->rflow_gp_map); +} +EXPORT_SYMBOL(xudma_rflow_is_gp); + +#define XUDMA_GET_PUT_RESOURCE(res) \ +struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id) \ +{ \ + return __udma_reserve_##res(ud, UDMA_TP_NORMAL, id); \ +} \ +EXPORT_SYMBOL(xudma_##res##_get); \ + \ +void xudma_##res##_put(struct udma_dev *ud, struct udma_##res *p) \ +{ \ + clear_bit(p->id, ud->res##_map); \ +} \ +EXPORT_SYMBOL(xudma_##res##_put) +XUDMA_GET_PUT_RESOURCE(tchan); +XUDMA_GET_PUT_RESOURCE(rchan); + +struct udma_rflow *xudma_rflow_get(struct udma_dev *ud, int id) +{ + return __udma_get_rflow(ud, id); +} +EXPORT_SYMBOL(xudma_rflow_get); + +void xudma_rflow_put(struct udma_dev *ud, struct udma_rflow *p) +{ + __udma_put_rflow(ud, p); +} +EXPORT_SYMBOL(xudma_rflow_put); + +int xudma_get_rflow_ring_offset(struct udma_dev *ud) +{ + return ud->tflow_cnt; +} +EXPORT_SYMBOL(xudma_get_rflow_ring_offset); + +#define XUDMA_GET_RESOURCE_ID(res) \ +int xudma_##res##_get_id(struct udma_##res *p) \ +{ \ + return p->id; \ +} \ +EXPORT_SYMBOL(xudma_##res##_get_id) +XUDMA_GET_RESOURCE_ID(tchan); +XUDMA_GET_RESOURCE_ID(rchan); +XUDMA_GET_RESOURCE_ID(rflow); + +/* Exported register access functions */ +#define XUDMA_RT_IO_FUNCTIONS(res) \ +u32 xudma_##res##rt_read(struct udma_##res *p, int reg) \ +{ \ + if (!p) \ + return 0; \ + return udma_read(p->reg_rt, reg); \ +} \ +EXPORT_SYMBOL(xudma_##res##rt_read); \ + \ +void xudma_##res##rt_write(struct udma_##res *p, int reg, u32 val) \ +{ \ + if (!p) \ + return; \ + udma_write(p->reg_rt, reg, val); \ +} \ +EXPORT_SYMBOL(xudma_##res##rt_write) +XUDMA_RT_IO_FUNCTIONS(tchan); +XUDMA_RT_IO_FUNCTIONS(rchan); + +int xudma_is_pktdma(struct udma_dev *ud) +{ + return ud->match_data->type == DMA_TYPE_PKTDMA; +} +EXPORT_SYMBOL(xudma_is_pktdma); + +int xudma_pktdma_tflow_get_irq(struct udma_dev *ud, int udma_tflow_id) +{ + const struct udma_oes_offsets *oes = &ud->soc_data->oes; + + return msi_get_virq(ud->dev, udma_tflow_id + oes->pktdma_tchan_flow); +} +EXPORT_SYMBOL(xudma_pktdma_tflow_get_irq); + +int xudma_pktdma_rflow_get_irq(struct udma_dev *ud, int udma_rflow_id) +{ + const struct udma_oes_offsets *oes = &ud->soc_data->oes; + + return msi_get_virq(ud->dev, udma_rflow_id + oes->pktdma_rchan_flow); +} +EXPORT_SYMBOL(xudma_pktdma_rflow_get_irq); diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c new file mode 100644 index 0000000000..30fd2f386f --- /dev/null +++ b/drivers/dma/ti/k3-udma.c @@ -0,0 +1,5619 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../virt-dma.h" +#include "k3-udma.h" +#include "k3-psil-priv.h" + +struct udma_static_tr { + u8 elsize; /* RPSTR0 */ + u16 elcnt; /* RPSTR0 */ + u16 bstcnt; /* RPSTR1 */ +}; + +#define K3_UDMA_MAX_RFLOWS 1024 +#define K3_UDMA_DEFAULT_RING_SIZE 16 + +/* How SRC/DST tag should be updated by UDMA in the descriptor's Word 3 */ +#define UDMA_RFLOW_SRCTAG_NONE 0 +#define UDMA_RFLOW_SRCTAG_CFG_TAG 1 +#define UDMA_RFLOW_SRCTAG_FLOW_ID 2 +#define UDMA_RFLOW_SRCTAG_SRC_TAG 4 + +#define UDMA_RFLOW_DSTTAG_NONE 0 +#define UDMA_RFLOW_DSTTAG_CFG_TAG 1 +#define UDMA_RFLOW_DSTTAG_FLOW_ID 2 +#define UDMA_RFLOW_DSTTAG_DST_TAG_LO 4 +#define UDMA_RFLOW_DSTTAG_DST_TAG_HI 5 + +struct udma_chan; + +enum k3_dma_type { + DMA_TYPE_UDMA = 0, + DMA_TYPE_BCDMA, + DMA_TYPE_PKTDMA, +}; + +enum udma_mmr { + MMR_GCFG = 0, + MMR_BCHANRT, + MMR_RCHANRT, + MMR_TCHANRT, + MMR_LAST, +}; + +static const char * const mmr_names[] = { + [MMR_GCFG] = "gcfg", + [MMR_BCHANRT] = "bchanrt", + [MMR_RCHANRT] = "rchanrt", + [MMR_TCHANRT] = "tchanrt", +}; + +struct udma_tchan { + void __iomem *reg_rt; + + int id; + struct k3_ring *t_ring; /* Transmit ring */ + struct k3_ring *tc_ring; /* Transmit Completion ring */ + int tflow_id; /* applicable only for PKTDMA */ + +}; + +#define udma_bchan udma_tchan + +struct udma_rflow { + int id; + struct k3_ring *fd_ring; /* Free Descriptor ring */ + struct k3_ring *r_ring; /* Receive ring */ +}; + +struct udma_rchan { + void __iomem *reg_rt; + + int id; +}; + +struct udma_oes_offsets { + /* K3 UDMA Output Event Offset */ + u32 udma_rchan; + + /* BCDMA Output Event Offsets */ + u32 bcdma_bchan_data; + u32 bcdma_bchan_ring; + u32 bcdma_tchan_data; + u32 bcdma_tchan_ring; + u32 bcdma_rchan_data; + u32 bcdma_rchan_ring; + + /* PKTDMA Output Event Offsets */ + u32 pktdma_tchan_flow; + u32 pktdma_rchan_flow; +}; + +#define UDMA_FLAG_PDMA_ACC32 BIT(0) +#define UDMA_FLAG_PDMA_BURST BIT(1) +#define UDMA_FLAG_TDTYPE BIT(2) +#define UDMA_FLAG_BURST_SIZE BIT(3) +#define UDMA_FLAGS_J7_CLASS (UDMA_FLAG_PDMA_ACC32 | \ + UDMA_FLAG_PDMA_BURST | \ + UDMA_FLAG_TDTYPE | \ + UDMA_FLAG_BURST_SIZE) + +struct udma_match_data { + enum k3_dma_type type; + u32 psil_base; + bool enable_memcpy_support; + u32 flags; + u32 statictr_z_mask; + u8 burst_size[3]; + struct udma_soc_data *soc_data; +}; + +struct udma_soc_data { + struct udma_oes_offsets oes; + u32 bcdma_trigger_event_offset; +}; + +struct udma_hwdesc { + size_t cppi5_desc_size; + void *cppi5_desc_vaddr; + dma_addr_t cppi5_desc_paddr; + + /* TR descriptor internal pointers */ + void *tr_req_base; + struct cppi5_tr_resp_t *tr_resp_base; +}; + +struct udma_rx_flush { + struct udma_hwdesc hwdescs[2]; + + size_t buffer_size; + void *buffer_vaddr; + dma_addr_t buffer_paddr; +}; + +struct udma_tpl { + u8 levels; + u32 start_idx[3]; +}; + +struct udma_dev { + struct dma_device ddev; + struct device *dev; + void __iomem *mmrs[MMR_LAST]; + const struct udma_match_data *match_data; + const struct udma_soc_data *soc_data; + + struct udma_tpl bchan_tpl; + struct udma_tpl tchan_tpl; + struct udma_tpl rchan_tpl; + + size_t desc_align; /* alignment to use for descriptors */ + + struct udma_tisci_rm tisci_rm; + + struct k3_ringacc *ringacc; + + struct work_struct purge_work; + struct list_head desc_to_purge; + spinlock_t lock; + + struct udma_rx_flush rx_flush; + + int bchan_cnt; + int tchan_cnt; + int echan_cnt; + int rchan_cnt; + int rflow_cnt; + int tflow_cnt; + unsigned long *bchan_map; + unsigned long *tchan_map; + unsigned long *rchan_map; + unsigned long *rflow_gp_map; + unsigned long *rflow_gp_map_allocated; + unsigned long *rflow_in_use; + unsigned long *tflow_map; + + struct udma_bchan *bchans; + struct udma_tchan *tchans; + struct udma_rchan *rchans; + struct udma_rflow *rflows; + + struct udma_chan *channels; + u32 psil_base; + u32 atype; + u32 asel; +}; + +struct udma_desc { + struct virt_dma_desc vd; + + bool terminated; + + enum dma_transfer_direction dir; + + struct udma_static_tr static_tr; + u32 residue; + + unsigned int sglen; + unsigned int desc_idx; /* Only used for cyclic in packet mode */ + unsigned int tr_idx; + + u32 metadata_size; + void *metadata; /* pointer to provided metadata buffer (EPIP, PSdata) */ + + unsigned int hwdesc_count; + struct udma_hwdesc hwdesc[]; +}; + +enum udma_chan_state { + UDMA_CHAN_IS_IDLE = 0, /* not active, no teardown is in progress */ + UDMA_CHAN_IS_ACTIVE, /* Normal operation */ + UDMA_CHAN_IS_TERMINATING, /* channel is being terminated */ +}; + +struct udma_tx_drain { + struct delayed_work work; + ktime_t tstamp; + u32 residue; +}; + +struct udma_chan_config { + bool pkt_mode; /* TR or packet */ + bool needs_epib; /* EPIB is needed for the communication or not */ + u32 psd_size; /* size of Protocol Specific Data */ + u32 metadata_size; /* (needs_epib ? 16:0) + psd_size */ + u32 hdesc_size; /* Size of a packet descriptor in packet mode */ + bool notdpkt; /* Suppress sending TDC packet */ + int remote_thread_id; + u32 atype; + u32 asel; + u32 src_thread; + u32 dst_thread; + enum psil_endpoint_type ep_type; + bool enable_acc32; + bool enable_burst; + enum udma_tp_level channel_tpl; /* Channel Throughput Level */ + + u32 tr_trigger_type; + unsigned long tx_flags; + + /* PKDMA mapped channel */ + int mapped_channel_id; + /* PKTDMA default tflow or rflow for mapped channel */ + int default_flow_id; + + enum dma_transfer_direction dir; +}; + +struct udma_chan { + struct virt_dma_chan vc; + struct dma_slave_config cfg; + struct udma_dev *ud; + struct device *dma_dev; + struct udma_desc *desc; + struct udma_desc *terminated_desc; + struct udma_static_tr static_tr; + char *name; + + struct udma_bchan *bchan; + struct udma_tchan *tchan; + struct udma_rchan *rchan; + struct udma_rflow *rflow; + + bool psil_paired; + + int irq_num_ring; + int irq_num_udma; + + bool cyclic; + bool paused; + + enum udma_chan_state state; + struct completion teardown_completed; + + struct udma_tx_drain tx_drain; + + /* Channel configuration parameters */ + struct udma_chan_config config; + /* Channel configuration parameters (backup) */ + struct udma_chan_config backup_config; + + /* dmapool for packet mode descriptors */ + bool use_dma_pool; + struct dma_pool *hdesc_pool; + + u32 id; +}; + +static inline struct udma_dev *to_udma_dev(struct dma_device *d) +{ + return container_of(d, struct udma_dev, ddev); +} + +static inline struct udma_chan *to_udma_chan(struct dma_chan *c) +{ + return container_of(c, struct udma_chan, vc.chan); +} + +static inline struct udma_desc *to_udma_desc(struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct udma_desc, vd.tx); +} + +/* Generic register access functions */ +static inline u32 udma_read(void __iomem *base, int reg) +{ + return readl(base + reg); +} + +static inline void udma_write(void __iomem *base, int reg, u32 val) +{ + writel(val, base + reg); +} + +static inline void udma_update_bits(void __iomem *base, int reg, + u32 mask, u32 val) +{ + u32 tmp, orig; + + orig = readl(base + reg); + tmp = orig & ~mask; + tmp |= (val & mask); + + if (tmp != orig) + writel(tmp, base + reg); +} + +/* TCHANRT */ +static inline u32 udma_tchanrt_read(struct udma_chan *uc, int reg) +{ + if (!uc->tchan) + return 0; + return udma_read(uc->tchan->reg_rt, reg); +} + +static inline void udma_tchanrt_write(struct udma_chan *uc, int reg, u32 val) +{ + if (!uc->tchan) + return; + udma_write(uc->tchan->reg_rt, reg, val); +} + +static inline void udma_tchanrt_update_bits(struct udma_chan *uc, int reg, + u32 mask, u32 val) +{ + if (!uc->tchan) + return; + udma_update_bits(uc->tchan->reg_rt, reg, mask, val); +} + +/* RCHANRT */ +static inline u32 udma_rchanrt_read(struct udma_chan *uc, int reg) +{ + if (!uc->rchan) + return 0; + return udma_read(uc->rchan->reg_rt, reg); +} + +static inline void udma_rchanrt_write(struct udma_chan *uc, int reg, u32 val) +{ + if (!uc->rchan) + return; + udma_write(uc->rchan->reg_rt, reg, val); +} + +static inline void udma_rchanrt_update_bits(struct udma_chan *uc, int reg, + u32 mask, u32 val) +{ + if (!uc->rchan) + return; + udma_update_bits(uc->rchan->reg_rt, reg, mask, val); +} + +static int navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread) +{ + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + + dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET; + return tisci_rm->tisci_psil_ops->pair(tisci_rm->tisci, + tisci_rm->tisci_navss_dev_id, + src_thread, dst_thread); +} + +static int navss_psil_unpair(struct udma_dev *ud, u32 src_thread, + u32 dst_thread) +{ + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + + dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET; + return tisci_rm->tisci_psil_ops->unpair(tisci_rm->tisci, + tisci_rm->tisci_navss_dev_id, + src_thread, dst_thread); +} + +static void k3_configure_chan_coherency(struct dma_chan *chan, u32 asel) +{ + struct device *chan_dev = &chan->dev->device; + + if (asel == 0) { + /* No special handling for the channel */ + chan->dev->chan_dma_dev = false; + + chan_dev->dma_coherent = false; + chan_dev->dma_parms = NULL; + } else if (asel == 14 || asel == 15) { + chan->dev->chan_dma_dev = true; + + chan_dev->dma_coherent = true; + dma_coerce_mask_and_coherent(chan_dev, DMA_BIT_MASK(48)); + chan_dev->dma_parms = chan_dev->parent->dma_parms; + } else { + dev_warn(chan->device->dev, "Invalid ASEL value: %u\n", asel); + + chan_dev->dma_coherent = false; + chan_dev->dma_parms = NULL; + } +} + +static u8 udma_get_chan_tpl_index(struct udma_tpl *tpl_map, int chan_id) +{ + int i; + + for (i = 0; i < tpl_map->levels; i++) { + if (chan_id >= tpl_map->start_idx[i]) + return i; + } + + return 0; +} + +static void udma_reset_uchan(struct udma_chan *uc) +{ + memset(&uc->config, 0, sizeof(uc->config)); + uc->config.remote_thread_id = -1; + uc->config.mapped_channel_id = -1; + uc->config.default_flow_id = -1; + uc->state = UDMA_CHAN_IS_IDLE; +} + +static void udma_dump_chan_stdata(struct udma_chan *uc) +{ + struct device *dev = uc->ud->dev; + u32 offset; + int i; + + if (uc->config.dir == DMA_MEM_TO_DEV || uc->config.dir == DMA_MEM_TO_MEM) { + dev_dbg(dev, "TCHAN State data:\n"); + for (i = 0; i < 32; i++) { + offset = UDMA_CHAN_RT_STDATA_REG + i * 4; + dev_dbg(dev, "TRT_STDATA[%02d]: 0x%08x\n", i, + udma_tchanrt_read(uc, offset)); + } + } + + if (uc->config.dir == DMA_DEV_TO_MEM || uc->config.dir == DMA_MEM_TO_MEM) { + dev_dbg(dev, "RCHAN State data:\n"); + for (i = 0; i < 32; i++) { + offset = UDMA_CHAN_RT_STDATA_REG + i * 4; + dev_dbg(dev, "RRT_STDATA[%02d]: 0x%08x\n", i, + udma_rchanrt_read(uc, offset)); + } + } +} + +static inline dma_addr_t udma_curr_cppi5_desc_paddr(struct udma_desc *d, + int idx) +{ + return d->hwdesc[idx].cppi5_desc_paddr; +} + +static inline void *udma_curr_cppi5_desc_vaddr(struct udma_desc *d, int idx) +{ + return d->hwdesc[idx].cppi5_desc_vaddr; +} + +static struct udma_desc *udma_udma_desc_from_paddr(struct udma_chan *uc, + dma_addr_t paddr) +{ + struct udma_desc *d = uc->terminated_desc; + + if (d) { + dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d, + d->desc_idx); + + if (desc_paddr != paddr) + d = NULL; + } + + if (!d) { + d = uc->desc; + if (d) { + dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d, + d->desc_idx); + + if (desc_paddr != paddr) + d = NULL; + } + } + + return d; +} + +static void udma_free_hwdesc(struct udma_chan *uc, struct udma_desc *d) +{ + if (uc->use_dma_pool) { + int i; + + for (i = 0; i < d->hwdesc_count; i++) { + if (!d->hwdesc[i].cppi5_desc_vaddr) + continue; + + dma_pool_free(uc->hdesc_pool, + d->hwdesc[i].cppi5_desc_vaddr, + d->hwdesc[i].cppi5_desc_paddr); + + d->hwdesc[i].cppi5_desc_vaddr = NULL; + } + } else if (d->hwdesc[0].cppi5_desc_vaddr) { + dma_free_coherent(uc->dma_dev, d->hwdesc[0].cppi5_desc_size, + d->hwdesc[0].cppi5_desc_vaddr, + d->hwdesc[0].cppi5_desc_paddr); + + d->hwdesc[0].cppi5_desc_vaddr = NULL; + } +} + +static void udma_purge_desc_work(struct work_struct *work) +{ + struct udma_dev *ud = container_of(work, typeof(*ud), purge_work); + struct virt_dma_desc *vd, *_vd; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&ud->lock, flags); + list_splice_tail_init(&ud->desc_to_purge, &head); + spin_unlock_irqrestore(&ud->lock, flags); + + list_for_each_entry_safe(vd, _vd, &head, node) { + struct udma_chan *uc = to_udma_chan(vd->tx.chan); + struct udma_desc *d = to_udma_desc(&vd->tx); + + udma_free_hwdesc(uc, d); + list_del(&vd->node); + kfree(d); + } + + /* If more to purge, schedule the work again */ + if (!list_empty(&ud->desc_to_purge)) + schedule_work(&ud->purge_work); +} + +static void udma_desc_free(struct virt_dma_desc *vd) +{ + struct udma_dev *ud = to_udma_dev(vd->tx.chan->device); + struct udma_chan *uc = to_udma_chan(vd->tx.chan); + struct udma_desc *d = to_udma_desc(&vd->tx); + unsigned long flags; + + if (uc->terminated_desc == d) + uc->terminated_desc = NULL; + + if (uc->use_dma_pool) { + udma_free_hwdesc(uc, d); + kfree(d); + return; + } + + spin_lock_irqsave(&ud->lock, flags); + list_add_tail(&vd->node, &ud->desc_to_purge); + spin_unlock_irqrestore(&ud->lock, flags); + + schedule_work(&ud->purge_work); +} + +static bool udma_is_chan_running(struct udma_chan *uc) +{ + u32 trt_ctl = 0; + u32 rrt_ctl = 0; + + if (uc->tchan) + trt_ctl = udma_tchanrt_read(uc, UDMA_CHAN_RT_CTL_REG); + if (uc->rchan) + rrt_ctl = udma_rchanrt_read(uc, UDMA_CHAN_RT_CTL_REG); + + if (trt_ctl & UDMA_CHAN_RT_CTL_EN || rrt_ctl & UDMA_CHAN_RT_CTL_EN) + return true; + + return false; +} + +static bool udma_is_chan_paused(struct udma_chan *uc) +{ + u32 val, pause_mask; + + switch (uc->config.dir) { + case DMA_DEV_TO_MEM: + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_RT_EN_REG); + pause_mask = UDMA_PEER_RT_EN_PAUSE; + break; + case DMA_MEM_TO_DEV: + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_RT_EN_REG); + pause_mask = UDMA_PEER_RT_EN_PAUSE; + break; + case DMA_MEM_TO_MEM: + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_CTL_REG); + pause_mask = UDMA_CHAN_RT_CTL_PAUSE; + break; + default: + return false; + } + + if (val & pause_mask) + return true; + + return false; +} + +static inline dma_addr_t udma_get_rx_flush_hwdesc_paddr(struct udma_chan *uc) +{ + return uc->ud->rx_flush.hwdescs[uc->config.pkt_mode].cppi5_desc_paddr; +} + +static int udma_push_to_ring(struct udma_chan *uc, int idx) +{ + struct udma_desc *d = uc->desc; + struct k3_ring *ring = NULL; + dma_addr_t paddr; + + switch (uc->config.dir) { + case DMA_DEV_TO_MEM: + ring = uc->rflow->fd_ring; + break; + case DMA_MEM_TO_DEV: + case DMA_MEM_TO_MEM: + ring = uc->tchan->t_ring; + break; + default: + return -EINVAL; + } + + /* RX flush packet: idx == -1 is only passed in case of DEV_TO_MEM */ + if (idx == -1) { + paddr = udma_get_rx_flush_hwdesc_paddr(uc); + } else { + paddr = udma_curr_cppi5_desc_paddr(d, idx); + + wmb(); /* Ensure that writes are not moved over this point */ + } + + return k3_ringacc_ring_push(ring, &paddr); +} + +static bool udma_desc_is_rx_flush(struct udma_chan *uc, dma_addr_t addr) +{ + if (uc->config.dir != DMA_DEV_TO_MEM) + return false; + + if (addr == udma_get_rx_flush_hwdesc_paddr(uc)) + return true; + + return false; +} + +static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr) +{ + struct k3_ring *ring = NULL; + int ret; + + switch (uc->config.dir) { + case DMA_DEV_TO_MEM: + ring = uc->rflow->r_ring; + break; + case DMA_MEM_TO_DEV: + case DMA_MEM_TO_MEM: + ring = uc->tchan->tc_ring; + break; + default: + return -ENOENT; + } + + ret = k3_ringacc_ring_pop(ring, addr); + if (ret) + return ret; + + rmb(); /* Ensure that reads are not moved before this point */ + + /* Teardown completion */ + if (cppi5_desc_is_tdcm(*addr)) + return 0; + + /* Check for flush descriptor */ + if (udma_desc_is_rx_flush(uc, *addr)) + return -ENOENT; + + return 0; +} + +static void udma_reset_rings(struct udma_chan *uc) +{ + struct k3_ring *ring1 = NULL; + struct k3_ring *ring2 = NULL; + + switch (uc->config.dir) { + case DMA_DEV_TO_MEM: + if (uc->rchan) { + ring1 = uc->rflow->fd_ring; + ring2 = uc->rflow->r_ring; + } + break; + case DMA_MEM_TO_DEV: + case DMA_MEM_TO_MEM: + if (uc->tchan) { + ring1 = uc->tchan->t_ring; + ring2 = uc->tchan->tc_ring; + } + break; + default: + break; + } + + if (ring1) + k3_ringacc_ring_reset_dma(ring1, + k3_ringacc_ring_get_occ(ring1)); + if (ring2) + k3_ringacc_ring_reset(ring2); + + /* make sure we are not leaking memory by stalled descriptor */ + if (uc->terminated_desc) { + udma_desc_free(&uc->terminated_desc->vd); + uc->terminated_desc = NULL; + } +} + +static void udma_decrement_byte_counters(struct udma_chan *uc, u32 val) +{ + if (uc->desc->dir == DMA_DEV_TO_MEM) { + udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); + udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); + if (uc->config.ep_type != PSIL_EP_NATIVE) + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + } else { + udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); + udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); + if (!uc->bchan && uc->config.ep_type != PSIL_EP_NATIVE) + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + } +} + +static void udma_reset_counters(struct udma_chan *uc) +{ + u32 val; + + if (uc->tchan) { + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); + + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG); + udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); + + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PCNT_REG); + udma_tchanrt_write(uc, UDMA_CHAN_RT_PCNT_REG, val); + + if (!uc->bchan) { + val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + } + } + + if (uc->rchan) { + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); + + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG); + udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); + + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PCNT_REG); + udma_rchanrt_write(uc, UDMA_CHAN_RT_PCNT_REG, val); + + val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + } +} + +static int udma_reset_chan(struct udma_chan *uc, bool hard) +{ + switch (uc->config.dir) { + case DMA_DEV_TO_MEM: + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, 0); + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0); + break; + case DMA_MEM_TO_DEV: + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0); + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, 0); + break; + case DMA_MEM_TO_MEM: + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0); + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0); + break; + default: + return -EINVAL; + } + + /* Reset all counters */ + udma_reset_counters(uc); + + /* Hard reset: re-initialize the channel to reset */ + if (hard) { + struct udma_chan_config ucc_backup; + int ret; + + memcpy(&ucc_backup, &uc->config, sizeof(uc->config)); + uc->ud->ddev.device_free_chan_resources(&uc->vc.chan); + + /* restore the channel configuration */ + memcpy(&uc->config, &ucc_backup, sizeof(uc->config)); + ret = uc->ud->ddev.device_alloc_chan_resources(&uc->vc.chan); + if (ret) + return ret; + + /* + * Setting forced teardown after forced reset helps recovering + * the rchan. + */ + if (uc->config.dir == DMA_DEV_TO_MEM) + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN | + UDMA_CHAN_RT_CTL_TDOWN | + UDMA_CHAN_RT_CTL_FTDOWN); + } + uc->state = UDMA_CHAN_IS_IDLE; + + return 0; +} + +static void udma_start_desc(struct udma_chan *uc) +{ + struct udma_chan_config *ucc = &uc->config; + + if (uc->ud->match_data->type == DMA_TYPE_UDMA && ucc->pkt_mode && + (uc->cyclic || ucc->dir == DMA_DEV_TO_MEM)) { + int i; + + /* + * UDMA only: Push all descriptors to ring for packet mode + * cyclic or RX + * PKTDMA supports pre-linked descriptor and cyclic is not + * supported + */ + for (i = 0; i < uc->desc->sglen; i++) + udma_push_to_ring(uc, i); + } else { + udma_push_to_ring(uc, 0); + } +} + +static bool udma_chan_needs_reconfiguration(struct udma_chan *uc) +{ + /* Only PDMAs have staticTR */ + if (uc->config.ep_type == PSIL_EP_NATIVE) + return false; + + /* Check if the staticTR configuration has changed for TX */ + if (memcmp(&uc->static_tr, &uc->desc->static_tr, sizeof(uc->static_tr))) + return true; + + return false; +} + +static int udma_start(struct udma_chan *uc) +{ + struct virt_dma_desc *vd = vchan_next_desc(&uc->vc); + + if (!vd) { + uc->desc = NULL; + return -ENOENT; + } + + list_del(&vd->node); + + uc->desc = to_udma_desc(&vd->tx); + + /* Channel is already running and does not need reconfiguration */ + if (udma_is_chan_running(uc) && !udma_chan_needs_reconfiguration(uc)) { + udma_start_desc(uc); + goto out; + } + + /* Make sure that we clear the teardown bit, if it is set */ + udma_reset_chan(uc, false); + + /* Push descriptors before we start the channel */ + udma_start_desc(uc); + + switch (uc->desc->dir) { + case DMA_DEV_TO_MEM: + /* Config remote TR */ + if (uc->config.ep_type == PSIL_EP_PDMA_XY) { + u32 val = PDMA_STATIC_TR_Y(uc->desc->static_tr.elcnt) | + PDMA_STATIC_TR_X(uc->desc->static_tr.elsize); + const struct udma_match_data *match_data = + uc->ud->match_data; + + if (uc->config.enable_acc32) + val |= PDMA_STATIC_TR_XY_ACC32; + if (uc->config.enable_burst) + val |= PDMA_STATIC_TR_XY_BURST; + + udma_rchanrt_write(uc, + UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG, + val); + + udma_rchanrt_write(uc, + UDMA_CHAN_RT_PEER_STATIC_TR_Z_REG, + PDMA_STATIC_TR_Z(uc->desc->static_tr.bstcnt, + match_data->statictr_z_mask)); + + /* save the current staticTR configuration */ + memcpy(&uc->static_tr, &uc->desc->static_tr, + sizeof(uc->static_tr)); + } + + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN); + + /* Enable remote */ + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_ENABLE); + + break; + case DMA_MEM_TO_DEV: + /* Config remote TR */ + if (uc->config.ep_type == PSIL_EP_PDMA_XY) { + u32 val = PDMA_STATIC_TR_Y(uc->desc->static_tr.elcnt) | + PDMA_STATIC_TR_X(uc->desc->static_tr.elsize); + + if (uc->config.enable_acc32) + val |= PDMA_STATIC_TR_XY_ACC32; + if (uc->config.enable_burst) + val |= PDMA_STATIC_TR_XY_BURST; + + udma_tchanrt_write(uc, + UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG, + val); + + /* save the current staticTR configuration */ + memcpy(&uc->static_tr, &uc->desc->static_tr, + sizeof(uc->static_tr)); + } + + /* Enable remote */ + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_ENABLE); + + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN); + + break; + case DMA_MEM_TO_MEM: + udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN); + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN); + + break; + default: + return -EINVAL; + } + + uc->state = UDMA_CHAN_IS_ACTIVE; +out: + + return 0; +} + +static int udma_stop(struct udma_chan *uc) +{ + enum udma_chan_state old_state = uc->state; + + uc->state = UDMA_CHAN_IS_TERMINATING; + reinit_completion(&uc->teardown_completed); + + switch (uc->config.dir) { + case DMA_DEV_TO_MEM: + if (!uc->cyclic && !uc->desc) + udma_push_to_ring(uc, -1); + + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_ENABLE | + UDMA_PEER_RT_EN_TEARDOWN); + break; + case DMA_MEM_TO_DEV: + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_ENABLE | + UDMA_PEER_RT_EN_FLUSH); + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN | + UDMA_CHAN_RT_CTL_TDOWN); + break; + case DMA_MEM_TO_MEM: + udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_EN | + UDMA_CHAN_RT_CTL_TDOWN); + break; + default: + uc->state = old_state; + complete_all(&uc->teardown_completed); + return -EINVAL; + } + + return 0; +} + +static void udma_cyclic_packet_elapsed(struct udma_chan *uc) +{ + struct udma_desc *d = uc->desc; + struct cppi5_host_desc_t *h_desc; + + h_desc = d->hwdesc[d->desc_idx].cppi5_desc_vaddr; + cppi5_hdesc_reset_to_original(h_desc); + udma_push_to_ring(uc, d->desc_idx); + d->desc_idx = (d->desc_idx + 1) % d->sglen; +} + +static inline void udma_fetch_epib(struct udma_chan *uc, struct udma_desc *d) +{ + struct cppi5_host_desc_t *h_desc = d->hwdesc[0].cppi5_desc_vaddr; + + memcpy(d->metadata, h_desc->epib, d->metadata_size); +} + +static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d) +{ + u32 peer_bcnt, bcnt; + + /* + * Only TX towards PDMA is affected. + * If DMA_PREP_INTERRUPT is not set by consumer then skip the transfer + * completion calculation, consumer must ensure that there is no stale + * data in DMA fabric in this case. + */ + if (uc->config.ep_type == PSIL_EP_NATIVE || + uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT)) + return true; + + peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); + bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + + /* Transfer is incomplete, store current residue and time stamp */ + if (peer_bcnt < bcnt) { + uc->tx_drain.residue = bcnt - peer_bcnt; + uc->tx_drain.tstamp = ktime_get(); + return false; + } + + return true; +} + +static void udma_check_tx_completion(struct work_struct *work) +{ + struct udma_chan *uc = container_of(work, typeof(*uc), + tx_drain.work.work); + bool desc_done = true; + u32 residue_diff; + ktime_t time_diff; + unsigned long delay; + + while (1) { + if (uc->desc) { + /* Get previous residue and time stamp */ + residue_diff = uc->tx_drain.residue; + time_diff = uc->tx_drain.tstamp; + /* + * Get current residue and time stamp or see if + * transfer is complete + */ + desc_done = udma_is_desc_really_done(uc, uc->desc); + } + + if (!desc_done) { + /* + * Find the time delta and residue delta w.r.t + * previous poll + */ + time_diff = ktime_sub(uc->tx_drain.tstamp, + time_diff) + 1; + residue_diff -= uc->tx_drain.residue; + if (residue_diff) { + /* + * Try to guess when we should check + * next time by calculating rate at + * which data is being drained at the + * peer device + */ + delay = (time_diff / residue_diff) * + uc->tx_drain.residue; + } else { + /* No progress, check again in 1 second */ + schedule_delayed_work(&uc->tx_drain.work, HZ); + break; + } + + usleep_range(ktime_to_us(delay), + ktime_to_us(delay) + 10); + continue; + } + + if (uc->desc) { + struct udma_desc *d = uc->desc; + + udma_decrement_byte_counters(uc, d->residue); + udma_start(uc); + vchan_cookie_complete(&d->vd); + break; + } + + break; + } +} + +static irqreturn_t udma_ring_irq_handler(int irq, void *data) +{ + struct udma_chan *uc = data; + struct udma_desc *d; + dma_addr_t paddr = 0; + + if (udma_pop_from_ring(uc, &paddr) || !paddr) + return IRQ_HANDLED; + + spin_lock(&uc->vc.lock); + + /* Teardown completion message */ + if (cppi5_desc_is_tdcm(paddr)) { + complete_all(&uc->teardown_completed); + + if (uc->terminated_desc) { + udma_desc_free(&uc->terminated_desc->vd); + uc->terminated_desc = NULL; + } + + if (!uc->desc) + udma_start(uc); + + goto out; + } + + d = udma_udma_desc_from_paddr(uc, paddr); + + if (d) { + dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d, + d->desc_idx); + if (desc_paddr != paddr) { + dev_err(uc->ud->dev, "not matching descriptors!\n"); + goto out; + } + + if (d == uc->desc) { + /* active descriptor */ + if (uc->cyclic) { + udma_cyclic_packet_elapsed(uc); + vchan_cyclic_callback(&d->vd); + } else { + if (udma_is_desc_really_done(uc, d)) { + udma_decrement_byte_counters(uc, d->residue); + udma_start(uc); + vchan_cookie_complete(&d->vd); + } else { + schedule_delayed_work(&uc->tx_drain.work, + 0); + } + } + } else { + /* + * terminated descriptor, mark the descriptor as + * completed to update the channel's cookie marker + */ + dma_cookie_complete(&d->vd.tx); + } + } +out: + spin_unlock(&uc->vc.lock); + + return IRQ_HANDLED; +} + +static irqreturn_t udma_udma_irq_handler(int irq, void *data) +{ + struct udma_chan *uc = data; + struct udma_desc *d; + + spin_lock(&uc->vc.lock); + d = uc->desc; + if (d) { + d->tr_idx = (d->tr_idx + 1) % d->sglen; + + if (uc->cyclic) { + vchan_cyclic_callback(&d->vd); + } else { + /* TODO: figure out the real amount of data */ + udma_decrement_byte_counters(uc, d->residue); + udma_start(uc); + vchan_cookie_complete(&d->vd); + } + } + + spin_unlock(&uc->vc.lock); + + return IRQ_HANDLED; +} + +/** + * __udma_alloc_gp_rflow_range - alloc range of GP RX flows + * @ud: UDMA device + * @from: Start the search from this flow id number + * @cnt: Number of consecutive flow ids to allocate + * + * Allocate range of RX flow ids for future use, those flows can be requested + * only using explicit flow id number. if @from is set to -1 it will try to find + * first free range. if @from is positive value it will force allocation only + * of the specified range of flows. + * + * Returns -ENOMEM if can't find free range. + * -EEXIST if requested range is busy. + * -EINVAL if wrong input values passed. + * Returns flow id on success. + */ +static int __udma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt) +{ + int start, tmp_from; + DECLARE_BITMAP(tmp, K3_UDMA_MAX_RFLOWS); + + tmp_from = from; + if (tmp_from < 0) + tmp_from = ud->rchan_cnt; + /* default flows can't be allocated and accessible only by id */ + if (tmp_from < ud->rchan_cnt) + return -EINVAL; + + if (tmp_from + cnt > ud->rflow_cnt) + return -EINVAL; + + bitmap_or(tmp, ud->rflow_gp_map, ud->rflow_gp_map_allocated, + ud->rflow_cnt); + + start = bitmap_find_next_zero_area(tmp, + ud->rflow_cnt, + tmp_from, cnt, 0); + if (start >= ud->rflow_cnt) + return -ENOMEM; + + if (from >= 0 && start != from) + return -EEXIST; + + bitmap_set(ud->rflow_gp_map_allocated, start, cnt); + return start; +} + +static int __udma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt) +{ + if (from < ud->rchan_cnt) + return -EINVAL; + if (from + cnt > ud->rflow_cnt) + return -EINVAL; + + bitmap_clear(ud->rflow_gp_map_allocated, from, cnt); + return 0; +} + +static struct udma_rflow *__udma_get_rflow(struct udma_dev *ud, int id) +{ + /* + * Attempt to request rflow by ID can be made for any rflow + * if not in use with assumption that caller knows what's doing. + * TI-SCI FW will perform additional permission check ant way, it's + * safe + */ + + if (id < 0 || id >= ud->rflow_cnt) + return ERR_PTR(-ENOENT); + + if (test_bit(id, ud->rflow_in_use)) + return ERR_PTR(-ENOENT); + + if (ud->rflow_gp_map) { + /* GP rflow has to be allocated first */ + if (!test_bit(id, ud->rflow_gp_map) && + !test_bit(id, ud->rflow_gp_map_allocated)) + return ERR_PTR(-EINVAL); + } + + dev_dbg(ud->dev, "get rflow%d\n", id); + set_bit(id, ud->rflow_in_use); + return &ud->rflows[id]; +} + +static void __udma_put_rflow(struct udma_dev *ud, struct udma_rflow *rflow) +{ + if (!test_bit(rflow->id, ud->rflow_in_use)) { + dev_err(ud->dev, "attempt to put unused rflow%d\n", rflow->id); + return; + } + + dev_dbg(ud->dev, "put rflow%d\n", rflow->id); + clear_bit(rflow->id, ud->rflow_in_use); +} + +#define UDMA_RESERVE_RESOURCE(res) \ +static struct udma_##res *__udma_reserve_##res(struct udma_dev *ud, \ + enum udma_tp_level tpl, \ + int id) \ +{ \ + if (id >= 0) { \ + if (test_bit(id, ud->res##_map)) { \ + dev_err(ud->dev, "res##%d is in use\n", id); \ + return ERR_PTR(-ENOENT); \ + } \ + } else { \ + int start; \ + \ + if (tpl >= ud->res##_tpl.levels) \ + tpl = ud->res##_tpl.levels - 1; \ + \ + start = ud->res##_tpl.start_idx[tpl]; \ + \ + id = find_next_zero_bit(ud->res##_map, ud->res##_cnt, \ + start); \ + if (id == ud->res##_cnt) { \ + return ERR_PTR(-ENOENT); \ + } \ + } \ + \ + set_bit(id, ud->res##_map); \ + return &ud->res##s[id]; \ +} + +UDMA_RESERVE_RESOURCE(bchan); +UDMA_RESERVE_RESOURCE(tchan); +UDMA_RESERVE_RESOURCE(rchan); + +static int bcdma_get_bchan(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + enum udma_tp_level tpl; + int ret; + + if (uc->bchan) { + dev_dbg(ud->dev, "chan%d: already have bchan%d allocated\n", + uc->id, uc->bchan->id); + return 0; + } + + /* + * Use normal channels for peripherals, and highest TPL channel for + * mem2mem + */ + if (uc->config.tr_trigger_type) + tpl = 0; + else + tpl = ud->bchan_tpl.levels - 1; + + uc->bchan = __udma_reserve_bchan(ud, tpl, -1); + if (IS_ERR(uc->bchan)) { + ret = PTR_ERR(uc->bchan); + uc->bchan = NULL; + return ret; + } + + uc->tchan = uc->bchan; + + return 0; +} + +static int udma_get_tchan(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + int ret; + + if (uc->tchan) { + dev_dbg(ud->dev, "chan%d: already have tchan%d allocated\n", + uc->id, uc->tchan->id); + return 0; + } + + /* + * mapped_channel_id is -1 for UDMA, BCDMA and PKTDMA unmapped channels. + * For PKTDMA mapped channels it is configured to a channel which must + * be used to service the peripheral. + */ + uc->tchan = __udma_reserve_tchan(ud, uc->config.channel_tpl, + uc->config.mapped_channel_id); + if (IS_ERR(uc->tchan)) { + ret = PTR_ERR(uc->tchan); + uc->tchan = NULL; + return ret; + } + + if (ud->tflow_cnt) { + int tflow_id; + + /* Only PKTDMA have support for tx flows */ + if (uc->config.default_flow_id >= 0) + tflow_id = uc->config.default_flow_id; + else + tflow_id = uc->tchan->id; + + if (test_bit(tflow_id, ud->tflow_map)) { + dev_err(ud->dev, "tflow%d is in use\n", tflow_id); + clear_bit(uc->tchan->id, ud->tchan_map); + uc->tchan = NULL; + return -ENOENT; + } + + uc->tchan->tflow_id = tflow_id; + set_bit(tflow_id, ud->tflow_map); + } else { + uc->tchan->tflow_id = -1; + } + + return 0; +} + +static int udma_get_rchan(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + int ret; + + if (uc->rchan) { + dev_dbg(ud->dev, "chan%d: already have rchan%d allocated\n", + uc->id, uc->rchan->id); + return 0; + } + + /* + * mapped_channel_id is -1 for UDMA, BCDMA and PKTDMA unmapped channels. + * For PKTDMA mapped channels it is configured to a channel which must + * be used to service the peripheral. + */ + uc->rchan = __udma_reserve_rchan(ud, uc->config.channel_tpl, + uc->config.mapped_channel_id); + if (IS_ERR(uc->rchan)) { + ret = PTR_ERR(uc->rchan); + uc->rchan = NULL; + return ret; + } + + return 0; +} + +static int udma_get_chan_pair(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + int chan_id, end; + + if ((uc->tchan && uc->rchan) && uc->tchan->id == uc->rchan->id) { + dev_info(ud->dev, "chan%d: already have %d pair allocated\n", + uc->id, uc->tchan->id); + return 0; + } + + if (uc->tchan) { + dev_err(ud->dev, "chan%d: already have tchan%d allocated\n", + uc->id, uc->tchan->id); + return -EBUSY; + } else if (uc->rchan) { + dev_err(ud->dev, "chan%d: already have rchan%d allocated\n", + uc->id, uc->rchan->id); + return -EBUSY; + } + + /* Can be optimized, but let's have it like this for now */ + end = min(ud->tchan_cnt, ud->rchan_cnt); + /* + * Try to use the highest TPL channel pair for MEM_TO_MEM channels + * Note: in UDMAP the channel TPL is symmetric between tchan and rchan + */ + chan_id = ud->tchan_tpl.start_idx[ud->tchan_tpl.levels - 1]; + for (; chan_id < end; chan_id++) { + if (!test_bit(chan_id, ud->tchan_map) && + !test_bit(chan_id, ud->rchan_map)) + break; + } + + if (chan_id == end) + return -ENOENT; + + set_bit(chan_id, ud->tchan_map); + set_bit(chan_id, ud->rchan_map); + uc->tchan = &ud->tchans[chan_id]; + uc->rchan = &ud->rchans[chan_id]; + + /* UDMA does not use tx flows */ + uc->tchan->tflow_id = -1; + + return 0; +} + +static int udma_get_rflow(struct udma_chan *uc, int flow_id) +{ + struct udma_dev *ud = uc->ud; + int ret; + + if (!uc->rchan) { + dev_err(ud->dev, "chan%d: does not have rchan??\n", uc->id); + return -EINVAL; + } + + if (uc->rflow) { + dev_dbg(ud->dev, "chan%d: already have rflow%d allocated\n", + uc->id, uc->rflow->id); + return 0; + } + + uc->rflow = __udma_get_rflow(ud, flow_id); + if (IS_ERR(uc->rflow)) { + ret = PTR_ERR(uc->rflow); + uc->rflow = NULL; + return ret; + } + + return 0; +} + +static void bcdma_put_bchan(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + + if (uc->bchan) { + dev_dbg(ud->dev, "chan%d: put bchan%d\n", uc->id, + uc->bchan->id); + clear_bit(uc->bchan->id, ud->bchan_map); + uc->bchan = NULL; + uc->tchan = NULL; + } +} + +static void udma_put_rchan(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + + if (uc->rchan) { + dev_dbg(ud->dev, "chan%d: put rchan%d\n", uc->id, + uc->rchan->id); + clear_bit(uc->rchan->id, ud->rchan_map); + uc->rchan = NULL; + } +} + +static void udma_put_tchan(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + + if (uc->tchan) { + dev_dbg(ud->dev, "chan%d: put tchan%d\n", uc->id, + uc->tchan->id); + clear_bit(uc->tchan->id, ud->tchan_map); + + if (uc->tchan->tflow_id >= 0) + clear_bit(uc->tchan->tflow_id, ud->tflow_map); + + uc->tchan = NULL; + } +} + +static void udma_put_rflow(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + + if (uc->rflow) { + dev_dbg(ud->dev, "chan%d: put rflow%d\n", uc->id, + uc->rflow->id); + __udma_put_rflow(ud, uc->rflow); + uc->rflow = NULL; + } +} + +static void bcdma_free_bchan_resources(struct udma_chan *uc) +{ + if (!uc->bchan) + return; + + k3_ringacc_ring_free(uc->bchan->tc_ring); + k3_ringacc_ring_free(uc->bchan->t_ring); + uc->bchan->tc_ring = NULL; + uc->bchan->t_ring = NULL; + k3_configure_chan_coherency(&uc->vc.chan, 0); + + bcdma_put_bchan(uc); +} + +static int bcdma_alloc_bchan_resources(struct udma_chan *uc) +{ + struct k3_ring_cfg ring_cfg; + struct udma_dev *ud = uc->ud; + int ret; + + ret = bcdma_get_bchan(uc); + if (ret) + return ret; + + ret = k3_ringacc_request_rings_pair(ud->ringacc, uc->bchan->id, -1, + &uc->bchan->t_ring, + &uc->bchan->tc_ring); + if (ret) { + ret = -EBUSY; + goto err_ring; + } + + memset(&ring_cfg, 0, sizeof(ring_cfg)); + ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE; + ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8; + ring_cfg.mode = K3_RINGACC_RING_MODE_RING; + + k3_configure_chan_coherency(&uc->vc.chan, ud->asel); + ring_cfg.asel = ud->asel; + ring_cfg.dma_dev = dmaengine_get_dma_device(&uc->vc.chan); + + ret = k3_ringacc_ring_cfg(uc->bchan->t_ring, &ring_cfg); + if (ret) + goto err_ringcfg; + + return 0; + +err_ringcfg: + k3_ringacc_ring_free(uc->bchan->tc_ring); + uc->bchan->tc_ring = NULL; + k3_ringacc_ring_free(uc->bchan->t_ring); + uc->bchan->t_ring = NULL; + k3_configure_chan_coherency(&uc->vc.chan, 0); +err_ring: + bcdma_put_bchan(uc); + + return ret; +} + +static void udma_free_tx_resources(struct udma_chan *uc) +{ + if (!uc->tchan) + return; + + k3_ringacc_ring_free(uc->tchan->t_ring); + k3_ringacc_ring_free(uc->tchan->tc_ring); + uc->tchan->t_ring = NULL; + uc->tchan->tc_ring = NULL; + + udma_put_tchan(uc); +} + +static int udma_alloc_tx_resources(struct udma_chan *uc) +{ + struct k3_ring_cfg ring_cfg; + struct udma_dev *ud = uc->ud; + struct udma_tchan *tchan; + int ring_idx, ret; + + ret = udma_get_tchan(uc); + if (ret) + return ret; + + tchan = uc->tchan; + if (tchan->tflow_id >= 0) + ring_idx = tchan->tflow_id; + else + ring_idx = ud->bchan_cnt + tchan->id; + + ret = k3_ringacc_request_rings_pair(ud->ringacc, ring_idx, -1, + &tchan->t_ring, + &tchan->tc_ring); + if (ret) { + ret = -EBUSY; + goto err_ring; + } + + memset(&ring_cfg, 0, sizeof(ring_cfg)); + ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE; + ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8; + if (ud->match_data->type == DMA_TYPE_UDMA) { + ring_cfg.mode = K3_RINGACC_RING_MODE_MESSAGE; + } else { + ring_cfg.mode = K3_RINGACC_RING_MODE_RING; + + k3_configure_chan_coherency(&uc->vc.chan, uc->config.asel); + ring_cfg.asel = uc->config.asel; + ring_cfg.dma_dev = dmaengine_get_dma_device(&uc->vc.chan); + } + + ret = k3_ringacc_ring_cfg(tchan->t_ring, &ring_cfg); + ret |= k3_ringacc_ring_cfg(tchan->tc_ring, &ring_cfg); + + if (ret) + goto err_ringcfg; + + return 0; + +err_ringcfg: + k3_ringacc_ring_free(uc->tchan->tc_ring); + uc->tchan->tc_ring = NULL; + k3_ringacc_ring_free(uc->tchan->t_ring); + uc->tchan->t_ring = NULL; +err_ring: + udma_put_tchan(uc); + + return ret; +} + +static void udma_free_rx_resources(struct udma_chan *uc) +{ + if (!uc->rchan) + return; + + if (uc->rflow) { + struct udma_rflow *rflow = uc->rflow; + + k3_ringacc_ring_free(rflow->fd_ring); + k3_ringacc_ring_free(rflow->r_ring); + rflow->fd_ring = NULL; + rflow->r_ring = NULL; + + udma_put_rflow(uc); + } + + udma_put_rchan(uc); +} + +static int udma_alloc_rx_resources(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + struct k3_ring_cfg ring_cfg; + struct udma_rflow *rflow; + int fd_ring_id; + int ret; + + ret = udma_get_rchan(uc); + if (ret) + return ret; + + /* For MEM_TO_MEM we don't need rflow or rings */ + if (uc->config.dir == DMA_MEM_TO_MEM) + return 0; + + if (uc->config.default_flow_id >= 0) + ret = udma_get_rflow(uc, uc->config.default_flow_id); + else + ret = udma_get_rflow(uc, uc->rchan->id); + + if (ret) { + ret = -EBUSY; + goto err_rflow; + } + + rflow = uc->rflow; + if (ud->tflow_cnt) + fd_ring_id = ud->tflow_cnt + rflow->id; + else + fd_ring_id = ud->bchan_cnt + ud->tchan_cnt + ud->echan_cnt + + uc->rchan->id; + + ret = k3_ringacc_request_rings_pair(ud->ringacc, fd_ring_id, -1, + &rflow->fd_ring, &rflow->r_ring); + if (ret) { + ret = -EBUSY; + goto err_ring; + } + + memset(&ring_cfg, 0, sizeof(ring_cfg)); + + ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8; + if (ud->match_data->type == DMA_TYPE_UDMA) { + if (uc->config.pkt_mode) + ring_cfg.size = SG_MAX_SEGMENTS; + else + ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE; + + ring_cfg.mode = K3_RINGACC_RING_MODE_MESSAGE; + } else { + ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE; + ring_cfg.mode = K3_RINGACC_RING_MODE_RING; + + k3_configure_chan_coherency(&uc->vc.chan, uc->config.asel); + ring_cfg.asel = uc->config.asel; + ring_cfg.dma_dev = dmaengine_get_dma_device(&uc->vc.chan); + } + + ret = k3_ringacc_ring_cfg(rflow->fd_ring, &ring_cfg); + + ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE; + ret |= k3_ringacc_ring_cfg(rflow->r_ring, &ring_cfg); + + if (ret) + goto err_ringcfg; + + return 0; + +err_ringcfg: + k3_ringacc_ring_free(rflow->r_ring); + rflow->r_ring = NULL; + k3_ringacc_ring_free(rflow->fd_ring); + rflow->fd_ring = NULL; +err_ring: + udma_put_rflow(uc); +err_rflow: + udma_put_rchan(uc); + + return ret; +} + +#define TISCI_BCDMA_BCHAN_VALID_PARAMS ( \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_EXTENDED_CH_TYPE_VALID) + +#define TISCI_BCDMA_TCHAN_VALID_PARAMS ( \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID) + +#define TISCI_BCDMA_RCHAN_VALID_PARAMS ( \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID) + +#define TISCI_UDMA_TCHAN_VALID_PARAMS ( \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID) + +#define TISCI_UDMA_RCHAN_VALID_PARAMS ( \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_SHORT_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_LONG_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID | \ + TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID) + +static int udma_tisci_m2m_channel_config(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops; + struct udma_tchan *tchan = uc->tchan; + struct udma_rchan *rchan = uc->rchan; + u8 burst_size = 0; + int ret; + u8 tpl; + + /* Non synchronized - mem to mem type of transfer */ + int tc_ring = k3_ringacc_get_ring_id(tchan->tc_ring); + struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 }; + struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 }; + + if (ud->match_data->flags & UDMA_FLAG_BURST_SIZE) { + tpl = udma_get_chan_tpl_index(&ud->tchan_tpl, tchan->id); + + burst_size = ud->match_data->burst_size[tpl]; + } + + req_tx.valid_params = TISCI_UDMA_TCHAN_VALID_PARAMS; + req_tx.nav_id = tisci_rm->tisci_dev_id; + req_tx.index = tchan->id; + req_tx.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR; + req_tx.tx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2; + req_tx.txcq_qnum = tc_ring; + req_tx.tx_atype = ud->atype; + if (burst_size) { + req_tx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID; + req_tx.tx_burst_size = burst_size; + } + + ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx); + if (ret) { + dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, ret); + return ret; + } + + req_rx.valid_params = TISCI_UDMA_RCHAN_VALID_PARAMS; + req_rx.nav_id = tisci_rm->tisci_dev_id; + req_rx.index = rchan->id; + req_rx.rx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2; + req_rx.rxcq_qnum = tc_ring; + req_rx.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR; + req_rx.rx_atype = ud->atype; + if (burst_size) { + req_rx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID; + req_rx.rx_burst_size = burst_size; + } + + ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx); + if (ret) + dev_err(ud->dev, "rchan%d alloc failed %d\n", rchan->id, ret); + + return ret; +} + +static int bcdma_tisci_m2m_channel_config(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops; + struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 }; + struct udma_bchan *bchan = uc->bchan; + u8 burst_size = 0; + int ret; + u8 tpl; + + if (ud->match_data->flags & UDMA_FLAG_BURST_SIZE) { + tpl = udma_get_chan_tpl_index(&ud->bchan_tpl, bchan->id); + + burst_size = ud->match_data->burst_size[tpl]; + } + + req_tx.valid_params = TISCI_BCDMA_BCHAN_VALID_PARAMS; + req_tx.nav_id = tisci_rm->tisci_dev_id; + req_tx.extended_ch_type = TI_SCI_RM_BCDMA_EXTENDED_CH_TYPE_BCHAN; + req_tx.index = bchan->id; + if (burst_size) { + req_tx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID; + req_tx.tx_burst_size = burst_size; + } + + ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx); + if (ret) + dev_err(ud->dev, "bchan%d cfg failed %d\n", bchan->id, ret); + + return ret; +} + +static int udma_tisci_tx_channel_config(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops; + struct udma_tchan *tchan = uc->tchan; + int tc_ring = k3_ringacc_get_ring_id(tchan->tc_ring); + struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 }; + u32 mode, fetch_size; + int ret; + + if (uc->config.pkt_mode) { + mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR; + fetch_size = cppi5_hdesc_calc_size(uc->config.needs_epib, + uc->config.psd_size, 0); + } else { + mode = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR; + fetch_size = sizeof(struct cppi5_desc_hdr_t); + } + + req_tx.valid_params = TISCI_UDMA_TCHAN_VALID_PARAMS; + req_tx.nav_id = tisci_rm->tisci_dev_id; + req_tx.index = tchan->id; + req_tx.tx_chan_type = mode; + req_tx.tx_supr_tdpkt = uc->config.notdpkt; + req_tx.tx_fetch_size = fetch_size >> 2; + req_tx.txcq_qnum = tc_ring; + req_tx.tx_atype = uc->config.atype; + if (uc->config.ep_type == PSIL_EP_PDMA_XY && + ud->match_data->flags & UDMA_FLAG_TDTYPE) { + /* wait for peer to complete the teardown for PDMAs */ + req_tx.valid_params |= + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_TDTYPE_VALID; + req_tx.tx_tdtype = 1; + } + + ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx); + if (ret) + dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, ret); + + return ret; +} + +static int bcdma_tisci_tx_channel_config(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops; + struct udma_tchan *tchan = uc->tchan; + struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 }; + int ret; + + req_tx.valid_params = TISCI_BCDMA_TCHAN_VALID_PARAMS; + req_tx.nav_id = tisci_rm->tisci_dev_id; + req_tx.index = tchan->id; + req_tx.tx_supr_tdpkt = uc->config.notdpkt; + if (ud->match_data->flags & UDMA_FLAG_TDTYPE) { + /* wait for peer to complete the teardown for PDMAs */ + req_tx.valid_params |= + TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_TDTYPE_VALID; + req_tx.tx_tdtype = 1; + } + + ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx); + if (ret) + dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, ret); + + return ret; +} + +#define pktdma_tisci_tx_channel_config bcdma_tisci_tx_channel_config + +static int udma_tisci_rx_channel_config(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops; + struct udma_rchan *rchan = uc->rchan; + int fd_ring = k3_ringacc_get_ring_id(uc->rflow->fd_ring); + int rx_ring = k3_ringacc_get_ring_id(uc->rflow->r_ring); + struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 }; + struct ti_sci_msg_rm_udmap_flow_cfg flow_req = { 0 }; + u32 mode, fetch_size; + int ret; + + if (uc->config.pkt_mode) { + mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR; + fetch_size = cppi5_hdesc_calc_size(uc->config.needs_epib, + uc->config.psd_size, 0); + } else { + mode = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR; + fetch_size = sizeof(struct cppi5_desc_hdr_t); + } + + req_rx.valid_params = TISCI_UDMA_RCHAN_VALID_PARAMS; + req_rx.nav_id = tisci_rm->tisci_dev_id; + req_rx.index = rchan->id; + req_rx.rx_fetch_size = fetch_size >> 2; + req_rx.rxcq_qnum = rx_ring; + req_rx.rx_chan_type = mode; + req_rx.rx_atype = uc->config.atype; + + ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx); + if (ret) { + dev_err(ud->dev, "rchan%d cfg failed %d\n", rchan->id, ret); + return ret; + } + + flow_req.valid_params = + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID; + + flow_req.nav_id = tisci_rm->tisci_dev_id; + flow_req.flow_index = rchan->id; + + if (uc->config.needs_epib) + flow_req.rx_einfo_present = 1; + else + flow_req.rx_einfo_present = 0; + if (uc->config.psd_size) + flow_req.rx_psinfo_present = 1; + else + flow_req.rx_psinfo_present = 0; + flow_req.rx_error_handling = 1; + flow_req.rx_dest_qnum = rx_ring; + flow_req.rx_src_tag_hi_sel = UDMA_RFLOW_SRCTAG_NONE; + flow_req.rx_src_tag_lo_sel = UDMA_RFLOW_SRCTAG_SRC_TAG; + flow_req.rx_dest_tag_hi_sel = UDMA_RFLOW_DSTTAG_DST_TAG_HI; + flow_req.rx_dest_tag_lo_sel = UDMA_RFLOW_DSTTAG_DST_TAG_LO; + flow_req.rx_fdq0_sz0_qnum = fd_ring; + flow_req.rx_fdq1_qnum = fd_ring; + flow_req.rx_fdq2_qnum = fd_ring; + flow_req.rx_fdq3_qnum = fd_ring; + + ret = tisci_ops->rx_flow_cfg(tisci_rm->tisci, &flow_req); + + if (ret) + dev_err(ud->dev, "flow%d config failed: %d\n", rchan->id, ret); + + return 0; +} + +static int bcdma_tisci_rx_channel_config(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops; + struct udma_rchan *rchan = uc->rchan; + struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 }; + int ret; + + req_rx.valid_params = TISCI_BCDMA_RCHAN_VALID_PARAMS; + req_rx.nav_id = tisci_rm->tisci_dev_id; + req_rx.index = rchan->id; + + ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx); + if (ret) + dev_err(ud->dev, "rchan%d cfg failed %d\n", rchan->id, ret); + + return ret; +} + +static int pktdma_tisci_rx_channel_config(struct udma_chan *uc) +{ + struct udma_dev *ud = uc->ud; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops; + struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 }; + struct ti_sci_msg_rm_udmap_flow_cfg flow_req = { 0 }; + int ret; + + req_rx.valid_params = TISCI_BCDMA_RCHAN_VALID_PARAMS; + req_rx.nav_id = tisci_rm->tisci_dev_id; + req_rx.index = uc->rchan->id; + + ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx); + if (ret) { + dev_err(ud->dev, "rchan%d cfg failed %d\n", uc->rchan->id, ret); + return ret; + } + + flow_req.valid_params = + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID | + TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID; + + flow_req.nav_id = tisci_rm->tisci_dev_id; + flow_req.flow_index = uc->rflow->id; + + if (uc->config.needs_epib) + flow_req.rx_einfo_present = 1; + else + flow_req.rx_einfo_present = 0; + if (uc->config.psd_size) + flow_req.rx_psinfo_present = 1; + else + flow_req.rx_psinfo_present = 0; + flow_req.rx_error_handling = 1; + + ret = tisci_ops->rx_flow_cfg(tisci_rm->tisci, &flow_req); + + if (ret) + dev_err(ud->dev, "flow%d config failed: %d\n", uc->rflow->id, + ret); + + return ret; +} + +static int udma_alloc_chan_resources(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + struct udma_dev *ud = to_udma_dev(chan->device); + const struct udma_soc_data *soc_data = ud->soc_data; + struct k3_ring *irq_ring; + u32 irq_udma_idx; + int ret; + + uc->dma_dev = ud->dev; + + if (uc->config.pkt_mode || uc->config.dir == DMA_MEM_TO_MEM) { + uc->use_dma_pool = true; + /* in case of MEM_TO_MEM we have maximum of two TRs */ + if (uc->config.dir == DMA_MEM_TO_MEM) { + uc->config.hdesc_size = cppi5_trdesc_calc_size( + sizeof(struct cppi5_tr_type15_t), 2); + uc->config.pkt_mode = false; + } + } + + if (uc->use_dma_pool) { + uc->hdesc_pool = dma_pool_create(uc->name, ud->ddev.dev, + uc->config.hdesc_size, + ud->desc_align, + 0); + if (!uc->hdesc_pool) { + dev_err(ud->ddev.dev, + "Descriptor pool allocation failed\n"); + uc->use_dma_pool = false; + ret = -ENOMEM; + goto err_cleanup; + } + } + + /* + * Make sure that the completion is in a known state: + * No teardown, the channel is idle + */ + reinit_completion(&uc->teardown_completed); + complete_all(&uc->teardown_completed); + uc->state = UDMA_CHAN_IS_IDLE; + + switch (uc->config.dir) { + case DMA_MEM_TO_MEM: + /* Non synchronized - mem to mem type of transfer */ + dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-MEM\n", __func__, + uc->id); + + ret = udma_get_chan_pair(uc); + if (ret) + goto err_cleanup; + + ret = udma_alloc_tx_resources(uc); + if (ret) { + udma_put_rchan(uc); + goto err_cleanup; + } + + ret = udma_alloc_rx_resources(uc); + if (ret) { + udma_free_tx_resources(uc); + goto err_cleanup; + } + + uc->config.src_thread = ud->psil_base + uc->tchan->id; + uc->config.dst_thread = (ud->psil_base + uc->rchan->id) | + K3_PSIL_DST_THREAD_ID_OFFSET; + + irq_ring = uc->tchan->tc_ring; + irq_udma_idx = uc->tchan->id; + + ret = udma_tisci_m2m_channel_config(uc); + break; + case DMA_MEM_TO_DEV: + /* Slave transfer synchronized - mem to dev (TX) trasnfer */ + dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-DEV\n", __func__, + uc->id); + + ret = udma_alloc_tx_resources(uc); + if (ret) + goto err_cleanup; + + uc->config.src_thread = ud->psil_base + uc->tchan->id; + uc->config.dst_thread = uc->config.remote_thread_id; + uc->config.dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET; + + irq_ring = uc->tchan->tc_ring; + irq_udma_idx = uc->tchan->id; + + ret = udma_tisci_tx_channel_config(uc); + break; + case DMA_DEV_TO_MEM: + /* Slave transfer synchronized - dev to mem (RX) trasnfer */ + dev_dbg(uc->ud->dev, "%s: chan%d as DEV-to-MEM\n", __func__, + uc->id); + + ret = udma_alloc_rx_resources(uc); + if (ret) + goto err_cleanup; + + uc->config.src_thread = uc->config.remote_thread_id; + uc->config.dst_thread = (ud->psil_base + uc->rchan->id) | + K3_PSIL_DST_THREAD_ID_OFFSET; + + irq_ring = uc->rflow->r_ring; + irq_udma_idx = soc_data->oes.udma_rchan + uc->rchan->id; + + ret = udma_tisci_rx_channel_config(uc); + break; + default: + /* Can not happen */ + dev_err(uc->ud->dev, "%s: chan%d invalid direction (%u)\n", + __func__, uc->id, uc->config.dir); + ret = -EINVAL; + goto err_cleanup; + + } + + /* check if the channel configuration was successful */ + if (ret) + goto err_res_free; + + if (udma_is_chan_running(uc)) { + dev_warn(ud->dev, "chan%d: is running!\n", uc->id); + udma_reset_chan(uc, false); + if (udma_is_chan_running(uc)) { + dev_err(ud->dev, "chan%d: won't stop!\n", uc->id); + ret = -EBUSY; + goto err_res_free; + } + } + + /* PSI-L pairing */ + ret = navss_psil_pair(ud, uc->config.src_thread, uc->config.dst_thread); + if (ret) { + dev_err(ud->dev, "PSI-L pairing failed: 0x%04x -> 0x%04x\n", + uc->config.src_thread, uc->config.dst_thread); + goto err_res_free; + } + + uc->psil_paired = true; + + uc->irq_num_ring = k3_ringacc_get_ring_irq_num(irq_ring); + if (uc->irq_num_ring <= 0) { + dev_err(ud->dev, "Failed to get ring irq (index: %u)\n", + k3_ringacc_get_ring_id(irq_ring)); + ret = -EINVAL; + goto err_psi_free; + } + + ret = request_irq(uc->irq_num_ring, udma_ring_irq_handler, + IRQF_TRIGGER_HIGH, uc->name, uc); + if (ret) { + dev_err(ud->dev, "chan%d: ring irq request failed\n", uc->id); + goto err_irq_free; + } + + /* Event from UDMA (TR events) only needed for slave TR mode channels */ + if (is_slave_direction(uc->config.dir) && !uc->config.pkt_mode) { + uc->irq_num_udma = msi_get_virq(ud->dev, irq_udma_idx); + if (uc->irq_num_udma <= 0) { + dev_err(ud->dev, "Failed to get udma irq (index: %u)\n", + irq_udma_idx); + free_irq(uc->irq_num_ring, uc); + ret = -EINVAL; + goto err_irq_free; + } + + ret = request_irq(uc->irq_num_udma, udma_udma_irq_handler, 0, + uc->name, uc); + if (ret) { + dev_err(ud->dev, "chan%d: UDMA irq request failed\n", + uc->id); + free_irq(uc->irq_num_ring, uc); + goto err_irq_free; + } + } else { + uc->irq_num_udma = 0; + } + + udma_reset_rings(uc); + + return 0; + +err_irq_free: + uc->irq_num_ring = 0; + uc->irq_num_udma = 0; +err_psi_free: + navss_psil_unpair(ud, uc->config.src_thread, uc->config.dst_thread); + uc->psil_paired = false; +err_res_free: + udma_free_tx_resources(uc); + udma_free_rx_resources(uc); +err_cleanup: + udma_reset_uchan(uc); + + if (uc->use_dma_pool) { + dma_pool_destroy(uc->hdesc_pool); + uc->use_dma_pool = false; + } + + return ret; +} + +static int bcdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + struct udma_dev *ud = to_udma_dev(chan->device); + const struct udma_oes_offsets *oes = &ud->soc_data->oes; + u32 irq_udma_idx, irq_ring_idx; + int ret; + + /* Only TR mode is supported */ + uc->config.pkt_mode = false; + + /* + * Make sure that the completion is in a known state: + * No teardown, the channel is idle + */ + reinit_completion(&uc->teardown_completed); + complete_all(&uc->teardown_completed); + uc->state = UDMA_CHAN_IS_IDLE; + + switch (uc->config.dir) { + case DMA_MEM_TO_MEM: + /* Non synchronized - mem to mem type of transfer */ + dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-MEM\n", __func__, + uc->id); + + ret = bcdma_alloc_bchan_resources(uc); + if (ret) + return ret; + + irq_ring_idx = uc->bchan->id + oes->bcdma_bchan_ring; + irq_udma_idx = uc->bchan->id + oes->bcdma_bchan_data; + + ret = bcdma_tisci_m2m_channel_config(uc); + break; + case DMA_MEM_TO_DEV: + /* Slave transfer synchronized - mem to dev (TX) trasnfer */ + dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-DEV\n", __func__, + uc->id); + + ret = udma_alloc_tx_resources(uc); + if (ret) { + uc->config.remote_thread_id = -1; + return ret; + } + + uc->config.src_thread = ud->psil_base + uc->tchan->id; + uc->config.dst_thread = uc->config.remote_thread_id; + uc->config.dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET; + + irq_ring_idx = uc->tchan->id + oes->bcdma_tchan_ring; + irq_udma_idx = uc->tchan->id + oes->bcdma_tchan_data; + + ret = bcdma_tisci_tx_channel_config(uc); + break; + case DMA_DEV_TO_MEM: + /* Slave transfer synchronized - dev to mem (RX) trasnfer */ + dev_dbg(uc->ud->dev, "%s: chan%d as DEV-to-MEM\n", __func__, + uc->id); + + ret = udma_alloc_rx_resources(uc); + if (ret) { + uc->config.remote_thread_id = -1; + return ret; + } + + uc->config.src_thread = uc->config.remote_thread_id; + uc->config.dst_thread = (ud->psil_base + uc->rchan->id) | + K3_PSIL_DST_THREAD_ID_OFFSET; + + irq_ring_idx = uc->rchan->id + oes->bcdma_rchan_ring; + irq_udma_idx = uc->rchan->id + oes->bcdma_rchan_data; + + ret = bcdma_tisci_rx_channel_config(uc); + break; + default: + /* Can not happen */ + dev_err(uc->ud->dev, "%s: chan%d invalid direction (%u)\n", + __func__, uc->id, uc->config.dir); + return -EINVAL; + } + + /* check if the channel configuration was successful */ + if (ret) + goto err_res_free; + + if (udma_is_chan_running(uc)) { + dev_warn(ud->dev, "chan%d: is running!\n", uc->id); + udma_reset_chan(uc, false); + if (udma_is_chan_running(uc)) { + dev_err(ud->dev, "chan%d: won't stop!\n", uc->id); + ret = -EBUSY; + goto err_res_free; + } + } + + uc->dma_dev = dmaengine_get_dma_device(chan); + if (uc->config.dir == DMA_MEM_TO_MEM && !uc->config.tr_trigger_type) { + uc->config.hdesc_size = cppi5_trdesc_calc_size( + sizeof(struct cppi5_tr_type15_t), 2); + + uc->hdesc_pool = dma_pool_create(uc->name, ud->ddev.dev, + uc->config.hdesc_size, + ud->desc_align, + 0); + if (!uc->hdesc_pool) { + dev_err(ud->ddev.dev, + "Descriptor pool allocation failed\n"); + uc->use_dma_pool = false; + ret = -ENOMEM; + goto err_res_free; + } + + uc->use_dma_pool = true; + } else if (uc->config.dir != DMA_MEM_TO_MEM) { + /* PSI-L pairing */ + ret = navss_psil_pair(ud, uc->config.src_thread, + uc->config.dst_thread); + if (ret) { + dev_err(ud->dev, + "PSI-L pairing failed: 0x%04x -> 0x%04x\n", + uc->config.src_thread, uc->config.dst_thread); + goto err_res_free; + } + + uc->psil_paired = true; + } + + uc->irq_num_ring = msi_get_virq(ud->dev, irq_ring_idx); + if (uc->irq_num_ring <= 0) { + dev_err(ud->dev, "Failed to get ring irq (index: %u)\n", + irq_ring_idx); + ret = -EINVAL; + goto err_psi_free; + } + + ret = request_irq(uc->irq_num_ring, udma_ring_irq_handler, + IRQF_TRIGGER_HIGH, uc->name, uc); + if (ret) { + dev_err(ud->dev, "chan%d: ring irq request failed\n", uc->id); + goto err_irq_free; + } + + /* Event from BCDMA (TR events) only needed for slave channels */ + if (is_slave_direction(uc->config.dir)) { + uc->irq_num_udma = msi_get_virq(ud->dev, irq_udma_idx); + if (uc->irq_num_udma <= 0) { + dev_err(ud->dev, "Failed to get bcdma irq (index: %u)\n", + irq_udma_idx); + free_irq(uc->irq_num_ring, uc); + ret = -EINVAL; + goto err_irq_free; + } + + ret = request_irq(uc->irq_num_udma, udma_udma_irq_handler, 0, + uc->name, uc); + if (ret) { + dev_err(ud->dev, "chan%d: BCDMA irq request failed\n", + uc->id); + free_irq(uc->irq_num_ring, uc); + goto err_irq_free; + } + } else { + uc->irq_num_udma = 0; + } + + udma_reset_rings(uc); + + INIT_DELAYED_WORK_ONSTACK(&uc->tx_drain.work, + udma_check_tx_completion); + return 0; + +err_irq_free: + uc->irq_num_ring = 0; + uc->irq_num_udma = 0; +err_psi_free: + if (uc->psil_paired) + navss_psil_unpair(ud, uc->config.src_thread, + uc->config.dst_thread); + uc->psil_paired = false; +err_res_free: + bcdma_free_bchan_resources(uc); + udma_free_tx_resources(uc); + udma_free_rx_resources(uc); + + udma_reset_uchan(uc); + + if (uc->use_dma_pool) { + dma_pool_destroy(uc->hdesc_pool); + uc->use_dma_pool = false; + } + + return ret; +} + +static int bcdma_router_config(struct dma_chan *chan) +{ + struct k3_event_route_data *router_data = chan->route_data; + struct udma_chan *uc = to_udma_chan(chan); + u32 trigger_event; + + if (!uc->bchan) + return -EINVAL; + + if (uc->config.tr_trigger_type != 1 && uc->config.tr_trigger_type != 2) + return -EINVAL; + + trigger_event = uc->ud->soc_data->bcdma_trigger_event_offset; + trigger_event += (uc->bchan->id * 2) + uc->config.tr_trigger_type - 1; + + return router_data->set_event(router_data->priv, trigger_event); +} + +static int pktdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + struct udma_dev *ud = to_udma_dev(chan->device); + const struct udma_oes_offsets *oes = &ud->soc_data->oes; + u32 irq_ring_idx; + int ret; + + /* + * Make sure that the completion is in a known state: + * No teardown, the channel is idle + */ + reinit_completion(&uc->teardown_completed); + complete_all(&uc->teardown_completed); + uc->state = UDMA_CHAN_IS_IDLE; + + switch (uc->config.dir) { + case DMA_MEM_TO_DEV: + /* Slave transfer synchronized - mem to dev (TX) trasnfer */ + dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-DEV\n", __func__, + uc->id); + + ret = udma_alloc_tx_resources(uc); + if (ret) { + uc->config.remote_thread_id = -1; + return ret; + } + + uc->config.src_thread = ud->psil_base + uc->tchan->id; + uc->config.dst_thread = uc->config.remote_thread_id; + uc->config.dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET; + + irq_ring_idx = uc->tchan->tflow_id + oes->pktdma_tchan_flow; + + ret = pktdma_tisci_tx_channel_config(uc); + break; + case DMA_DEV_TO_MEM: + /* Slave transfer synchronized - dev to mem (RX) trasnfer */ + dev_dbg(uc->ud->dev, "%s: chan%d as DEV-to-MEM\n", __func__, + uc->id); + + ret = udma_alloc_rx_resources(uc); + if (ret) { + uc->config.remote_thread_id = -1; + return ret; + } + + uc->config.src_thread = uc->config.remote_thread_id; + uc->config.dst_thread = (ud->psil_base + uc->rchan->id) | + K3_PSIL_DST_THREAD_ID_OFFSET; + + irq_ring_idx = uc->rflow->id + oes->pktdma_rchan_flow; + + ret = pktdma_tisci_rx_channel_config(uc); + break; + default: + /* Can not happen */ + dev_err(uc->ud->dev, "%s: chan%d invalid direction (%u)\n", + __func__, uc->id, uc->config.dir); + return -EINVAL; + } + + /* check if the channel configuration was successful */ + if (ret) + goto err_res_free; + + if (udma_is_chan_running(uc)) { + dev_warn(ud->dev, "chan%d: is running!\n", uc->id); + udma_reset_chan(uc, false); + if (udma_is_chan_running(uc)) { + dev_err(ud->dev, "chan%d: won't stop!\n", uc->id); + ret = -EBUSY; + goto err_res_free; + } + } + + uc->dma_dev = dmaengine_get_dma_device(chan); + uc->hdesc_pool = dma_pool_create(uc->name, uc->dma_dev, + uc->config.hdesc_size, ud->desc_align, + 0); + if (!uc->hdesc_pool) { + dev_err(ud->ddev.dev, + "Descriptor pool allocation failed\n"); + uc->use_dma_pool = false; + ret = -ENOMEM; + goto err_res_free; + } + + uc->use_dma_pool = true; + + /* PSI-L pairing */ + ret = navss_psil_pair(ud, uc->config.src_thread, uc->config.dst_thread); + if (ret) { + dev_err(ud->dev, "PSI-L pairing failed: 0x%04x -> 0x%04x\n", + uc->config.src_thread, uc->config.dst_thread); + goto err_res_free; + } + + uc->psil_paired = true; + + uc->irq_num_ring = msi_get_virq(ud->dev, irq_ring_idx); + if (uc->irq_num_ring <= 0) { + dev_err(ud->dev, "Failed to get ring irq (index: %u)\n", + irq_ring_idx); + ret = -EINVAL; + goto err_psi_free; + } + + ret = request_irq(uc->irq_num_ring, udma_ring_irq_handler, + IRQF_TRIGGER_HIGH, uc->name, uc); + if (ret) { + dev_err(ud->dev, "chan%d: ring irq request failed\n", uc->id); + goto err_irq_free; + } + + uc->irq_num_udma = 0; + + udma_reset_rings(uc); + + INIT_DELAYED_WORK_ONSTACK(&uc->tx_drain.work, + udma_check_tx_completion); + + if (uc->tchan) + dev_dbg(ud->dev, + "chan%d: tchan%d, tflow%d, Remote thread: 0x%04x\n", + uc->id, uc->tchan->id, uc->tchan->tflow_id, + uc->config.remote_thread_id); + else if (uc->rchan) + dev_dbg(ud->dev, + "chan%d: rchan%d, rflow%d, Remote thread: 0x%04x\n", + uc->id, uc->rchan->id, uc->rflow->id, + uc->config.remote_thread_id); + return 0; + +err_irq_free: + uc->irq_num_ring = 0; +err_psi_free: + navss_psil_unpair(ud, uc->config.src_thread, uc->config.dst_thread); + uc->psil_paired = false; +err_res_free: + udma_free_tx_resources(uc); + udma_free_rx_resources(uc); + + udma_reset_uchan(uc); + + dma_pool_destroy(uc->hdesc_pool); + uc->use_dma_pool = false; + + return ret; +} + +static int udma_slave_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct udma_chan *uc = to_udma_chan(chan); + + memcpy(&uc->cfg, cfg, sizeof(uc->cfg)); + + return 0; +} + +static struct udma_desc *udma_alloc_tr_desc(struct udma_chan *uc, + size_t tr_size, int tr_count, + enum dma_transfer_direction dir) +{ + struct udma_hwdesc *hwdesc; + struct cppi5_desc_hdr_t *tr_desc; + struct udma_desc *d; + u32 reload_count = 0; + u32 ring_id; + + switch (tr_size) { + case 16: + case 32: + case 64: + case 128: + break; + default: + dev_err(uc->ud->dev, "Unsupported TR size of %zu\n", tr_size); + return NULL; + } + + /* We have only one descriptor containing multiple TRs */ + d = kzalloc(sizeof(*d) + sizeof(d->hwdesc[0]), GFP_NOWAIT); + if (!d) + return NULL; + + d->sglen = tr_count; + + d->hwdesc_count = 1; + hwdesc = &d->hwdesc[0]; + + /* Allocate memory for DMA ring descriptor */ + if (uc->use_dma_pool) { + hwdesc->cppi5_desc_size = uc->config.hdesc_size; + hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool, + GFP_NOWAIT, + &hwdesc->cppi5_desc_paddr); + } else { + hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size, + tr_count); + hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size, + uc->ud->desc_align); + hwdesc->cppi5_desc_vaddr = dma_alloc_coherent(uc->ud->dev, + hwdesc->cppi5_desc_size, + &hwdesc->cppi5_desc_paddr, + GFP_NOWAIT); + } + + if (!hwdesc->cppi5_desc_vaddr) { + kfree(d); + return NULL; + } + + /* Start of the TR req records */ + hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size; + /* Start address of the TR response array */ + hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size * tr_count; + + tr_desc = hwdesc->cppi5_desc_vaddr; + + if (uc->cyclic) + reload_count = CPPI5_INFO0_TRDESC_RLDCNT_INFINITE; + + if (dir == DMA_DEV_TO_MEM) + ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring); + else + ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring); + + cppi5_trdesc_init(tr_desc, tr_count, tr_size, 0, reload_count); + cppi5_desc_set_pktids(tr_desc, uc->id, + CPPI5_INFO1_DESC_FLOWID_DEFAULT); + cppi5_desc_set_retpolicy(tr_desc, 0, ring_id); + + return d; +} + +/** + * udma_get_tr_counters - calculate TR counters for a given length + * @len: Length of the trasnfer + * @align_to: Preferred alignment + * @tr0_cnt0: First TR icnt0 + * @tr0_cnt1: First TR icnt1 + * @tr1_cnt0: Second (if used) TR icnt0 + * + * For len < SZ_64K only one TR is enough, tr1_cnt0 is not updated + * For len >= SZ_64K two TRs are used in a simple way: + * First TR: SZ_64K-alignment blocks (tr0_cnt0, tr0_cnt1) + * Second TR: the remaining length (tr1_cnt0) + * + * Returns the number of TRs the length needs (1 or 2) + * -EINVAL if the length can not be supported + */ +static int udma_get_tr_counters(size_t len, unsigned long align_to, + u16 *tr0_cnt0, u16 *tr0_cnt1, u16 *tr1_cnt0) +{ + if (len < SZ_64K) { + *tr0_cnt0 = len; + *tr0_cnt1 = 1; + + return 1; + } + + if (align_to > 3) + align_to = 3; + +realign: + *tr0_cnt0 = SZ_64K - BIT(align_to); + if (len / *tr0_cnt0 >= SZ_64K) { + if (align_to) { + align_to--; + goto realign; + } + return -EINVAL; + } + + *tr0_cnt1 = len / *tr0_cnt0; + *tr1_cnt0 = len % *tr0_cnt0; + + return 2; +} + +static struct udma_desc * +udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl, + unsigned int sglen, enum dma_transfer_direction dir, + unsigned long tx_flags, void *context) +{ + struct scatterlist *sgent; + struct udma_desc *d; + struct cppi5_tr_type1_t *tr_req = NULL; + u16 tr0_cnt0, tr0_cnt1, tr1_cnt0; + unsigned int i; + size_t tr_size; + int num_tr = 0; + int tr_idx = 0; + u64 asel; + + /* estimate the number of TRs we will need */ + for_each_sg(sgl, sgent, sglen, i) { + if (sg_dma_len(sgent) < SZ_64K) + num_tr++; + else + num_tr += 2; + } + + /* Now allocate and setup the descriptor. */ + tr_size = sizeof(struct cppi5_tr_type1_t); + d = udma_alloc_tr_desc(uc, tr_size, num_tr, dir); + if (!d) + return NULL; + + d->sglen = sglen; + + if (uc->ud->match_data->type == DMA_TYPE_UDMA) + asel = 0; + else + asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT; + + tr_req = d->hwdesc[0].tr_req_base; + for_each_sg(sgl, sgent, sglen, i) { + dma_addr_t sg_addr = sg_dma_address(sgent); + + num_tr = udma_get_tr_counters(sg_dma_len(sgent), __ffs(sg_addr), + &tr0_cnt0, &tr0_cnt1, &tr1_cnt0); + if (num_tr < 0) { + dev_err(uc->ud->dev, "size %u is not supported\n", + sg_dma_len(sgent)); + udma_free_hwdesc(uc, d); + kfree(d); + return NULL; + } + + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false, + false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, CPPI5_TR_CSF_SUPR_EVT); + + sg_addr |= asel; + tr_req[tr_idx].addr = sg_addr; + tr_req[tr_idx].icnt0 = tr0_cnt0; + tr_req[tr_idx].icnt1 = tr0_cnt1; + tr_req[tr_idx].dim1 = tr0_cnt0; + tr_idx++; + + if (num_tr == 2) { + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, + false, false, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, + CPPI5_TR_CSF_SUPR_EVT); + + tr_req[tr_idx].addr = sg_addr + tr0_cnt1 * tr0_cnt0; + tr_req[tr_idx].icnt0 = tr1_cnt0; + tr_req[tr_idx].icnt1 = 1; + tr_req[tr_idx].dim1 = tr1_cnt0; + tr_idx++; + } + + d->residue += sg_dma_len(sgent); + } + + cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, + CPPI5_TR_CSF_SUPR_EVT | CPPI5_TR_CSF_EOP); + + return d; +} + +static struct udma_desc * +udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, + unsigned int sglen, + enum dma_transfer_direction dir, + unsigned long tx_flags, void *context) +{ + struct scatterlist *sgent; + struct cppi5_tr_type15_t *tr_req = NULL; + enum dma_slave_buswidth dev_width; + u32 csf = CPPI5_TR_CSF_SUPR_EVT; + u16 tr_cnt0, tr_cnt1; + dma_addr_t dev_addr; + struct udma_desc *d; + unsigned int i; + size_t tr_size, sg_len; + int num_tr = 0; + int tr_idx = 0; + u32 burst, trigger_size, port_window; + u64 asel; + + if (dir == DMA_DEV_TO_MEM) { + dev_addr = uc->cfg.src_addr; + dev_width = uc->cfg.src_addr_width; + burst = uc->cfg.src_maxburst; + port_window = uc->cfg.src_port_window_size; + } else if (dir == DMA_MEM_TO_DEV) { + dev_addr = uc->cfg.dst_addr; + dev_width = uc->cfg.dst_addr_width; + burst = uc->cfg.dst_maxburst; + port_window = uc->cfg.dst_port_window_size; + } else { + dev_err(uc->ud->dev, "%s: bad direction?\n", __func__); + return NULL; + } + + if (!burst) + burst = 1; + + if (port_window) { + if (port_window != burst) { + dev_err(uc->ud->dev, + "The burst must be equal to port_window\n"); + return NULL; + } + + tr_cnt0 = dev_width * port_window; + tr_cnt1 = 1; + } else { + tr_cnt0 = dev_width; + tr_cnt1 = burst; + } + trigger_size = tr_cnt0 * tr_cnt1; + + /* estimate the number of TRs we will need */ + for_each_sg(sgl, sgent, sglen, i) { + sg_len = sg_dma_len(sgent); + + if (sg_len % trigger_size) { + dev_err(uc->ud->dev, + "Not aligned SG entry (%zu for %u)\n", sg_len, + trigger_size); + return NULL; + } + + if (sg_len / trigger_size < SZ_64K) + num_tr++; + else + num_tr += 2; + } + + /* Now allocate and setup the descriptor. */ + tr_size = sizeof(struct cppi5_tr_type15_t); + d = udma_alloc_tr_desc(uc, tr_size, num_tr, dir); + if (!d) + return NULL; + + d->sglen = sglen; + + if (uc->ud->match_data->type == DMA_TYPE_UDMA) { + asel = 0; + csf |= CPPI5_TR_CSF_EOL_ICNT0; + } else { + asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT; + dev_addr |= asel; + } + + tr_req = d->hwdesc[0].tr_req_base; + for_each_sg(sgl, sgent, sglen, i) { + u16 tr0_cnt2, tr0_cnt3, tr1_cnt2; + dma_addr_t sg_addr = sg_dma_address(sgent); + + sg_len = sg_dma_len(sgent); + num_tr = udma_get_tr_counters(sg_len / trigger_size, 0, + &tr0_cnt2, &tr0_cnt3, &tr1_cnt2); + if (num_tr < 0) { + dev_err(uc->ud->dev, "size %zu is not supported\n", + sg_len); + udma_free_hwdesc(uc, d); + kfree(d); + return NULL; + } + + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE15, false, + true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, csf); + cppi5_tr_set_trigger(&tr_req[tr_idx].flags, + uc->config.tr_trigger_type, + CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC, 0, 0); + + sg_addr |= asel; + if (dir == DMA_DEV_TO_MEM) { + tr_req[tr_idx].addr = dev_addr; + tr_req[tr_idx].icnt0 = tr_cnt0; + tr_req[tr_idx].icnt1 = tr_cnt1; + tr_req[tr_idx].icnt2 = tr0_cnt2; + tr_req[tr_idx].icnt3 = tr0_cnt3; + tr_req[tr_idx].dim1 = (-1) * tr_cnt0; + + tr_req[tr_idx].daddr = sg_addr; + tr_req[tr_idx].dicnt0 = tr_cnt0; + tr_req[tr_idx].dicnt1 = tr_cnt1; + tr_req[tr_idx].dicnt2 = tr0_cnt2; + tr_req[tr_idx].dicnt3 = tr0_cnt3; + tr_req[tr_idx].ddim1 = tr_cnt0; + tr_req[tr_idx].ddim2 = trigger_size; + tr_req[tr_idx].ddim3 = trigger_size * tr0_cnt2; + } else { + tr_req[tr_idx].addr = sg_addr; + tr_req[tr_idx].icnt0 = tr_cnt0; + tr_req[tr_idx].icnt1 = tr_cnt1; + tr_req[tr_idx].icnt2 = tr0_cnt2; + tr_req[tr_idx].icnt3 = tr0_cnt3; + tr_req[tr_idx].dim1 = tr_cnt0; + tr_req[tr_idx].dim2 = trigger_size; + tr_req[tr_idx].dim3 = trigger_size * tr0_cnt2; + + tr_req[tr_idx].daddr = dev_addr; + tr_req[tr_idx].dicnt0 = tr_cnt0; + tr_req[tr_idx].dicnt1 = tr_cnt1; + tr_req[tr_idx].dicnt2 = tr0_cnt2; + tr_req[tr_idx].dicnt3 = tr0_cnt3; + tr_req[tr_idx].ddim1 = (-1) * tr_cnt0; + } + + tr_idx++; + + if (num_tr == 2) { + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE15, + false, true, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, csf); + cppi5_tr_set_trigger(&tr_req[tr_idx].flags, + uc->config.tr_trigger_type, + CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC, + 0, 0); + + sg_addr += trigger_size * tr0_cnt2 * tr0_cnt3; + if (dir == DMA_DEV_TO_MEM) { + tr_req[tr_idx].addr = dev_addr; + tr_req[tr_idx].icnt0 = tr_cnt0; + tr_req[tr_idx].icnt1 = tr_cnt1; + tr_req[tr_idx].icnt2 = tr1_cnt2; + tr_req[tr_idx].icnt3 = 1; + tr_req[tr_idx].dim1 = (-1) * tr_cnt0; + + tr_req[tr_idx].daddr = sg_addr; + tr_req[tr_idx].dicnt0 = tr_cnt0; + tr_req[tr_idx].dicnt1 = tr_cnt1; + tr_req[tr_idx].dicnt2 = tr1_cnt2; + tr_req[tr_idx].dicnt3 = 1; + tr_req[tr_idx].ddim1 = tr_cnt0; + tr_req[tr_idx].ddim2 = trigger_size; + } else { + tr_req[tr_idx].addr = sg_addr; + tr_req[tr_idx].icnt0 = tr_cnt0; + tr_req[tr_idx].icnt1 = tr_cnt1; + tr_req[tr_idx].icnt2 = tr1_cnt2; + tr_req[tr_idx].icnt3 = 1; + tr_req[tr_idx].dim1 = tr_cnt0; + tr_req[tr_idx].dim2 = trigger_size; + + tr_req[tr_idx].daddr = dev_addr; + tr_req[tr_idx].dicnt0 = tr_cnt0; + tr_req[tr_idx].dicnt1 = tr_cnt1; + tr_req[tr_idx].dicnt2 = tr1_cnt2; + tr_req[tr_idx].dicnt3 = 1; + tr_req[tr_idx].ddim1 = (-1) * tr_cnt0; + } + tr_idx++; + } + + d->residue += sg_len; + } + + cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, csf | CPPI5_TR_CSF_EOP); + + return d; +} + +static int udma_configure_statictr(struct udma_chan *uc, struct udma_desc *d, + enum dma_slave_buswidth dev_width, + u16 elcnt) +{ + if (uc->config.ep_type != PSIL_EP_PDMA_XY) + return 0; + + /* Bus width translates to the element size (ES) */ + switch (dev_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + d->static_tr.elsize = 0; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + d->static_tr.elsize = 1; + break; + case DMA_SLAVE_BUSWIDTH_3_BYTES: + d->static_tr.elsize = 2; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + d->static_tr.elsize = 3; + break; + case DMA_SLAVE_BUSWIDTH_8_BYTES: + d->static_tr.elsize = 4; + break; + default: /* not reached */ + return -EINVAL; + } + + d->static_tr.elcnt = elcnt; + + /* + * PDMA must to close the packet when the channel is in packet mode. + * For TR mode when the channel is not cyclic we also need PDMA to close + * the packet otherwise the transfer will stall because PDMA holds on + * the data it has received from the peripheral. + */ + if (uc->config.pkt_mode || !uc->cyclic) { + unsigned int div = dev_width * elcnt; + + if (uc->cyclic) + d->static_tr.bstcnt = d->residue / d->sglen / div; + else + d->static_tr.bstcnt = d->residue / div; + + if (uc->config.dir == DMA_DEV_TO_MEM && + d->static_tr.bstcnt > uc->ud->match_data->statictr_z_mask) + return -EINVAL; + } else { + d->static_tr.bstcnt = 0; + } + + return 0; +} + +static struct udma_desc * +udma_prep_slave_sg_pkt(struct udma_chan *uc, struct scatterlist *sgl, + unsigned int sglen, enum dma_transfer_direction dir, + unsigned long tx_flags, void *context) +{ + struct scatterlist *sgent; + struct cppi5_host_desc_t *h_desc = NULL; + struct udma_desc *d; + u32 ring_id; + unsigned int i; + u64 asel; + + d = kzalloc(struct_size(d, hwdesc, sglen), GFP_NOWAIT); + if (!d) + return NULL; + + d->sglen = sglen; + d->hwdesc_count = sglen; + + if (dir == DMA_DEV_TO_MEM) + ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring); + else + ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring); + + if (uc->ud->match_data->type == DMA_TYPE_UDMA) + asel = 0; + else + asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT; + + for_each_sg(sgl, sgent, sglen, i) { + struct udma_hwdesc *hwdesc = &d->hwdesc[i]; + dma_addr_t sg_addr = sg_dma_address(sgent); + struct cppi5_host_desc_t *desc; + size_t sg_len = sg_dma_len(sgent); + + hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool, + GFP_NOWAIT, + &hwdesc->cppi5_desc_paddr); + if (!hwdesc->cppi5_desc_vaddr) { + dev_err(uc->ud->dev, + "descriptor%d allocation failed\n", i); + + udma_free_hwdesc(uc, d); + kfree(d); + return NULL; + } + + d->residue += sg_len; + hwdesc->cppi5_desc_size = uc->config.hdesc_size; + desc = hwdesc->cppi5_desc_vaddr; + + if (i == 0) { + cppi5_hdesc_init(desc, 0, 0); + /* Flow and Packed ID */ + cppi5_desc_set_pktids(&desc->hdr, uc->id, + CPPI5_INFO1_DESC_FLOWID_DEFAULT); + cppi5_desc_set_retpolicy(&desc->hdr, 0, ring_id); + } else { + cppi5_hdesc_reset_hbdesc(desc); + cppi5_desc_set_retpolicy(&desc->hdr, 0, 0xffff); + } + + /* attach the sg buffer to the descriptor */ + sg_addr |= asel; + cppi5_hdesc_attach_buf(desc, sg_addr, sg_len, sg_addr, sg_len); + + /* Attach link as host buffer descriptor */ + if (h_desc) + cppi5_hdesc_link_hbdesc(h_desc, + hwdesc->cppi5_desc_paddr | asel); + + if (uc->ud->match_data->type == DMA_TYPE_PKTDMA || + dir == DMA_MEM_TO_DEV) + h_desc = desc; + } + + if (d->residue >= SZ_4M) { + dev_err(uc->ud->dev, + "%s: Transfer size %u is over the supported 4M range\n", + __func__, d->residue); + udma_free_hwdesc(uc, d); + kfree(d); + return NULL; + } + + h_desc = d->hwdesc[0].cppi5_desc_vaddr; + cppi5_hdesc_set_pktlen(h_desc, d->residue); + + return d; +} + +static int udma_attach_metadata(struct dma_async_tx_descriptor *desc, + void *data, size_t len) +{ + struct udma_desc *d = to_udma_desc(desc); + struct udma_chan *uc = to_udma_chan(desc->chan); + struct cppi5_host_desc_t *h_desc; + u32 psd_size = len; + u32 flags = 0; + + if (!uc->config.pkt_mode || !uc->config.metadata_size) + return -ENOTSUPP; + + if (!data || len > uc->config.metadata_size) + return -EINVAL; + + if (uc->config.needs_epib && len < CPPI5_INFO0_HDESC_EPIB_SIZE) + return -EINVAL; + + h_desc = d->hwdesc[0].cppi5_desc_vaddr; + if (d->dir == DMA_MEM_TO_DEV) + memcpy(h_desc->epib, data, len); + + if (uc->config.needs_epib) + psd_size -= CPPI5_INFO0_HDESC_EPIB_SIZE; + + d->metadata = data; + d->metadata_size = len; + if (uc->config.needs_epib) + flags |= CPPI5_INFO0_HDESC_EPIB_PRESENT; + + cppi5_hdesc_update_flags(h_desc, flags); + cppi5_hdesc_update_psdata_size(h_desc, psd_size); + + return 0; +} + +static void *udma_get_metadata_ptr(struct dma_async_tx_descriptor *desc, + size_t *payload_len, size_t *max_len) +{ + struct udma_desc *d = to_udma_desc(desc); + struct udma_chan *uc = to_udma_chan(desc->chan); + struct cppi5_host_desc_t *h_desc; + + if (!uc->config.pkt_mode || !uc->config.metadata_size) + return ERR_PTR(-ENOTSUPP); + + h_desc = d->hwdesc[0].cppi5_desc_vaddr; + + *max_len = uc->config.metadata_size; + + *payload_len = cppi5_hdesc_epib_present(&h_desc->hdr) ? + CPPI5_INFO0_HDESC_EPIB_SIZE : 0; + *payload_len += cppi5_hdesc_get_psdata_size(h_desc); + + return h_desc->epib; +} + +static int udma_set_metadata_len(struct dma_async_tx_descriptor *desc, + size_t payload_len) +{ + struct udma_desc *d = to_udma_desc(desc); + struct udma_chan *uc = to_udma_chan(desc->chan); + struct cppi5_host_desc_t *h_desc; + u32 psd_size = payload_len; + u32 flags = 0; + + if (!uc->config.pkt_mode || !uc->config.metadata_size) + return -ENOTSUPP; + + if (payload_len > uc->config.metadata_size) + return -EINVAL; + + if (uc->config.needs_epib && payload_len < CPPI5_INFO0_HDESC_EPIB_SIZE) + return -EINVAL; + + h_desc = d->hwdesc[0].cppi5_desc_vaddr; + + if (uc->config.needs_epib) { + psd_size -= CPPI5_INFO0_HDESC_EPIB_SIZE; + flags |= CPPI5_INFO0_HDESC_EPIB_PRESENT; + } + + cppi5_hdesc_update_flags(h_desc, flags); + cppi5_hdesc_update_psdata_size(h_desc, psd_size); + + return 0; +} + +static struct dma_descriptor_metadata_ops metadata_ops = { + .attach = udma_attach_metadata, + .get_ptr = udma_get_metadata_ptr, + .set_len = udma_set_metadata_len, +}; + +static struct dma_async_tx_descriptor * +udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sglen, enum dma_transfer_direction dir, + unsigned long tx_flags, void *context) +{ + struct udma_chan *uc = to_udma_chan(chan); + enum dma_slave_buswidth dev_width; + struct udma_desc *d; + u32 burst; + + if (dir != uc->config.dir && + (uc->config.dir == DMA_MEM_TO_MEM && !uc->config.tr_trigger_type)) { + dev_err(chan->device->dev, + "%s: chan%d is for %s, not supporting %s\n", + __func__, uc->id, + dmaengine_get_direction_text(uc->config.dir), + dmaengine_get_direction_text(dir)); + return NULL; + } + + if (dir == DMA_DEV_TO_MEM) { + dev_width = uc->cfg.src_addr_width; + burst = uc->cfg.src_maxburst; + } else if (dir == DMA_MEM_TO_DEV) { + dev_width = uc->cfg.dst_addr_width; + burst = uc->cfg.dst_maxburst; + } else { + dev_err(chan->device->dev, "%s: bad direction?\n", __func__); + return NULL; + } + + if (!burst) + burst = 1; + + uc->config.tx_flags = tx_flags; + + if (uc->config.pkt_mode) + d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags, + context); + else if (is_slave_direction(uc->config.dir)) + d = udma_prep_slave_sg_tr(uc, sgl, sglen, dir, tx_flags, + context); + else + d = udma_prep_slave_sg_triggered_tr(uc, sgl, sglen, dir, + tx_flags, context); + + if (!d) + return NULL; + + d->dir = dir; + d->desc_idx = 0; + d->tr_idx = 0; + + /* static TR for remote PDMA */ + if (udma_configure_statictr(uc, d, dev_width, burst)) { + dev_err(uc->ud->dev, + "%s: StaticTR Z is limited to maximum 4095 (%u)\n", + __func__, d->static_tr.bstcnt); + + udma_free_hwdesc(uc, d); + kfree(d); + return NULL; + } + + if (uc->config.metadata_size) + d->vd.tx.metadata_ops = &metadata_ops; + + return vchan_tx_prep(&uc->vc, &d->vd, tx_flags); +} + +static struct udma_desc * +udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct udma_desc *d; + size_t tr_size, period_addr; + struct cppi5_tr_type1_t *tr_req; + unsigned int periods = buf_len / period_len; + u16 tr0_cnt0, tr0_cnt1, tr1_cnt0; + unsigned int i; + int num_tr; + + num_tr = udma_get_tr_counters(period_len, __ffs(buf_addr), &tr0_cnt0, + &tr0_cnt1, &tr1_cnt0); + if (num_tr < 0) { + dev_err(uc->ud->dev, "size %zu is not supported\n", + period_len); + return NULL; + } + + /* Now allocate and setup the descriptor. */ + tr_size = sizeof(struct cppi5_tr_type1_t); + d = udma_alloc_tr_desc(uc, tr_size, periods * num_tr, dir); + if (!d) + return NULL; + + tr_req = d->hwdesc[0].tr_req_base; + if (uc->ud->match_data->type == DMA_TYPE_UDMA) + period_addr = buf_addr; + else + period_addr = buf_addr | + ((u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT); + + for (i = 0; i < periods; i++) { + int tr_idx = i * num_tr; + + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false, + false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + + tr_req[tr_idx].addr = period_addr; + tr_req[tr_idx].icnt0 = tr0_cnt0; + tr_req[tr_idx].icnt1 = tr0_cnt1; + tr_req[tr_idx].dim1 = tr0_cnt0; + + if (num_tr == 2) { + cppi5_tr_csf_set(&tr_req[tr_idx].flags, + CPPI5_TR_CSF_SUPR_EVT); + tr_idx++; + + cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, + false, false, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + + tr_req[tr_idx].addr = period_addr + tr0_cnt1 * tr0_cnt0; + tr_req[tr_idx].icnt0 = tr1_cnt0; + tr_req[tr_idx].icnt1 = 1; + tr_req[tr_idx].dim1 = tr1_cnt0; + } + + if (!(flags & DMA_PREP_INTERRUPT)) + cppi5_tr_csf_set(&tr_req[tr_idx].flags, + CPPI5_TR_CSF_SUPR_EVT); + + period_addr += period_len; + } + + return d; +} + +static struct udma_desc * +udma_prep_dma_cyclic_pkt(struct udma_chan *uc, dma_addr_t buf_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct udma_desc *d; + u32 ring_id; + int i; + int periods = buf_len / period_len; + + if (periods > (K3_UDMA_DEFAULT_RING_SIZE - 1)) + return NULL; + + if (period_len >= SZ_4M) + return NULL; + + d = kzalloc(struct_size(d, hwdesc, periods), GFP_NOWAIT); + if (!d) + return NULL; + + d->hwdesc_count = periods; + + /* TODO: re-check this... */ + if (dir == DMA_DEV_TO_MEM) + ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring); + else + ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring); + + if (uc->ud->match_data->type != DMA_TYPE_UDMA) + buf_addr |= (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT; + + for (i = 0; i < periods; i++) { + struct udma_hwdesc *hwdesc = &d->hwdesc[i]; + dma_addr_t period_addr = buf_addr + (period_len * i); + struct cppi5_host_desc_t *h_desc; + + hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool, + GFP_NOWAIT, + &hwdesc->cppi5_desc_paddr); + if (!hwdesc->cppi5_desc_vaddr) { + dev_err(uc->ud->dev, + "descriptor%d allocation failed\n", i); + + udma_free_hwdesc(uc, d); + kfree(d); + return NULL; + } + + hwdesc->cppi5_desc_size = uc->config.hdesc_size; + h_desc = hwdesc->cppi5_desc_vaddr; + + cppi5_hdesc_init(h_desc, 0, 0); + cppi5_hdesc_set_pktlen(h_desc, period_len); + + /* Flow and Packed ID */ + cppi5_desc_set_pktids(&h_desc->hdr, uc->id, + CPPI5_INFO1_DESC_FLOWID_DEFAULT); + cppi5_desc_set_retpolicy(&h_desc->hdr, 0, ring_id); + + /* attach each period to a new descriptor */ + cppi5_hdesc_attach_buf(h_desc, + period_addr, period_len, + period_addr, period_len); + } + + return d; +} + +static struct dma_async_tx_descriptor * +udma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct udma_chan *uc = to_udma_chan(chan); + enum dma_slave_buswidth dev_width; + struct udma_desc *d; + u32 burst; + + if (dir != uc->config.dir) { + dev_err(chan->device->dev, + "%s: chan%d is for %s, not supporting %s\n", + __func__, uc->id, + dmaengine_get_direction_text(uc->config.dir), + dmaengine_get_direction_text(dir)); + return NULL; + } + + uc->cyclic = true; + + if (dir == DMA_DEV_TO_MEM) { + dev_width = uc->cfg.src_addr_width; + burst = uc->cfg.src_maxburst; + } else if (dir == DMA_MEM_TO_DEV) { + dev_width = uc->cfg.dst_addr_width; + burst = uc->cfg.dst_maxburst; + } else { + dev_err(uc->ud->dev, "%s: bad direction?\n", __func__); + return NULL; + } + + if (!burst) + burst = 1; + + if (uc->config.pkt_mode) + d = udma_prep_dma_cyclic_pkt(uc, buf_addr, buf_len, period_len, + dir, flags); + else + d = udma_prep_dma_cyclic_tr(uc, buf_addr, buf_len, period_len, + dir, flags); + + if (!d) + return NULL; + + d->sglen = buf_len / period_len; + + d->dir = dir; + d->residue = buf_len; + + /* static TR for remote PDMA */ + if (udma_configure_statictr(uc, d, dev_width, burst)) { + dev_err(uc->ud->dev, + "%s: StaticTR Z is limited to maximum 4095 (%u)\n", + __func__, d->static_tr.bstcnt); + + udma_free_hwdesc(uc, d); + kfree(d); + return NULL; + } + + if (uc->config.metadata_size) + d->vd.tx.metadata_ops = &metadata_ops; + + return vchan_tx_prep(&uc->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor * +udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long tx_flags) +{ + struct udma_chan *uc = to_udma_chan(chan); + struct udma_desc *d; + struct cppi5_tr_type15_t *tr_req; + int num_tr; + size_t tr_size = sizeof(struct cppi5_tr_type15_t); + u16 tr0_cnt0, tr0_cnt1, tr1_cnt0; + u32 csf = CPPI5_TR_CSF_SUPR_EVT; + + if (uc->config.dir != DMA_MEM_TO_MEM) { + dev_err(chan->device->dev, + "%s: chan%d is for %s, not supporting %s\n", + __func__, uc->id, + dmaengine_get_direction_text(uc->config.dir), + dmaengine_get_direction_text(DMA_MEM_TO_MEM)); + return NULL; + } + + num_tr = udma_get_tr_counters(len, __ffs(src | dest), &tr0_cnt0, + &tr0_cnt1, &tr1_cnt0); + if (num_tr < 0) { + dev_err(uc->ud->dev, "size %zu is not supported\n", + len); + return NULL; + } + + d = udma_alloc_tr_desc(uc, tr_size, num_tr, DMA_MEM_TO_MEM); + if (!d) + return NULL; + + d->dir = DMA_MEM_TO_MEM; + d->desc_idx = 0; + d->tr_idx = 0; + d->residue = len; + + if (uc->ud->match_data->type != DMA_TYPE_UDMA) { + src |= (u64)uc->ud->asel << K3_ADDRESS_ASEL_SHIFT; + dest |= (u64)uc->ud->asel << K3_ADDRESS_ASEL_SHIFT; + } else { + csf |= CPPI5_TR_CSF_EOL_ICNT0; + } + + tr_req = d->hwdesc[0].tr_req_base; + + cppi5_tr_init(&tr_req[0].flags, CPPI5_TR_TYPE15, false, true, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req[0].flags, csf); + + tr_req[0].addr = src; + tr_req[0].icnt0 = tr0_cnt0; + tr_req[0].icnt1 = tr0_cnt1; + tr_req[0].icnt2 = 1; + tr_req[0].icnt3 = 1; + tr_req[0].dim1 = tr0_cnt0; + + tr_req[0].daddr = dest; + tr_req[0].dicnt0 = tr0_cnt0; + tr_req[0].dicnt1 = tr0_cnt1; + tr_req[0].dicnt2 = 1; + tr_req[0].dicnt3 = 1; + tr_req[0].ddim1 = tr0_cnt0; + + if (num_tr == 2) { + cppi5_tr_init(&tr_req[1].flags, CPPI5_TR_TYPE15, false, true, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req[1].flags, csf); + + tr_req[1].addr = src + tr0_cnt1 * tr0_cnt0; + tr_req[1].icnt0 = tr1_cnt0; + tr_req[1].icnt1 = 1; + tr_req[1].icnt2 = 1; + tr_req[1].icnt3 = 1; + + tr_req[1].daddr = dest + tr0_cnt1 * tr0_cnt0; + tr_req[1].dicnt0 = tr1_cnt0; + tr_req[1].dicnt1 = 1; + tr_req[1].dicnt2 = 1; + tr_req[1].dicnt3 = 1; + } + + cppi5_tr_csf_set(&tr_req[num_tr - 1].flags, csf | CPPI5_TR_CSF_EOP); + + if (uc->config.metadata_size) + d->vd.tx.metadata_ops = &metadata_ops; + + return vchan_tx_prep(&uc->vc, &d->vd, tx_flags); +} + +static void udma_issue_pending(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&uc->vc.lock, flags); + + /* If we have something pending and no active descriptor, then */ + if (vchan_issue_pending(&uc->vc) && !uc->desc) { + /* + * start a descriptor if the channel is NOT [marked as + * terminating _and_ it is still running (teardown has not + * completed yet)]. + */ + if (!(uc->state == UDMA_CHAN_IS_TERMINATING && + udma_is_chan_running(uc))) + udma_start(uc); + } + + spin_unlock_irqrestore(&uc->vc.lock, flags); +} + +static enum dma_status udma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct udma_chan *uc = to_udma_chan(chan); + enum dma_status ret; + unsigned long flags; + + spin_lock_irqsave(&uc->vc.lock, flags); + + ret = dma_cookie_status(chan, cookie, txstate); + + if (!udma_is_chan_running(uc)) + ret = DMA_COMPLETE; + + if (ret == DMA_IN_PROGRESS && udma_is_chan_paused(uc)) + ret = DMA_PAUSED; + + if (ret == DMA_COMPLETE || !txstate) + goto out; + + if (uc->desc && uc->desc->vd.tx.cookie == cookie) { + u32 peer_bcnt = 0; + u32 bcnt = 0; + u32 residue = uc->desc->residue; + u32 delay = 0; + + if (uc->desc->dir == DMA_MEM_TO_DEV) { + bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG); + + if (uc->config.ep_type != PSIL_EP_NATIVE) { + peer_bcnt = udma_tchanrt_read(uc, + UDMA_CHAN_RT_PEER_BCNT_REG); + + if (bcnt > peer_bcnt) + delay = bcnt - peer_bcnt; + } + } else if (uc->desc->dir == DMA_DEV_TO_MEM) { + bcnt = udma_rchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + + if (uc->config.ep_type != PSIL_EP_NATIVE) { + peer_bcnt = udma_rchanrt_read(uc, + UDMA_CHAN_RT_PEER_BCNT_REG); + + if (peer_bcnt > bcnt) + delay = peer_bcnt - bcnt; + } + } else { + bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); + } + + if (bcnt && !(bcnt % uc->desc->residue)) + residue = 0; + else + residue -= bcnt % uc->desc->residue; + + if (!residue && (uc->config.dir == DMA_DEV_TO_MEM || !delay)) { + ret = DMA_COMPLETE; + delay = 0; + } + + dma_set_residue(txstate, residue); + dma_set_in_flight_bytes(txstate, delay); + + } else { + ret = DMA_COMPLETE; + } + +out: + spin_unlock_irqrestore(&uc->vc.lock, flags); + return ret; +} + +static int udma_pause(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + + /* pause the channel */ + switch (uc->config.dir) { + case DMA_DEV_TO_MEM: + udma_rchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_PAUSE, + UDMA_PEER_RT_EN_PAUSE); + break; + case DMA_MEM_TO_DEV: + udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_PAUSE, + UDMA_PEER_RT_EN_PAUSE); + break; + case DMA_MEM_TO_MEM: + udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_PAUSE, + UDMA_CHAN_RT_CTL_PAUSE); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int udma_resume(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + + /* resume the channel */ + switch (uc->config.dir) { + case DMA_DEV_TO_MEM: + udma_rchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_PAUSE, 0); + + break; + case DMA_MEM_TO_DEV: + udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, + UDMA_PEER_RT_EN_PAUSE, 0); + break; + case DMA_MEM_TO_MEM: + udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_CTL_REG, + UDMA_CHAN_RT_CTL_PAUSE, 0); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int udma_terminate_all(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&uc->vc.lock, flags); + + if (udma_is_chan_running(uc)) + udma_stop(uc); + + if (uc->desc) { + uc->terminated_desc = uc->desc; + uc->desc = NULL; + uc->terminated_desc->terminated = true; + cancel_delayed_work(&uc->tx_drain.work); + } + + uc->paused = false; + + vchan_get_all_descriptors(&uc->vc, &head); + spin_unlock_irqrestore(&uc->vc.lock, flags); + vchan_dma_desc_free_list(&uc->vc, &head); + + return 0; +} + +static void udma_synchronize(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + unsigned long timeout = msecs_to_jiffies(1000); + + vchan_synchronize(&uc->vc); + + if (uc->state == UDMA_CHAN_IS_TERMINATING) { + timeout = wait_for_completion_timeout(&uc->teardown_completed, + timeout); + if (!timeout) { + dev_warn(uc->ud->dev, "chan%d teardown timeout!\n", + uc->id); + udma_dump_chan_stdata(uc); + udma_reset_chan(uc, true); + } + } + + udma_reset_chan(uc, false); + if (udma_is_chan_running(uc)) + dev_warn(uc->ud->dev, "chan%d refused to stop!\n", uc->id); + + cancel_delayed_work_sync(&uc->tx_drain.work); + udma_reset_rings(uc); +} + +static void udma_desc_pre_callback(struct virt_dma_chan *vc, + struct virt_dma_desc *vd, + struct dmaengine_result *result) +{ + struct udma_chan *uc = to_udma_chan(&vc->chan); + struct udma_desc *d; + + if (!vd) + return; + + d = to_udma_desc(&vd->tx); + + if (d->metadata_size) + udma_fetch_epib(uc, d); + + /* Provide residue information for the client */ + if (result) { + void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx); + + if (cppi5_desc_get_type(desc_vaddr) == + CPPI5_INFO0_DESC_TYPE_VAL_HOST) { + result->residue = d->residue - + cppi5_hdesc_get_pktlen(desc_vaddr); + if (result->residue) + result->result = DMA_TRANS_ABORTED; + else + result->result = DMA_TRANS_NOERROR; + } else { + result->residue = 0; + result->result = DMA_TRANS_NOERROR; + } + } +} + +/* + * This tasklet handles the completion of a DMA descriptor by + * calling its callback and freeing it. + */ +static void udma_vchan_complete(struct tasklet_struct *t) +{ + struct virt_dma_chan *vc = from_tasklet(vc, t, task); + struct virt_dma_desc *vd, *_vd; + struct dmaengine_desc_callback cb; + LIST_HEAD(head); + + spin_lock_irq(&vc->lock); + list_splice_tail_init(&vc->desc_completed, &head); + vd = vc->cyclic; + if (vd) { + vc->cyclic = NULL; + dmaengine_desc_get_callback(&vd->tx, &cb); + } else { + memset(&cb, 0, sizeof(cb)); + } + spin_unlock_irq(&vc->lock); + + udma_desc_pre_callback(vc, vd, NULL); + dmaengine_desc_callback_invoke(&cb, NULL); + + list_for_each_entry_safe(vd, _vd, &head, node) { + struct dmaengine_result result; + + dmaengine_desc_get_callback(&vd->tx, &cb); + + list_del(&vd->node); + + udma_desc_pre_callback(vc, vd, &result); + dmaengine_desc_callback_invoke(&cb, &result); + + vchan_vdesc_fini(vd); + } +} + +static void udma_free_chan_resources(struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + struct udma_dev *ud = to_udma_dev(chan->device); + + udma_terminate_all(chan); + if (uc->terminated_desc) { + udma_reset_chan(uc, false); + udma_reset_rings(uc); + } + + cancel_delayed_work_sync(&uc->tx_drain.work); + + if (uc->irq_num_ring > 0) { + free_irq(uc->irq_num_ring, uc); + + uc->irq_num_ring = 0; + } + if (uc->irq_num_udma > 0) { + free_irq(uc->irq_num_udma, uc); + + uc->irq_num_udma = 0; + } + + /* Release PSI-L pairing */ + if (uc->psil_paired) { + navss_psil_unpair(ud, uc->config.src_thread, + uc->config.dst_thread); + uc->psil_paired = false; + } + + vchan_free_chan_resources(&uc->vc); + tasklet_kill(&uc->vc.task); + + bcdma_free_bchan_resources(uc); + udma_free_tx_resources(uc); + udma_free_rx_resources(uc); + udma_reset_uchan(uc); + + if (uc->use_dma_pool) { + dma_pool_destroy(uc->hdesc_pool); + uc->use_dma_pool = false; + } +} + +static struct platform_driver udma_driver; +static struct platform_driver bcdma_driver; +static struct platform_driver pktdma_driver; + +struct udma_filter_param { + int remote_thread_id; + u32 atype; + u32 asel; + u32 tr_trigger_type; +}; + +static bool udma_dma_filter_fn(struct dma_chan *chan, void *param) +{ + struct udma_chan_config *ucc; + struct psil_endpoint_config *ep_config; + struct udma_filter_param *filter_param; + struct udma_chan *uc; + struct udma_dev *ud; + + if (chan->device->dev->driver != &udma_driver.driver && + chan->device->dev->driver != &bcdma_driver.driver && + chan->device->dev->driver != &pktdma_driver.driver) + return false; + + uc = to_udma_chan(chan); + ucc = &uc->config; + ud = uc->ud; + filter_param = param; + + if (filter_param->atype > 2) { + dev_err(ud->dev, "Invalid channel atype: %u\n", + filter_param->atype); + return false; + } + + if (filter_param->asel > 15) { + dev_err(ud->dev, "Invalid channel asel: %u\n", + filter_param->asel); + return false; + } + + ucc->remote_thread_id = filter_param->remote_thread_id; + ucc->atype = filter_param->atype; + ucc->asel = filter_param->asel; + ucc->tr_trigger_type = filter_param->tr_trigger_type; + + if (ucc->tr_trigger_type) { + ucc->dir = DMA_MEM_TO_MEM; + goto triggered_bchan; + } else if (ucc->remote_thread_id & K3_PSIL_DST_THREAD_ID_OFFSET) { + ucc->dir = DMA_MEM_TO_DEV; + } else { + ucc->dir = DMA_DEV_TO_MEM; + } + + ep_config = psil_get_ep_config(ucc->remote_thread_id); + if (IS_ERR(ep_config)) { + dev_err(ud->dev, "No configuration for psi-l thread 0x%04x\n", + ucc->remote_thread_id); + ucc->dir = DMA_MEM_TO_MEM; + ucc->remote_thread_id = -1; + ucc->atype = 0; + ucc->asel = 0; + return false; + } + + if (ud->match_data->type == DMA_TYPE_BCDMA && + ep_config->pkt_mode) { + dev_err(ud->dev, + "Only TR mode is supported (psi-l thread 0x%04x)\n", + ucc->remote_thread_id); + ucc->dir = DMA_MEM_TO_MEM; + ucc->remote_thread_id = -1; + ucc->atype = 0; + ucc->asel = 0; + return false; + } + + ucc->pkt_mode = ep_config->pkt_mode; + ucc->channel_tpl = ep_config->channel_tpl; + ucc->notdpkt = ep_config->notdpkt; + ucc->ep_type = ep_config->ep_type; + + if (ud->match_data->type == DMA_TYPE_PKTDMA && + ep_config->mapped_channel_id >= 0) { + ucc->mapped_channel_id = ep_config->mapped_channel_id; + ucc->default_flow_id = ep_config->default_flow_id; + } else { + ucc->mapped_channel_id = -1; + ucc->default_flow_id = -1; + } + + if (ucc->ep_type != PSIL_EP_NATIVE) { + const struct udma_match_data *match_data = ud->match_data; + + if (match_data->flags & UDMA_FLAG_PDMA_ACC32) + ucc->enable_acc32 = ep_config->pdma_acc32; + if (match_data->flags & UDMA_FLAG_PDMA_BURST) + ucc->enable_burst = ep_config->pdma_burst; + } + + ucc->needs_epib = ep_config->needs_epib; + ucc->psd_size = ep_config->psd_size; + ucc->metadata_size = + (ucc->needs_epib ? CPPI5_INFO0_HDESC_EPIB_SIZE : 0) + + ucc->psd_size; + + if (ucc->pkt_mode) + ucc->hdesc_size = ALIGN(sizeof(struct cppi5_host_desc_t) + + ucc->metadata_size, ud->desc_align); + + dev_dbg(ud->dev, "chan%d: Remote thread: 0x%04x (%s)\n", uc->id, + ucc->remote_thread_id, dmaengine_get_direction_text(ucc->dir)); + + return true; + +triggered_bchan: + dev_dbg(ud->dev, "chan%d: triggered channel (type: %u)\n", uc->id, + ucc->tr_trigger_type); + + return true; + +} + +static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct udma_dev *ud = ofdma->of_dma_data; + dma_cap_mask_t mask = ud->ddev.cap_mask; + struct udma_filter_param filter_param; + struct dma_chan *chan; + + if (ud->match_data->type == DMA_TYPE_BCDMA) { + if (dma_spec->args_count != 3) + return NULL; + + filter_param.tr_trigger_type = dma_spec->args[0]; + filter_param.remote_thread_id = dma_spec->args[1]; + filter_param.asel = dma_spec->args[2]; + filter_param.atype = 0; + } else { + if (dma_spec->args_count != 1 && dma_spec->args_count != 2) + return NULL; + + filter_param.remote_thread_id = dma_spec->args[0]; + filter_param.tr_trigger_type = 0; + if (dma_spec->args_count == 2) { + if (ud->match_data->type == DMA_TYPE_UDMA) { + filter_param.atype = dma_spec->args[1]; + filter_param.asel = 0; + } else { + filter_param.atype = 0; + filter_param.asel = dma_spec->args[1]; + } + } else { + filter_param.atype = 0; + filter_param.asel = 0; + } + } + + chan = __dma_request_channel(&mask, udma_dma_filter_fn, &filter_param, + ofdma->of_node); + if (!chan) { + dev_err(ud->dev, "get channel fail in %s.\n", __func__); + return ERR_PTR(-EINVAL); + } + + return chan; +} + +static struct udma_match_data am654_main_data = { + .type = DMA_TYPE_UDMA, + .psil_base = 0x1000, + .enable_memcpy_support = true, + .statictr_z_mask = GENMASK(11, 0), + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* H Channels */ + 0, /* No UH Channels */ + }, +}; + +static struct udma_match_data am654_mcu_data = { + .type = DMA_TYPE_UDMA, + .psil_base = 0x6000, + .enable_memcpy_support = false, + .statictr_z_mask = GENMASK(11, 0), + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* H Channels */ + 0, /* No UH Channels */ + }, +}; + +static struct udma_match_data j721e_main_data = { + .type = DMA_TYPE_UDMA, + .psil_base = 0x1000, + .enable_memcpy_support = true, + .flags = UDMA_FLAGS_J7_CLASS, + .statictr_z_mask = GENMASK(23, 0), + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES, /* H Channels */ + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES, /* UH Channels */ + }, +}; + +static struct udma_match_data j721e_mcu_data = { + .type = DMA_TYPE_UDMA, + .psil_base = 0x6000, + .enable_memcpy_support = false, /* MEM_TO_MEM is slow via MCU UDMA */ + .flags = UDMA_FLAGS_J7_CLASS, + .statictr_z_mask = GENMASK(23, 0), + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES, /* H Channels */ + 0, /* No UH Channels */ + }, +}; + +static struct udma_soc_data am62a_dmss_csi_soc_data = { + .oes = { + .bcdma_rchan_data = 0xe00, + .bcdma_rchan_ring = 0x1000, + }, +}; + +static struct udma_soc_data j721s2_bcdma_csi_soc_data = { + .oes = { + .bcdma_tchan_data = 0x800, + .bcdma_tchan_ring = 0xa00, + .bcdma_rchan_data = 0xe00, + .bcdma_rchan_ring = 0x1000, + }, +}; + +static struct udma_match_data am62a_bcdma_csirx_data = { + .type = DMA_TYPE_BCDMA, + .psil_base = 0x3100, + .enable_memcpy_support = false, + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + 0, /* No H Channels */ + 0, /* No UH Channels */ + }, + .soc_data = &am62a_dmss_csi_soc_data, +}; + +static struct udma_match_data am64_bcdma_data = { + .type = DMA_TYPE_BCDMA, + .psil_base = 0x2000, /* for tchan and rchan, not applicable to bchan */ + .enable_memcpy_support = true, /* Supported via bchan */ + .flags = UDMA_FLAGS_J7_CLASS, + .statictr_z_mask = GENMASK(23, 0), + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + 0, /* No H Channels */ + 0, /* No UH Channels */ + }, +}; + +static struct udma_match_data am64_pktdma_data = { + .type = DMA_TYPE_PKTDMA, + .psil_base = 0x1000, + .enable_memcpy_support = false, /* PKTDMA does not support MEM_TO_MEM */ + .flags = UDMA_FLAGS_J7_CLASS, + .statictr_z_mask = GENMASK(23, 0), + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + 0, /* No H Channels */ + 0, /* No UH Channels */ + }, +}; + +static struct udma_match_data j721s2_bcdma_csi_data = { + .type = DMA_TYPE_BCDMA, + .psil_base = 0x2000, + .enable_memcpy_support = false, + .burst_size = { + TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */ + 0, /* No H Channels */ + 0, /* No UH Channels */ + }, + .soc_data = &j721s2_bcdma_csi_soc_data, +}; + +static const struct of_device_id udma_of_match[] = { + { + .compatible = "ti,am654-navss-main-udmap", + .data = &am654_main_data, + }, + { + .compatible = "ti,am654-navss-mcu-udmap", + .data = &am654_mcu_data, + }, { + .compatible = "ti,j721e-navss-main-udmap", + .data = &j721e_main_data, + }, { + .compatible = "ti,j721e-navss-mcu-udmap", + .data = &j721e_mcu_data, + }, + { + .compatible = "ti,am64-dmss-bcdma", + .data = &am64_bcdma_data, + }, + { + .compatible = "ti,am64-dmss-pktdma", + .data = &am64_pktdma_data, + }, + { + .compatible = "ti,am62a-dmss-bcdma-csirx", + .data = &am62a_bcdma_csirx_data, + }, + { + .compatible = "ti,j721s2-dmss-bcdma-csi", + .data = &j721s2_bcdma_csi_data, + }, + { /* Sentinel */ }, +}; + +static struct udma_soc_data am654_soc_data = { + .oes = { + .udma_rchan = 0x200, + }, +}; + +static struct udma_soc_data j721e_soc_data = { + .oes = { + .udma_rchan = 0x400, + }, +}; + +static struct udma_soc_data j7200_soc_data = { + .oes = { + .udma_rchan = 0x80, + }, +}; + +static struct udma_soc_data am64_soc_data = { + .oes = { + .bcdma_bchan_data = 0x2200, + .bcdma_bchan_ring = 0x2400, + .bcdma_tchan_data = 0x2800, + .bcdma_tchan_ring = 0x2a00, + .bcdma_rchan_data = 0x2e00, + .bcdma_rchan_ring = 0x3000, + .pktdma_tchan_flow = 0x1200, + .pktdma_rchan_flow = 0x1600, + }, + .bcdma_trigger_event_offset = 0xc400, +}; + +static const struct soc_device_attribute k3_soc_devices[] = { + { .family = "AM65X", .data = &am654_soc_data }, + { .family = "J721E", .data = &j721e_soc_data }, + { .family = "J7200", .data = &j7200_soc_data }, + { .family = "AM64X", .data = &am64_soc_data }, + { .family = "J721S2", .data = &j721e_soc_data}, + { .family = "AM62X", .data = &am64_soc_data }, + { .family = "AM62AX", .data = &am64_soc_data }, + { .family = "J784S4", .data = &j721e_soc_data }, + { /* sentinel */ } +}; + +static int udma_get_mmrs(struct platform_device *pdev, struct udma_dev *ud) +{ + u32 cap2, cap3, cap4; + int i; + + ud->mmrs[MMR_GCFG] = devm_platform_ioremap_resource_byname(pdev, mmr_names[MMR_GCFG]); + if (IS_ERR(ud->mmrs[MMR_GCFG])) + return PTR_ERR(ud->mmrs[MMR_GCFG]); + + cap2 = udma_read(ud->mmrs[MMR_GCFG], 0x28); + cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c); + + switch (ud->match_data->type) { + case DMA_TYPE_UDMA: + ud->rflow_cnt = UDMA_CAP3_RFLOW_CNT(cap3); + ud->tchan_cnt = UDMA_CAP2_TCHAN_CNT(cap2); + ud->echan_cnt = UDMA_CAP2_ECHAN_CNT(cap2); + ud->rchan_cnt = UDMA_CAP2_RCHAN_CNT(cap2); + break; + case DMA_TYPE_BCDMA: + ud->bchan_cnt = BCDMA_CAP2_BCHAN_CNT(cap2); + ud->tchan_cnt = BCDMA_CAP2_TCHAN_CNT(cap2); + ud->rchan_cnt = BCDMA_CAP2_RCHAN_CNT(cap2); + ud->rflow_cnt = ud->rchan_cnt; + break; + case DMA_TYPE_PKTDMA: + cap4 = udma_read(ud->mmrs[MMR_GCFG], 0x30); + ud->tchan_cnt = UDMA_CAP2_TCHAN_CNT(cap2); + ud->rchan_cnt = UDMA_CAP2_RCHAN_CNT(cap2); + ud->rflow_cnt = UDMA_CAP3_RFLOW_CNT(cap3); + ud->tflow_cnt = PKTDMA_CAP4_TFLOW_CNT(cap4); + break; + default: + return -EINVAL; + } + + for (i = 1; i < MMR_LAST; i++) { + if (i == MMR_BCHANRT && ud->bchan_cnt == 0) + continue; + if (i == MMR_TCHANRT && ud->tchan_cnt == 0) + continue; + if (i == MMR_RCHANRT && ud->rchan_cnt == 0) + continue; + + ud->mmrs[i] = devm_platform_ioremap_resource_byname(pdev, mmr_names[i]); + if (IS_ERR(ud->mmrs[i])) + return PTR_ERR(ud->mmrs[i]); + } + + return 0; +} + +static void udma_mark_resource_ranges(struct udma_dev *ud, unsigned long *map, + struct ti_sci_resource_desc *rm_desc, + char *name) +{ + bitmap_clear(map, rm_desc->start, rm_desc->num); + bitmap_clear(map, rm_desc->start_sec, rm_desc->num_sec); + dev_dbg(ud->dev, "ti_sci resource range for %s: %d:%d | %d:%d\n", name, + rm_desc->start, rm_desc->num, rm_desc->start_sec, + rm_desc->num_sec); +} + +static const char * const range_names[] = { + [RM_RANGE_BCHAN] = "ti,sci-rm-range-bchan", + [RM_RANGE_TCHAN] = "ti,sci-rm-range-tchan", + [RM_RANGE_RCHAN] = "ti,sci-rm-range-rchan", + [RM_RANGE_RFLOW] = "ti,sci-rm-range-rflow", + [RM_RANGE_TFLOW] = "ti,sci-rm-range-tflow", +}; + +static int udma_setup_resources(struct udma_dev *ud) +{ + int ret, i, j; + struct device *dev = ud->dev; + struct ti_sci_resource *rm_res, irq_res; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + u32 cap3; + + /* Set up the throughput level start indexes */ + cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c); + if (of_device_is_compatible(dev->of_node, + "ti,am654-navss-main-udmap")) { + ud->tchan_tpl.levels = 2; + ud->tchan_tpl.start_idx[0] = 8; + } else if (of_device_is_compatible(dev->of_node, + "ti,am654-navss-mcu-udmap")) { + ud->tchan_tpl.levels = 2; + ud->tchan_tpl.start_idx[0] = 2; + } else if (UDMA_CAP3_UCHAN_CNT(cap3)) { + ud->tchan_tpl.levels = 3; + ud->tchan_tpl.start_idx[1] = UDMA_CAP3_UCHAN_CNT(cap3); + ud->tchan_tpl.start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3); + } else if (UDMA_CAP3_HCHAN_CNT(cap3)) { + ud->tchan_tpl.levels = 2; + ud->tchan_tpl.start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3); + } else { + ud->tchan_tpl.levels = 1; + } + + ud->rchan_tpl.levels = ud->tchan_tpl.levels; + ud->rchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0]; + ud->rchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1]; + + ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt), + sizeof(unsigned long), GFP_KERNEL); + ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans), + GFP_KERNEL); + ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt), + sizeof(unsigned long), GFP_KERNEL); + ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans), + GFP_KERNEL); + ud->rflow_gp_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rflow_cnt), + sizeof(unsigned long), + GFP_KERNEL); + ud->rflow_gp_map_allocated = devm_kcalloc(dev, + BITS_TO_LONGS(ud->rflow_cnt), + sizeof(unsigned long), + GFP_KERNEL); + ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rflow_cnt), + sizeof(unsigned long), + GFP_KERNEL); + ud->rflows = devm_kcalloc(dev, ud->rflow_cnt, sizeof(*ud->rflows), + GFP_KERNEL); + + if (!ud->tchan_map || !ud->rchan_map || !ud->rflow_gp_map || + !ud->rflow_gp_map_allocated || !ud->tchans || !ud->rchans || + !ud->rflows || !ud->rflow_in_use) + return -ENOMEM; + + /* + * RX flows with the same Ids as RX channels are reserved to be used + * as default flows if remote HW can't generate flow_ids. Those + * RX flows can be requested only explicitly by id. + */ + bitmap_set(ud->rflow_gp_map_allocated, 0, ud->rchan_cnt); + + /* by default no GP rflows are assigned to Linux */ + bitmap_set(ud->rflow_gp_map, 0, ud->rflow_cnt); + + /* Get resource ranges from tisci */ + for (i = 0; i < RM_RANGE_LAST; i++) { + if (i == RM_RANGE_BCHAN || i == RM_RANGE_TFLOW) + continue; + + tisci_rm->rm_ranges[i] = + devm_ti_sci_get_of_resource(tisci_rm->tisci, dev, + tisci_rm->tisci_dev_id, + (char *)range_names[i]); + } + + /* tchan ranges */ + rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; + if (IS_ERR(rm_res)) { + bitmap_zero(ud->tchan_map, ud->tchan_cnt); + irq_res.sets = 1; + } else { + bitmap_fill(ud->tchan_map, ud->tchan_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->tchan_map, + &rm_res->desc[i], "tchan"); + irq_res.sets = rm_res->sets; + } + + /* rchan and matching default flow ranges */ + rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; + if (IS_ERR(rm_res)) { + bitmap_zero(ud->rchan_map, ud->rchan_cnt); + irq_res.sets++; + } else { + bitmap_fill(ud->rchan_map, ud->rchan_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->rchan_map, + &rm_res->desc[i], "rchan"); + irq_res.sets += rm_res->sets; + } + + irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; + rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = 0; + irq_res.desc[0].num = ud->tchan_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start; + irq_res.desc[i].num = rm_res->desc[i].num; + irq_res.desc[i].start_sec = rm_res->desc[i].start_sec; + irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + } + } + rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = 0; + irq_res.desc[i].num = ud->rchan_cnt; + } else { + for (j = 0; j < rm_res->sets; j++, i++) { + if (rm_res->desc[j].num) { + irq_res.desc[i].start = rm_res->desc[j].start + + ud->soc_data->oes.udma_rchan; + irq_res.desc[i].num = rm_res->desc[j].num; + } + if (rm_res->desc[j].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + + ud->soc_data->oes.udma_rchan; + irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + } + } + } + ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); + kfree(irq_res.desc); + if (ret) { + dev_err(ud->dev, "Failed to allocate MSI interrupts\n"); + return ret; + } + + /* GP rflow ranges */ + rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW]; + if (IS_ERR(rm_res)) { + /* all gp flows are assigned exclusively to Linux */ + bitmap_clear(ud->rflow_gp_map, ud->rchan_cnt, + ud->rflow_cnt - ud->rchan_cnt); + } else { + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->rflow_gp_map, + &rm_res->desc[i], "gp-rflow"); + } + + return 0; +} + +static int bcdma_setup_resources(struct udma_dev *ud) +{ + int ret, i, j; + struct device *dev = ud->dev; + struct ti_sci_resource *rm_res, irq_res; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct udma_oes_offsets *oes = &ud->soc_data->oes; + u32 cap; + + /* Set up the throughput level start indexes */ + cap = udma_read(ud->mmrs[MMR_GCFG], 0x2c); + if (BCDMA_CAP3_UBCHAN_CNT(cap)) { + ud->bchan_tpl.levels = 3; + ud->bchan_tpl.start_idx[1] = BCDMA_CAP3_UBCHAN_CNT(cap); + ud->bchan_tpl.start_idx[0] = BCDMA_CAP3_HBCHAN_CNT(cap); + } else if (BCDMA_CAP3_HBCHAN_CNT(cap)) { + ud->bchan_tpl.levels = 2; + ud->bchan_tpl.start_idx[0] = BCDMA_CAP3_HBCHAN_CNT(cap); + } else { + ud->bchan_tpl.levels = 1; + } + + cap = udma_read(ud->mmrs[MMR_GCFG], 0x30); + if (BCDMA_CAP4_URCHAN_CNT(cap)) { + ud->rchan_tpl.levels = 3; + ud->rchan_tpl.start_idx[1] = BCDMA_CAP4_URCHAN_CNT(cap); + ud->rchan_tpl.start_idx[0] = BCDMA_CAP4_HRCHAN_CNT(cap); + } else if (BCDMA_CAP4_HRCHAN_CNT(cap)) { + ud->rchan_tpl.levels = 2; + ud->rchan_tpl.start_idx[0] = BCDMA_CAP4_HRCHAN_CNT(cap); + } else { + ud->rchan_tpl.levels = 1; + } + + if (BCDMA_CAP4_UTCHAN_CNT(cap)) { + ud->tchan_tpl.levels = 3; + ud->tchan_tpl.start_idx[1] = BCDMA_CAP4_UTCHAN_CNT(cap); + ud->tchan_tpl.start_idx[0] = BCDMA_CAP4_HTCHAN_CNT(cap); + } else if (BCDMA_CAP4_HTCHAN_CNT(cap)) { + ud->tchan_tpl.levels = 2; + ud->tchan_tpl.start_idx[0] = BCDMA_CAP4_HTCHAN_CNT(cap); + } else { + ud->tchan_tpl.levels = 1; + } + + ud->bchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->bchan_cnt), + sizeof(unsigned long), GFP_KERNEL); + ud->bchans = devm_kcalloc(dev, ud->bchan_cnt, sizeof(*ud->bchans), + GFP_KERNEL); + ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt), + sizeof(unsigned long), GFP_KERNEL); + ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans), + GFP_KERNEL); + ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt), + sizeof(unsigned long), GFP_KERNEL); + ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans), + GFP_KERNEL); + /* BCDMA do not really have flows, but the driver expect it */ + ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rchan_cnt), + sizeof(unsigned long), + GFP_KERNEL); + ud->rflows = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rflows), + GFP_KERNEL); + + if (!ud->bchan_map || !ud->tchan_map || !ud->rchan_map || + !ud->rflow_in_use || !ud->bchans || !ud->tchans || !ud->rchans || + !ud->rflows) + return -ENOMEM; + + /* Get resource ranges from tisci */ + for (i = 0; i < RM_RANGE_LAST; i++) { + if (i == RM_RANGE_RFLOW || i == RM_RANGE_TFLOW) + continue; + if (i == RM_RANGE_BCHAN && ud->bchan_cnt == 0) + continue; + if (i == RM_RANGE_TCHAN && ud->tchan_cnt == 0) + continue; + if (i == RM_RANGE_RCHAN && ud->rchan_cnt == 0) + continue; + + tisci_rm->rm_ranges[i] = + devm_ti_sci_get_of_resource(tisci_rm->tisci, dev, + tisci_rm->tisci_dev_id, + (char *)range_names[i]); + } + + irq_res.sets = 0; + + /* bchan ranges */ + if (ud->bchan_cnt) { + rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN]; + if (IS_ERR(rm_res)) { + bitmap_zero(ud->bchan_map, ud->bchan_cnt); + irq_res.sets++; + } else { + bitmap_fill(ud->bchan_map, ud->bchan_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->bchan_map, + &rm_res->desc[i], + "bchan"); + irq_res.sets += rm_res->sets; + } + } + + /* tchan ranges */ + if (ud->tchan_cnt) { + rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; + if (IS_ERR(rm_res)) { + bitmap_zero(ud->tchan_map, ud->tchan_cnt); + irq_res.sets += 2; + } else { + bitmap_fill(ud->tchan_map, ud->tchan_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->tchan_map, + &rm_res->desc[i], + "tchan"); + irq_res.sets += rm_res->sets * 2; + } + } + + /* rchan ranges */ + if (ud->rchan_cnt) { + rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; + if (IS_ERR(rm_res)) { + bitmap_zero(ud->rchan_map, ud->rchan_cnt); + irq_res.sets += 2; + } else { + bitmap_fill(ud->rchan_map, ud->rchan_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->rchan_map, + &rm_res->desc[i], + "rchan"); + irq_res.sets += rm_res->sets * 2; + } + } + + irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; + if (ud->bchan_cnt) { + rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN]; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = oes->bcdma_bchan_ring; + irq_res.desc[0].num = ud->bchan_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start + + oes->bcdma_bchan_ring; + irq_res.desc[i].num = rm_res->desc[i].num; + } + } + } else { + i = 0; + } + + if (ud->tchan_cnt) { + rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->bcdma_tchan_data; + irq_res.desc[i].num = ud->tchan_cnt; + irq_res.desc[i + 1].start = oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num = ud->tchan_cnt; + i += 2; + } else { + for (j = 0; j < rm_res->sets; j++, i += 2) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->bcdma_tchan_data; + irq_res.desc[i].num = rm_res->desc[j].num; + + irq_res.desc[i + 1].start = rm_res->desc[j].start + + oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num = rm_res->desc[j].num; + } + } + } + if (ud->rchan_cnt) { + rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->bcdma_rchan_data; + irq_res.desc[i].num = ud->rchan_cnt; + irq_res.desc[i + 1].start = oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num = ud->rchan_cnt; + i += 2; + } else { + for (j = 0; j < rm_res->sets; j++, i += 2) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->bcdma_rchan_data; + irq_res.desc[i].num = rm_res->desc[j].num; + + irq_res.desc[i + 1].start = rm_res->desc[j].start + + oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num = rm_res->desc[j].num; + } + } + } + + ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); + kfree(irq_res.desc); + if (ret) { + dev_err(ud->dev, "Failed to allocate MSI interrupts\n"); + return ret; + } + + return 0; +} + +static int pktdma_setup_resources(struct udma_dev *ud) +{ + int ret, i, j; + struct device *dev = ud->dev; + struct ti_sci_resource *rm_res, irq_res; + struct udma_tisci_rm *tisci_rm = &ud->tisci_rm; + const struct udma_oes_offsets *oes = &ud->soc_data->oes; + u32 cap3; + + /* Set up the throughput level start indexes */ + cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c); + if (UDMA_CAP3_UCHAN_CNT(cap3)) { + ud->tchan_tpl.levels = 3; + ud->tchan_tpl.start_idx[1] = UDMA_CAP3_UCHAN_CNT(cap3); + ud->tchan_tpl.start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3); + } else if (UDMA_CAP3_HCHAN_CNT(cap3)) { + ud->tchan_tpl.levels = 2; + ud->tchan_tpl.start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3); + } else { + ud->tchan_tpl.levels = 1; + } + + ud->rchan_tpl.levels = ud->tchan_tpl.levels; + ud->rchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0]; + ud->rchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1]; + + ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt), + sizeof(unsigned long), GFP_KERNEL); + ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans), + GFP_KERNEL); + ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt), + sizeof(unsigned long), GFP_KERNEL); + ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans), + GFP_KERNEL); + ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rflow_cnt), + sizeof(unsigned long), + GFP_KERNEL); + ud->rflows = devm_kcalloc(dev, ud->rflow_cnt, sizeof(*ud->rflows), + GFP_KERNEL); + ud->tflow_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tflow_cnt), + sizeof(unsigned long), GFP_KERNEL); + + if (!ud->tchan_map || !ud->rchan_map || !ud->tflow_map || !ud->tchans || + !ud->rchans || !ud->rflows || !ud->rflow_in_use) + return -ENOMEM; + + /* Get resource ranges from tisci */ + for (i = 0; i < RM_RANGE_LAST; i++) { + if (i == RM_RANGE_BCHAN) + continue; + + tisci_rm->rm_ranges[i] = + devm_ti_sci_get_of_resource(tisci_rm->tisci, dev, + tisci_rm->tisci_dev_id, + (char *)range_names[i]); + } + + /* tchan ranges */ + rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; + if (IS_ERR(rm_res)) { + bitmap_zero(ud->tchan_map, ud->tchan_cnt); + } else { + bitmap_fill(ud->tchan_map, ud->tchan_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->tchan_map, + &rm_res->desc[i], "tchan"); + } + + /* rchan ranges */ + rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; + if (IS_ERR(rm_res)) { + bitmap_zero(ud->rchan_map, ud->rchan_cnt); + } else { + bitmap_fill(ud->rchan_map, ud->rchan_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->rchan_map, + &rm_res->desc[i], "rchan"); + } + + /* rflow ranges */ + rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW]; + if (IS_ERR(rm_res)) { + /* all rflows are assigned exclusively to Linux */ + bitmap_zero(ud->rflow_in_use, ud->rflow_cnt); + irq_res.sets = 1; + } else { + bitmap_fill(ud->rflow_in_use, ud->rflow_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->rflow_in_use, + &rm_res->desc[i], "rflow"); + irq_res.sets = rm_res->sets; + } + + /* tflow ranges */ + rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW]; + if (IS_ERR(rm_res)) { + /* all tflows are assigned exclusively to Linux */ + bitmap_zero(ud->tflow_map, ud->tflow_cnt); + irq_res.sets++; + } else { + bitmap_fill(ud->tflow_map, ud->tflow_cnt); + for (i = 0; i < rm_res->sets; i++) + udma_mark_resource_ranges(ud, ud->tflow_map, + &rm_res->desc[i], "tflow"); + irq_res.sets += rm_res->sets; + } + + irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; + rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW]; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = oes->pktdma_tchan_flow; + irq_res.desc[0].num = ud->tflow_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start + + oes->pktdma_tchan_flow; + irq_res.desc[i].num = rm_res->desc[i].num; + } + } + rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW]; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->pktdma_rchan_flow; + irq_res.desc[i].num = ud->rflow_cnt; + } else { + for (j = 0; j < rm_res->sets; j++, i++) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->pktdma_rchan_flow; + irq_res.desc[i].num = rm_res->desc[j].num; + } + } + ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); + kfree(irq_res.desc); + if (ret) { + dev_err(ud->dev, "Failed to allocate MSI interrupts\n"); + return ret; + } + + return 0; +} + +static int setup_resources(struct udma_dev *ud) +{ + struct device *dev = ud->dev; + int ch_count, ret; + + switch (ud->match_data->type) { + case DMA_TYPE_UDMA: + ret = udma_setup_resources(ud); + break; + case DMA_TYPE_BCDMA: + ret = bcdma_setup_resources(ud); + break; + case DMA_TYPE_PKTDMA: + ret = pktdma_setup_resources(ud); + break; + default: + return -EINVAL; + } + + if (ret) + return ret; + + ch_count = ud->bchan_cnt + ud->tchan_cnt + ud->rchan_cnt; + if (ud->bchan_cnt) + ch_count -= bitmap_weight(ud->bchan_map, ud->bchan_cnt); + ch_count -= bitmap_weight(ud->tchan_map, ud->tchan_cnt); + ch_count -= bitmap_weight(ud->rchan_map, ud->rchan_cnt); + if (!ch_count) + return -ENODEV; + + ud->channels = devm_kcalloc(dev, ch_count, sizeof(*ud->channels), + GFP_KERNEL); + if (!ud->channels) + return -ENOMEM; + + switch (ud->match_data->type) { + case DMA_TYPE_UDMA: + dev_info(dev, + "Channels: %d (tchan: %u, rchan: %u, gp-rflow: %u)\n", + ch_count, + ud->tchan_cnt - bitmap_weight(ud->tchan_map, + ud->tchan_cnt), + ud->rchan_cnt - bitmap_weight(ud->rchan_map, + ud->rchan_cnt), + ud->rflow_cnt - bitmap_weight(ud->rflow_gp_map, + ud->rflow_cnt)); + break; + case DMA_TYPE_BCDMA: + dev_info(dev, + "Channels: %d (bchan: %u, tchan: %u, rchan: %u)\n", + ch_count, + ud->bchan_cnt - bitmap_weight(ud->bchan_map, + ud->bchan_cnt), + ud->tchan_cnt - bitmap_weight(ud->tchan_map, + ud->tchan_cnt), + ud->rchan_cnt - bitmap_weight(ud->rchan_map, + ud->rchan_cnt)); + break; + case DMA_TYPE_PKTDMA: + dev_info(dev, + "Channels: %d (tchan: %u, rchan: %u)\n", + ch_count, + ud->tchan_cnt - bitmap_weight(ud->tchan_map, + ud->tchan_cnt), + ud->rchan_cnt - bitmap_weight(ud->rchan_map, + ud->rchan_cnt)); + break; + default: + break; + } + + return ch_count; +} + +static int udma_setup_rx_flush(struct udma_dev *ud) +{ + struct udma_rx_flush *rx_flush = &ud->rx_flush; + struct cppi5_desc_hdr_t *tr_desc; + struct cppi5_tr_type1_t *tr_req; + struct cppi5_host_desc_t *desc; + struct device *dev = ud->dev; + struct udma_hwdesc *hwdesc; + size_t tr_size; + + /* Allocate 1K buffer for discarded data on RX channel teardown */ + rx_flush->buffer_size = SZ_1K; + rx_flush->buffer_vaddr = devm_kzalloc(dev, rx_flush->buffer_size, + GFP_KERNEL); + if (!rx_flush->buffer_vaddr) + return -ENOMEM; + + rx_flush->buffer_paddr = dma_map_single(dev, rx_flush->buffer_vaddr, + rx_flush->buffer_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, rx_flush->buffer_paddr)) + return -ENOMEM; + + /* Set up descriptor to be used for TR mode */ + hwdesc = &rx_flush->hwdescs[0]; + tr_size = sizeof(struct cppi5_tr_type1_t); + hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size, 1); + hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size, + ud->desc_align); + + hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size, + GFP_KERNEL); + if (!hwdesc->cppi5_desc_vaddr) + return -ENOMEM; + + hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr, + hwdesc->cppi5_desc_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr)) + return -ENOMEM; + + /* Start of the TR req records */ + hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size; + /* Start address of the TR response array */ + hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size; + + tr_desc = hwdesc->cppi5_desc_vaddr; + cppi5_trdesc_init(tr_desc, 1, tr_size, 0, 0); + cppi5_desc_set_pktids(tr_desc, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT); + cppi5_desc_set_retpolicy(tr_desc, 0, 0); + + tr_req = hwdesc->tr_req_base; + cppi5_tr_init(&tr_req->flags, CPPI5_TR_TYPE1, false, false, + CPPI5_TR_EVENT_SIZE_COMPLETION, 0); + cppi5_tr_csf_set(&tr_req->flags, CPPI5_TR_CSF_SUPR_EVT); + + tr_req->addr = rx_flush->buffer_paddr; + tr_req->icnt0 = rx_flush->buffer_size; + tr_req->icnt1 = 1; + + dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr, + hwdesc->cppi5_desc_size, DMA_TO_DEVICE); + + /* Set up descriptor to be used for packet mode */ + hwdesc = &rx_flush->hwdescs[1]; + hwdesc->cppi5_desc_size = ALIGN(sizeof(struct cppi5_host_desc_t) + + CPPI5_INFO0_HDESC_EPIB_SIZE + + CPPI5_INFO0_HDESC_PSDATA_MAX_SIZE, + ud->desc_align); + + hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size, + GFP_KERNEL); + if (!hwdesc->cppi5_desc_vaddr) + return -ENOMEM; + + hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr, + hwdesc->cppi5_desc_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr)) + return -ENOMEM; + + desc = hwdesc->cppi5_desc_vaddr; + cppi5_hdesc_init(desc, 0, 0); + cppi5_desc_set_pktids(&desc->hdr, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT); + cppi5_desc_set_retpolicy(&desc->hdr, 0, 0); + + cppi5_hdesc_attach_buf(desc, + rx_flush->buffer_paddr, rx_flush->buffer_size, + rx_flush->buffer_paddr, rx_flush->buffer_size); + + dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr, + hwdesc->cppi5_desc_size, DMA_TO_DEVICE); + return 0; +} + +#ifdef CONFIG_DEBUG_FS +static void udma_dbg_summary_show_chan(struct seq_file *s, + struct dma_chan *chan) +{ + struct udma_chan *uc = to_udma_chan(chan); + struct udma_chan_config *ucc = &uc->config; + + seq_printf(s, " %-13s| %s", dma_chan_name(chan), + chan->dbg_client_name ?: "in-use"); + if (ucc->tr_trigger_type) + seq_puts(s, " (triggered, "); + else + seq_printf(s, " (%s, ", + dmaengine_get_direction_text(uc->config.dir)); + + switch (uc->config.dir) { + case DMA_MEM_TO_MEM: + if (uc->ud->match_data->type == DMA_TYPE_BCDMA) { + seq_printf(s, "bchan%d)\n", uc->bchan->id); + return; + } + + seq_printf(s, "chan%d pair [0x%04x -> 0x%04x], ", uc->tchan->id, + ucc->src_thread, ucc->dst_thread); + break; + case DMA_DEV_TO_MEM: + seq_printf(s, "rchan%d [0x%04x -> 0x%04x], ", uc->rchan->id, + ucc->src_thread, ucc->dst_thread); + if (uc->ud->match_data->type == DMA_TYPE_PKTDMA) + seq_printf(s, "rflow%d, ", uc->rflow->id); + break; + case DMA_MEM_TO_DEV: + seq_printf(s, "tchan%d [0x%04x -> 0x%04x], ", uc->tchan->id, + ucc->src_thread, ucc->dst_thread); + if (uc->ud->match_data->type == DMA_TYPE_PKTDMA) + seq_printf(s, "tflow%d, ", uc->tchan->tflow_id); + break; + default: + seq_printf(s, ")\n"); + return; + } + + if (ucc->ep_type == PSIL_EP_NATIVE) { + seq_printf(s, "PSI-L Native"); + if (ucc->metadata_size) { + seq_printf(s, "[%s", ucc->needs_epib ? " EPIB" : ""); + if (ucc->psd_size) + seq_printf(s, " PSDsize:%u", ucc->psd_size); + seq_printf(s, " ]"); + } + } else { + seq_printf(s, "PDMA"); + if (ucc->enable_acc32 || ucc->enable_burst) + seq_printf(s, "[%s%s ]", + ucc->enable_acc32 ? " ACC32" : "", + ucc->enable_burst ? " BURST" : ""); + } + + seq_printf(s, ", %s)\n", ucc->pkt_mode ? "Packet mode" : "TR mode"); +} + +static void udma_dbg_summary_show(struct seq_file *s, + struct dma_device *dma_dev) +{ + struct dma_chan *chan; + + list_for_each_entry(chan, &dma_dev->channels, device_node) { + if (chan->client_count) + udma_dbg_summary_show_chan(s, chan); + } +} +#endif /* CONFIG_DEBUG_FS */ + +static enum dmaengine_alignment udma_get_copy_align(struct udma_dev *ud) +{ + const struct udma_match_data *match_data = ud->match_data; + u8 tpl; + + if (!match_data->enable_memcpy_support) + return DMAENGINE_ALIGN_8_BYTES; + + /* Get the highest TPL level the device supports for memcpy */ + if (ud->bchan_cnt) + tpl = udma_get_chan_tpl_index(&ud->bchan_tpl, 0); + else if (ud->tchan_cnt) + tpl = udma_get_chan_tpl_index(&ud->tchan_tpl, 0); + else + return DMAENGINE_ALIGN_8_BYTES; + + switch (match_data->burst_size[tpl]) { + case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES: + return DMAENGINE_ALIGN_256_BYTES; + case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES: + return DMAENGINE_ALIGN_128_BYTES; + case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES: + fallthrough; + default: + return DMAENGINE_ALIGN_64_BYTES; + } +} + +#define TI_UDMAC_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +static int udma_probe(struct platform_device *pdev) +{ + struct device_node *navss_node = pdev->dev.parent->of_node; + const struct soc_device_attribute *soc; + struct device *dev = &pdev->dev; + struct udma_dev *ud; + const struct of_device_id *match; + int i, ret; + int ch_count; + + ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) + dev_err(dev, "failed to set dma mask stuff\n"); + + ud = devm_kzalloc(dev, sizeof(*ud), GFP_KERNEL); + if (!ud) + return -ENOMEM; + + match = of_match_node(udma_of_match, dev->of_node); + if (!match) { + dev_err(dev, "No compatible match found\n"); + return -ENODEV; + } + ud->match_data = match->data; + + ud->soc_data = ud->match_data->soc_data; + if (!ud->soc_data) { + soc = soc_device_match(k3_soc_devices); + if (!soc) { + dev_err(dev, "No compatible SoC found\n"); + return -ENODEV; + } + ud->soc_data = soc->data; + } + + ret = udma_get_mmrs(pdev, ud); + if (ret) + return ret; + + ud->tisci_rm.tisci = ti_sci_get_by_phandle(dev->of_node, "ti,sci"); + if (IS_ERR(ud->tisci_rm.tisci)) + return PTR_ERR(ud->tisci_rm.tisci); + + ret = of_property_read_u32(dev->of_node, "ti,sci-dev-id", + &ud->tisci_rm.tisci_dev_id); + if (ret) { + dev_err(dev, "ti,sci-dev-id read failure %d\n", ret); + return ret; + } + pdev->id = ud->tisci_rm.tisci_dev_id; + + ret = of_property_read_u32(navss_node, "ti,sci-dev-id", + &ud->tisci_rm.tisci_navss_dev_id); + if (ret) { + dev_err(dev, "NAVSS ti,sci-dev-id read failure %d\n", ret); + return ret; + } + + if (ud->match_data->type == DMA_TYPE_UDMA) { + ret = of_property_read_u32(dev->of_node, "ti,udma-atype", + &ud->atype); + if (!ret && ud->atype > 2) { + dev_err(dev, "Invalid atype: %u\n", ud->atype); + return -EINVAL; + } + } else { + ret = of_property_read_u32(dev->of_node, "ti,asel", + &ud->asel); + if (!ret && ud->asel > 15) { + dev_err(dev, "Invalid asel: %u\n", ud->asel); + return -EINVAL; + } + } + + ud->tisci_rm.tisci_udmap_ops = &ud->tisci_rm.tisci->ops.rm_udmap_ops; + ud->tisci_rm.tisci_psil_ops = &ud->tisci_rm.tisci->ops.rm_psil_ops; + + if (ud->match_data->type == DMA_TYPE_UDMA) { + ud->ringacc = of_k3_ringacc_get_by_phandle(dev->of_node, "ti,ringacc"); + } else { + struct k3_ringacc_init_data ring_init_data; + + ring_init_data.tisci = ud->tisci_rm.tisci; + ring_init_data.tisci_dev_id = ud->tisci_rm.tisci_dev_id; + if (ud->match_data->type == DMA_TYPE_BCDMA) { + ring_init_data.num_rings = ud->bchan_cnt + + ud->tchan_cnt + + ud->rchan_cnt; + } else { + ring_init_data.num_rings = ud->rflow_cnt + + ud->tflow_cnt; + } + + ud->ringacc = k3_ringacc_dmarings_init(pdev, &ring_init_data); + } + + if (IS_ERR(ud->ringacc)) + return PTR_ERR(ud->ringacc); + + dev->msi.domain = of_msi_get_domain(dev, dev->of_node, + DOMAIN_BUS_TI_SCI_INTA_MSI); + if (!dev->msi.domain) { + return -EPROBE_DEFER; + } + + dma_cap_set(DMA_SLAVE, ud->ddev.cap_mask); + /* cyclic operation is not supported via PKTDMA */ + if (ud->match_data->type != DMA_TYPE_PKTDMA) { + dma_cap_set(DMA_CYCLIC, ud->ddev.cap_mask); + ud->ddev.device_prep_dma_cyclic = udma_prep_dma_cyclic; + } + + ud->ddev.device_config = udma_slave_config; + ud->ddev.device_prep_slave_sg = udma_prep_slave_sg; + ud->ddev.device_issue_pending = udma_issue_pending; + ud->ddev.device_tx_status = udma_tx_status; + ud->ddev.device_pause = udma_pause; + ud->ddev.device_resume = udma_resume; + ud->ddev.device_terminate_all = udma_terminate_all; + ud->ddev.device_synchronize = udma_synchronize; +#ifdef CONFIG_DEBUG_FS + ud->ddev.dbg_summary_show = udma_dbg_summary_show; +#endif + + switch (ud->match_data->type) { + case DMA_TYPE_UDMA: + ud->ddev.device_alloc_chan_resources = + udma_alloc_chan_resources; + break; + case DMA_TYPE_BCDMA: + ud->ddev.device_alloc_chan_resources = + bcdma_alloc_chan_resources; + ud->ddev.device_router_config = bcdma_router_config; + break; + case DMA_TYPE_PKTDMA: + ud->ddev.device_alloc_chan_resources = + pktdma_alloc_chan_resources; + break; + default: + return -EINVAL; + } + ud->ddev.device_free_chan_resources = udma_free_chan_resources; + + ud->ddev.src_addr_widths = TI_UDMAC_BUSWIDTHS; + ud->ddev.dst_addr_widths = TI_UDMAC_BUSWIDTHS; + ud->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + ud->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + ud->ddev.desc_metadata_modes = DESC_METADATA_CLIENT | + DESC_METADATA_ENGINE; + if (ud->match_data->enable_memcpy_support && + !(ud->match_data->type == DMA_TYPE_BCDMA && ud->bchan_cnt == 0)) { + dma_cap_set(DMA_MEMCPY, ud->ddev.cap_mask); + ud->ddev.device_prep_dma_memcpy = udma_prep_dma_memcpy; + ud->ddev.directions |= BIT(DMA_MEM_TO_MEM); + } + + ud->ddev.dev = dev; + ud->dev = dev; + ud->psil_base = ud->match_data->psil_base; + + INIT_LIST_HEAD(&ud->ddev.channels); + INIT_LIST_HEAD(&ud->desc_to_purge); + + ch_count = setup_resources(ud); + if (ch_count <= 0) + return ch_count; + + spin_lock_init(&ud->lock); + INIT_WORK(&ud->purge_work, udma_purge_desc_work); + + ud->desc_align = 64; + if (ud->desc_align < dma_get_cache_alignment()) + ud->desc_align = dma_get_cache_alignment(); + + ret = udma_setup_rx_flush(ud); + if (ret) + return ret; + + for (i = 0; i < ud->bchan_cnt; i++) { + struct udma_bchan *bchan = &ud->bchans[i]; + + bchan->id = i; + bchan->reg_rt = ud->mmrs[MMR_BCHANRT] + i * 0x1000; + } + + for (i = 0; i < ud->tchan_cnt; i++) { + struct udma_tchan *tchan = &ud->tchans[i]; + + tchan->id = i; + tchan->reg_rt = ud->mmrs[MMR_TCHANRT] + i * 0x1000; + } + + for (i = 0; i < ud->rchan_cnt; i++) { + struct udma_rchan *rchan = &ud->rchans[i]; + + rchan->id = i; + rchan->reg_rt = ud->mmrs[MMR_RCHANRT] + i * 0x1000; + } + + for (i = 0; i < ud->rflow_cnt; i++) { + struct udma_rflow *rflow = &ud->rflows[i]; + + rflow->id = i; + } + + for (i = 0; i < ch_count; i++) { + struct udma_chan *uc = &ud->channels[i]; + + uc->ud = ud; + uc->vc.desc_free = udma_desc_free; + uc->id = i; + uc->bchan = NULL; + uc->tchan = NULL; + uc->rchan = NULL; + uc->config.remote_thread_id = -1; + uc->config.mapped_channel_id = -1; + uc->config.default_flow_id = -1; + uc->config.dir = DMA_MEM_TO_MEM; + uc->name = devm_kasprintf(dev, GFP_KERNEL, "%s chan%d", + dev_name(dev), i); + + vchan_init(&uc->vc, &ud->ddev); + /* Use custom vchan completion handling */ + tasklet_setup(&uc->vc.task, udma_vchan_complete); + init_completion(&uc->teardown_completed); + INIT_DELAYED_WORK(&uc->tx_drain.work, udma_check_tx_completion); + } + + /* Configure the copy_align to the maximum burst size the device supports */ + ud->ddev.copy_align = udma_get_copy_align(ud); + + ret = dma_async_device_register(&ud->ddev); + if (ret) { + dev_err(dev, "failed to register slave DMA engine: %d\n", ret); + return ret; + } + + platform_set_drvdata(pdev, ud); + + ret = of_dma_controller_register(dev->of_node, udma_of_xlate, ud); + if (ret) { + dev_err(dev, "failed to register of_dma controller\n"); + dma_async_device_unregister(&ud->ddev); + } + + return ret; +} + +static int __maybe_unused udma_pm_suspend(struct device *dev) +{ + struct udma_dev *ud = dev_get_drvdata(dev); + struct dma_device *dma_dev = &ud->ddev; + struct dma_chan *chan; + struct udma_chan *uc; + + list_for_each_entry(chan, &dma_dev->channels, device_node) { + if (chan->client_count) { + uc = to_udma_chan(chan); + /* backup the channel configuration */ + memcpy(&uc->backup_config, &uc->config, + sizeof(struct udma_chan_config)); + dev_dbg(dev, "Suspending channel %s\n", + dma_chan_name(chan)); + ud->ddev.device_free_chan_resources(chan); + } + } + + return 0; +} + +static int __maybe_unused udma_pm_resume(struct device *dev) +{ + struct udma_dev *ud = dev_get_drvdata(dev); + struct dma_device *dma_dev = &ud->ddev; + struct dma_chan *chan; + struct udma_chan *uc; + int ret; + + list_for_each_entry(chan, &dma_dev->channels, device_node) { + if (chan->client_count) { + uc = to_udma_chan(chan); + /* restore the channel configuration */ + memcpy(&uc->config, &uc->backup_config, + sizeof(struct udma_chan_config)); + dev_dbg(dev, "Resuming channel %s\n", + dma_chan_name(chan)); + ret = ud->ddev.device_alloc_chan_resources(chan); + if (ret) + return ret; + } + } + + return 0; +} + +static const struct dev_pm_ops udma_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(udma_pm_suspend, udma_pm_resume) +}; + +static struct platform_driver udma_driver = { + .driver = { + .name = "ti-udma", + .of_match_table = udma_of_match, + .suppress_bind_attrs = true, + .pm = &udma_pm_ops, + }, + .probe = udma_probe, +}; + +module_platform_driver(udma_driver); +MODULE_LICENSE("GPL v2"); + +/* Private interfaces to UDMA */ +#include "k3-udma-private.c" diff --git a/drivers/dma/ti/k3-udma.h b/drivers/dma/ti/k3-udma.h new file mode 100644 index 0000000000..d349c6d482 --- /dev/null +++ b/drivers/dma/ti/k3-udma.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + */ + +#ifndef K3_UDMA_H_ +#define K3_UDMA_H_ + +#include + +/* Global registers */ +#define UDMA_REV_REG 0x0 +#define UDMA_PERF_CTL_REG 0x4 +#define UDMA_EMU_CTL_REG 0x8 +#define UDMA_PSIL_TO_REG 0x10 +#define UDMA_UTC_CTL_REG 0x1c +#define UDMA_CAP_REG(i) (0x20 + ((i) * 4)) +#define UDMA_RX_FLOW_ID_FW_OES_REG 0x80 +#define UDMA_RX_FLOW_ID_FW_STATUS_REG 0x88 + +/* BCHANRT/TCHANRT/RCHANRT registers */ +#define UDMA_CHAN_RT_CTL_REG 0x0 +#define UDMA_CHAN_RT_SWTRIG_REG 0x8 +#define UDMA_CHAN_RT_STDATA_REG 0x80 + +#define UDMA_CHAN_RT_PEER_REG(i) (0x200 + ((i) * 0x4)) +#define UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG \ + UDMA_CHAN_RT_PEER_REG(0) /* PSI-L: 0x400 */ +#define UDMA_CHAN_RT_PEER_STATIC_TR_Z_REG \ + UDMA_CHAN_RT_PEER_REG(1) /* PSI-L: 0x401 */ +#define UDMA_CHAN_RT_PEER_BCNT_REG \ + UDMA_CHAN_RT_PEER_REG(4) /* PSI-L: 0x404 */ +#define UDMA_CHAN_RT_PEER_RT_EN_REG \ + UDMA_CHAN_RT_PEER_REG(8) /* PSI-L: 0x408 */ + +#define UDMA_CHAN_RT_PCNT_REG 0x400 +#define UDMA_CHAN_RT_BCNT_REG 0x408 +#define UDMA_CHAN_RT_SBCNT_REG 0x410 + +/* UDMA_CAP Registers */ +#define UDMA_CAP2_TCHAN_CNT(val) ((val) & 0x1ff) +#define UDMA_CAP2_ECHAN_CNT(val) (((val) >> 9) & 0x1ff) +#define UDMA_CAP2_RCHAN_CNT(val) (((val) >> 18) & 0x1ff) +#define UDMA_CAP3_RFLOW_CNT(val) ((val) & 0x3fff) +#define UDMA_CAP3_HCHAN_CNT(val) (((val) >> 14) & 0x1ff) +#define UDMA_CAP3_UCHAN_CNT(val) (((val) >> 23) & 0x1ff) + +#define BCDMA_CAP2_BCHAN_CNT(val) ((val) & 0x1ff) +#define BCDMA_CAP2_TCHAN_CNT(val) (((val) >> 9) & 0x1ff) +#define BCDMA_CAP2_RCHAN_CNT(val) (((val) >> 18) & 0x1ff) +#define BCDMA_CAP3_HBCHAN_CNT(val) (((val) >> 14) & 0x1ff) +#define BCDMA_CAP3_UBCHAN_CNT(val) (((val) >> 23) & 0x1ff) +#define BCDMA_CAP4_HRCHAN_CNT(val) ((val) & 0xff) +#define BCDMA_CAP4_URCHAN_CNT(val) (((val) >> 8) & 0xff) +#define BCDMA_CAP4_HTCHAN_CNT(val) (((val) >> 16) & 0xff) +#define BCDMA_CAP4_UTCHAN_CNT(val) (((val) >> 24) & 0xff) + +#define PKTDMA_CAP4_TFLOW_CNT(val) ((val) & 0x3fff) + +/* UDMA_CHAN_RT_CTL_REG */ +#define UDMA_CHAN_RT_CTL_EN BIT(31) +#define UDMA_CHAN_RT_CTL_TDOWN BIT(30) +#define UDMA_CHAN_RT_CTL_PAUSE BIT(29) +#define UDMA_CHAN_RT_CTL_FTDOWN BIT(28) +#define UDMA_CHAN_RT_CTL_ERROR BIT(0) + +/* UDMA_CHAN_RT_PEER_RT_EN_REG */ +#define UDMA_PEER_RT_EN_ENABLE BIT(31) +#define UDMA_PEER_RT_EN_TEARDOWN BIT(30) +#define UDMA_PEER_RT_EN_PAUSE BIT(29) +#define UDMA_PEER_RT_EN_FLUSH BIT(28) +#define UDMA_PEER_RT_EN_IDLE BIT(1) + +/* + * UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG / + * UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG + */ +#define PDMA_STATIC_TR_X_MASK GENMASK(26, 24) +#define PDMA_STATIC_TR_X_SHIFT (24) +#define PDMA_STATIC_TR_Y_MASK GENMASK(11, 0) +#define PDMA_STATIC_TR_Y_SHIFT (0) + +#define PDMA_STATIC_TR_Y(x) \ + (((x) << PDMA_STATIC_TR_Y_SHIFT) & PDMA_STATIC_TR_Y_MASK) +#define PDMA_STATIC_TR_X(x) \ + (((x) << PDMA_STATIC_TR_X_SHIFT) & PDMA_STATIC_TR_X_MASK) + +#define PDMA_STATIC_TR_XY_ACC32 BIT(30) +#define PDMA_STATIC_TR_XY_BURST BIT(31) + +/* + * UDMA_TCHAN_RT_PEER_STATIC_TR_Z_REG / + * UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG + */ +#define PDMA_STATIC_TR_Z(x, mask) ((x) & (mask)) + +/* Address Space Select */ +#define K3_ADDRESS_ASEL_SHIFT 48 + +struct udma_dev; +struct udma_tchan; +struct udma_rchan; +struct udma_rflow; + +enum udma_rm_range { + RM_RANGE_BCHAN = 0, + RM_RANGE_TCHAN, + RM_RANGE_RCHAN, + RM_RANGE_RFLOW, + RM_RANGE_TFLOW, + RM_RANGE_LAST, +}; + +struct udma_tisci_rm { + const struct ti_sci_handle *tisci; + const struct ti_sci_rm_udmap_ops *tisci_udmap_ops; + u32 tisci_dev_id; + + /* tisci information for PSI-L thread pairing/unpairing */ + const struct ti_sci_rm_psil_ops *tisci_psil_ops; + u32 tisci_navss_dev_id; + + struct ti_sci_resource *rm_ranges[RM_RANGE_LAST]; +}; + +/* Direct access to UDMA low lever resources for the glue layer */ +int xudma_navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread); +int xudma_navss_psil_unpair(struct udma_dev *ud, u32 src_thread, + u32 dst_thread); + +struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property); +struct device *xudma_get_device(struct udma_dev *ud); +struct k3_ringacc *xudma_get_ringacc(struct udma_dev *ud); +void xudma_dev_put(struct udma_dev *ud); +u32 xudma_dev_get_psil_base(struct udma_dev *ud); +struct udma_tisci_rm *xudma_dev_get_tisci_rm(struct udma_dev *ud); + +int xudma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt); +int xudma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt); + +struct udma_tchan *xudma_tchan_get(struct udma_dev *ud, int id); +struct udma_rchan *xudma_rchan_get(struct udma_dev *ud, int id); +struct udma_rflow *xudma_rflow_get(struct udma_dev *ud, int id); + +void xudma_tchan_put(struct udma_dev *ud, struct udma_tchan *p); +void xudma_rchan_put(struct udma_dev *ud, struct udma_rchan *p); +void xudma_rflow_put(struct udma_dev *ud, struct udma_rflow *p); + +int xudma_tchan_get_id(struct udma_tchan *p); +int xudma_rchan_get_id(struct udma_rchan *p); +int xudma_rflow_get_id(struct udma_rflow *p); + +u32 xudma_tchanrt_read(struct udma_tchan *tchan, int reg); +void xudma_tchanrt_write(struct udma_tchan *tchan, int reg, u32 val); +u32 xudma_rchanrt_read(struct udma_rchan *rchan, int reg); +void xudma_rchanrt_write(struct udma_rchan *rchan, int reg, u32 val); +bool xudma_rflow_is_gp(struct udma_dev *ud, int id); +int xudma_get_rflow_ring_offset(struct udma_dev *ud); + +int xudma_is_pktdma(struct udma_dev *ud); + +int xudma_pktdma_tflow_get_irq(struct udma_dev *ud, int udma_tflow_id); +int xudma_pktdma_rflow_get_irq(struct udma_dev *ud, int udma_rflow_id); +#endif /* K3_UDMA_H_ */ diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c new file mode 100644 index 0000000000..cf96cf915c --- /dev/null +++ b/drivers/dma/ti/omap-dma.c @@ -0,0 +1,1956 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * OMAP DMAengine support + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../virt-dma.h" + +#define OMAP_SDMA_REQUESTS 127 +#define OMAP_SDMA_CHANNELS 32 + +struct omap_dma_config { + int lch_end; + unsigned int rw_priority:1; + unsigned int needs_busy_check:1; + unsigned int may_lose_context:1; + unsigned int needs_lch_clear:1; +}; + +struct omap_dma_context { + u32 irqenable_l0; + u32 irqenable_l1; + u32 ocp_sysconfig; + u32 gcr; +}; + +struct omap_dmadev { + struct dma_device ddev; + spinlock_t lock; + void __iomem *base; + const struct omap_dma_reg *reg_map; + struct omap_system_dma_plat_info *plat; + const struct omap_dma_config *cfg; + struct notifier_block nb; + struct omap_dma_context context; + int lch_count; + DECLARE_BITMAP(lch_bitmap, OMAP_SDMA_CHANNELS); + struct mutex lch_lock; /* for assigning logical channels */ + bool legacy; + bool ll123_supported; + struct dma_pool *desc_pool; + unsigned dma_requests; + spinlock_t irq_lock; + uint32_t irq_enable_mask; + struct omap_chan **lch_map; +}; + +struct omap_chan { + struct virt_dma_chan vc; + void __iomem *channel_base; + const struct omap_dma_reg *reg_map; + uint32_t ccr; + + struct dma_slave_config cfg; + unsigned dma_sig; + bool cyclic; + bool paused; + bool running; + + int dma_ch; + struct omap_desc *desc; + unsigned sgidx; +}; + +#define DESC_NXT_SV_REFRESH (0x1 << 24) +#define DESC_NXT_SV_REUSE (0x2 << 24) +#define DESC_NXT_DV_REFRESH (0x1 << 26) +#define DESC_NXT_DV_REUSE (0x2 << 26) +#define DESC_NTYPE_TYPE2 (0x2 << 29) + +/* Type 2 descriptor with Source or Destination address update */ +struct omap_type2_desc { + uint32_t next_desc; + uint32_t en; + uint32_t addr; /* src or dst */ + uint16_t fn; + uint16_t cicr; + int16_t cdei; + int16_t csei; + int32_t cdfi; + int32_t csfi; +} __packed; + +struct omap_sg { + dma_addr_t addr; + uint32_t en; /* number of elements (24-bit) */ + uint32_t fn; /* number of frames (16-bit) */ + int32_t fi; /* for double indexing */ + int16_t ei; /* for double indexing */ + + /* Linked list */ + struct omap_type2_desc *t2_desc; + dma_addr_t t2_desc_paddr; +}; + +struct omap_desc { + struct virt_dma_desc vd; + bool using_ll; + enum dma_transfer_direction dir; + dma_addr_t dev_addr; + bool polled; + + int32_t fi; /* for OMAP_DMA_SYNC_PACKET / double indexing */ + int16_t ei; /* for double indexing */ + uint8_t es; /* CSDP_DATA_TYPE_xxx */ + uint32_t ccr; /* CCR value */ + uint16_t clnk_ctrl; /* CLNK_CTRL value */ + uint16_t cicr; /* CICR value */ + uint32_t csdp; /* CSDP value */ + + unsigned sglen; + struct omap_sg sg[]; +}; + +enum { + CAPS_0_SUPPORT_LL123 = BIT(20), /* Linked List type1/2/3 */ + CAPS_0_SUPPORT_LL4 = BIT(21), /* Linked List type4 */ + + CCR_FS = BIT(5), + CCR_READ_PRIORITY = BIT(6), + CCR_ENABLE = BIT(7), + CCR_AUTO_INIT = BIT(8), /* OMAP1 only */ + CCR_REPEAT = BIT(9), /* OMAP1 only */ + CCR_OMAP31_DISABLE = BIT(10), /* OMAP1 only */ + CCR_SUSPEND_SENSITIVE = BIT(8), /* OMAP2+ only */ + CCR_RD_ACTIVE = BIT(9), /* OMAP2+ only */ + CCR_WR_ACTIVE = BIT(10), /* OMAP2+ only */ + CCR_SRC_AMODE_CONSTANT = 0 << 12, + CCR_SRC_AMODE_POSTINC = 1 << 12, + CCR_SRC_AMODE_SGLIDX = 2 << 12, + CCR_SRC_AMODE_DBLIDX = 3 << 12, + CCR_DST_AMODE_CONSTANT = 0 << 14, + CCR_DST_AMODE_POSTINC = 1 << 14, + CCR_DST_AMODE_SGLIDX = 2 << 14, + CCR_DST_AMODE_DBLIDX = 3 << 14, + CCR_CONSTANT_FILL = BIT(16), + CCR_TRANSPARENT_COPY = BIT(17), + CCR_BS = BIT(18), + CCR_SUPERVISOR = BIT(22), + CCR_PREFETCH = BIT(23), + CCR_TRIGGER_SRC = BIT(24), + CCR_BUFFERING_DISABLE = BIT(25), + CCR_WRITE_PRIORITY = BIT(26), + CCR_SYNC_ELEMENT = 0, + CCR_SYNC_FRAME = CCR_FS, + CCR_SYNC_BLOCK = CCR_BS, + CCR_SYNC_PACKET = CCR_BS | CCR_FS, + + CSDP_DATA_TYPE_8 = 0, + CSDP_DATA_TYPE_16 = 1, + CSDP_DATA_TYPE_32 = 2, + CSDP_SRC_PORT_EMIFF = 0 << 2, /* OMAP1 only */ + CSDP_SRC_PORT_EMIFS = 1 << 2, /* OMAP1 only */ + CSDP_SRC_PORT_OCP_T1 = 2 << 2, /* OMAP1 only */ + CSDP_SRC_PORT_TIPB = 3 << 2, /* OMAP1 only */ + CSDP_SRC_PORT_OCP_T2 = 4 << 2, /* OMAP1 only */ + CSDP_SRC_PORT_MPUI = 5 << 2, /* OMAP1 only */ + CSDP_SRC_PACKED = BIT(6), + CSDP_SRC_BURST_1 = 0 << 7, + CSDP_SRC_BURST_16 = 1 << 7, + CSDP_SRC_BURST_32 = 2 << 7, + CSDP_SRC_BURST_64 = 3 << 7, + CSDP_DST_PORT_EMIFF = 0 << 9, /* OMAP1 only */ + CSDP_DST_PORT_EMIFS = 1 << 9, /* OMAP1 only */ + CSDP_DST_PORT_OCP_T1 = 2 << 9, /* OMAP1 only */ + CSDP_DST_PORT_TIPB = 3 << 9, /* OMAP1 only */ + CSDP_DST_PORT_OCP_T2 = 4 << 9, /* OMAP1 only */ + CSDP_DST_PORT_MPUI = 5 << 9, /* OMAP1 only */ + CSDP_DST_PACKED = BIT(13), + CSDP_DST_BURST_1 = 0 << 14, + CSDP_DST_BURST_16 = 1 << 14, + CSDP_DST_BURST_32 = 2 << 14, + CSDP_DST_BURST_64 = 3 << 14, + CSDP_WRITE_NON_POSTED = 0 << 16, + CSDP_WRITE_POSTED = 1 << 16, + CSDP_WRITE_LAST_NON_POSTED = 2 << 16, + + CICR_TOUT_IE = BIT(0), /* OMAP1 only */ + CICR_DROP_IE = BIT(1), + CICR_HALF_IE = BIT(2), + CICR_FRAME_IE = BIT(3), + CICR_LAST_IE = BIT(4), + CICR_BLOCK_IE = BIT(5), + CICR_PKT_IE = BIT(7), /* OMAP2+ only */ + CICR_TRANS_ERR_IE = BIT(8), /* OMAP2+ only */ + CICR_SUPERVISOR_ERR_IE = BIT(10), /* OMAP2+ only */ + CICR_MISALIGNED_ERR_IE = BIT(11), /* OMAP2+ only */ + CICR_DRAIN_IE = BIT(12), /* OMAP2+ only */ + CICR_SUPER_BLOCK_IE = BIT(14), /* OMAP2+ only */ + + CLNK_CTRL_ENABLE_LNK = BIT(15), + + CDP_DST_VALID_INC = 0 << 0, + CDP_DST_VALID_RELOAD = 1 << 0, + CDP_DST_VALID_REUSE = 2 << 0, + CDP_SRC_VALID_INC = 0 << 2, + CDP_SRC_VALID_RELOAD = 1 << 2, + CDP_SRC_VALID_REUSE = 2 << 2, + CDP_NTYPE_TYPE1 = 1 << 4, + CDP_NTYPE_TYPE2 = 2 << 4, + CDP_NTYPE_TYPE3 = 3 << 4, + CDP_TMODE_NORMAL = 0 << 8, + CDP_TMODE_LLIST = 1 << 8, + CDP_FAST = BIT(10), +}; + +static const unsigned es_bytes[] = { + [CSDP_DATA_TYPE_8] = 1, + [CSDP_DATA_TYPE_16] = 2, + [CSDP_DATA_TYPE_32] = 4, +}; + +static bool omap_dma_filter_fn(struct dma_chan *chan, void *param); +static struct of_dma_filter_info omap_dma_info = { + .filter_fn = omap_dma_filter_fn, +}; + +static inline struct omap_dmadev *to_omap_dma_dev(struct dma_device *d) +{ + return container_of(d, struct omap_dmadev, ddev); +} + +static inline struct omap_chan *to_omap_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct omap_chan, vc.chan); +} + +static inline struct omap_desc *to_omap_dma_desc(struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct omap_desc, vd.tx); +} + +static void omap_dma_desc_free(struct virt_dma_desc *vd) +{ + struct omap_desc *d = to_omap_dma_desc(&vd->tx); + + if (d->using_ll) { + struct omap_dmadev *od = to_omap_dma_dev(vd->tx.chan->device); + int i; + + for (i = 0; i < d->sglen; i++) { + if (d->sg[i].t2_desc) + dma_pool_free(od->desc_pool, d->sg[i].t2_desc, + d->sg[i].t2_desc_paddr); + } + } + + kfree(d); +} + +static void omap_dma_fill_type2_desc(struct omap_desc *d, int idx, + enum dma_transfer_direction dir, bool last) +{ + struct omap_sg *sg = &d->sg[idx]; + struct omap_type2_desc *t2_desc = sg->t2_desc; + + if (idx) + d->sg[idx - 1].t2_desc->next_desc = sg->t2_desc_paddr; + if (last) + t2_desc->next_desc = 0xfffffffc; + + t2_desc->en = sg->en; + t2_desc->addr = sg->addr; + t2_desc->fn = sg->fn & 0xffff; + t2_desc->cicr = d->cicr; + if (!last) + t2_desc->cicr &= ~CICR_BLOCK_IE; + + switch (dir) { + case DMA_DEV_TO_MEM: + t2_desc->cdei = sg->ei; + t2_desc->csei = d->ei; + t2_desc->cdfi = sg->fi; + t2_desc->csfi = d->fi; + + t2_desc->en |= DESC_NXT_DV_REFRESH; + t2_desc->en |= DESC_NXT_SV_REUSE; + break; + case DMA_MEM_TO_DEV: + t2_desc->cdei = d->ei; + t2_desc->csei = sg->ei; + t2_desc->cdfi = d->fi; + t2_desc->csfi = sg->fi; + + t2_desc->en |= DESC_NXT_SV_REFRESH; + t2_desc->en |= DESC_NXT_DV_REUSE; + break; + default: + return; + } + + t2_desc->en |= DESC_NTYPE_TYPE2; +} + +static void omap_dma_write(uint32_t val, unsigned type, void __iomem *addr) +{ + switch (type) { + case OMAP_DMA_REG_16BIT: + writew_relaxed(val, addr); + break; + case OMAP_DMA_REG_2X16BIT: + writew_relaxed(val, addr); + writew_relaxed(val >> 16, addr + 2); + break; + case OMAP_DMA_REG_32BIT: + writel_relaxed(val, addr); + break; + default: + WARN_ON(1); + } +} + +static unsigned omap_dma_read(unsigned type, void __iomem *addr) +{ + unsigned val; + + switch (type) { + case OMAP_DMA_REG_16BIT: + val = readw_relaxed(addr); + break; + case OMAP_DMA_REG_2X16BIT: + val = readw_relaxed(addr); + val |= readw_relaxed(addr + 2) << 16; + break; + case OMAP_DMA_REG_32BIT: + val = readl_relaxed(addr); + break; + default: + WARN_ON(1); + val = 0; + } + + return val; +} + +static void omap_dma_glbl_write(struct omap_dmadev *od, unsigned reg, unsigned val) +{ + const struct omap_dma_reg *r = od->reg_map + reg; + + WARN_ON(r->stride); + + omap_dma_write(val, r->type, od->base + r->offset); +} + +static unsigned omap_dma_glbl_read(struct omap_dmadev *od, unsigned reg) +{ + const struct omap_dma_reg *r = od->reg_map + reg; + + WARN_ON(r->stride); + + return omap_dma_read(r->type, od->base + r->offset); +} + +static void omap_dma_chan_write(struct omap_chan *c, unsigned reg, unsigned val) +{ + const struct omap_dma_reg *r = c->reg_map + reg; + + omap_dma_write(val, r->type, c->channel_base + r->offset); +} + +static unsigned omap_dma_chan_read(struct omap_chan *c, unsigned reg) +{ + const struct omap_dma_reg *r = c->reg_map + reg; + + return omap_dma_read(r->type, c->channel_base + r->offset); +} + +static void omap_dma_clear_csr(struct omap_chan *c) +{ + if (dma_omap1()) + omap_dma_chan_read(c, CSR); + else + omap_dma_chan_write(c, CSR, ~0); +} + +static unsigned omap_dma_get_csr(struct omap_chan *c) +{ + unsigned val = omap_dma_chan_read(c, CSR); + + if (!dma_omap1()) + omap_dma_chan_write(c, CSR, val); + + return val; +} + +static void omap_dma_clear_lch(struct omap_dmadev *od, int lch) +{ + struct omap_chan *c; + int i; + + c = od->lch_map[lch]; + if (!c) + return; + + for (i = CSDP; i <= od->cfg->lch_end; i++) + omap_dma_chan_write(c, i, 0); +} + +static void omap_dma_assign(struct omap_dmadev *od, struct omap_chan *c, + unsigned lch) +{ + c->channel_base = od->base + od->plat->channel_stride * lch; + + od->lch_map[lch] = c; +} + +static void omap_dma_start(struct omap_chan *c, struct omap_desc *d) +{ + struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device); + uint16_t cicr = d->cicr; + + if (__dma_omap15xx(od->plat->dma_attr)) + omap_dma_chan_write(c, CPC, 0); + else + omap_dma_chan_write(c, CDAC, 0); + + omap_dma_clear_csr(c); + + if (d->using_ll) { + uint32_t cdp = CDP_TMODE_LLIST | CDP_NTYPE_TYPE2 | CDP_FAST; + + if (d->dir == DMA_DEV_TO_MEM) + cdp |= (CDP_DST_VALID_RELOAD | CDP_SRC_VALID_REUSE); + else + cdp |= (CDP_DST_VALID_REUSE | CDP_SRC_VALID_RELOAD); + omap_dma_chan_write(c, CDP, cdp); + + omap_dma_chan_write(c, CNDP, d->sg[0].t2_desc_paddr); + omap_dma_chan_write(c, CCDN, 0); + omap_dma_chan_write(c, CCFN, 0xffff); + omap_dma_chan_write(c, CCEN, 0xffffff); + + cicr &= ~CICR_BLOCK_IE; + } else if (od->ll123_supported) { + omap_dma_chan_write(c, CDP, 0); + } + + /* Enable interrupts */ + omap_dma_chan_write(c, CICR, cicr); + + /* Enable channel */ + omap_dma_chan_write(c, CCR, d->ccr | CCR_ENABLE); + + c->running = true; +} + +static void omap_dma_drain_chan(struct omap_chan *c) +{ + int i; + u32 val; + + /* Wait for sDMA FIFO to drain */ + for (i = 0; ; i++) { + val = omap_dma_chan_read(c, CCR); + if (!(val & (CCR_RD_ACTIVE | CCR_WR_ACTIVE))) + break; + + if (i > 100) + break; + + udelay(5); + } + + if (val & (CCR_RD_ACTIVE | CCR_WR_ACTIVE)) + dev_err(c->vc.chan.device->dev, + "DMA drain did not complete on lch %d\n", + c->dma_ch); +} + +static int omap_dma_stop(struct omap_chan *c) +{ + struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device); + uint32_t val; + + /* disable irq */ + omap_dma_chan_write(c, CICR, 0); + + omap_dma_clear_csr(c); + + val = omap_dma_chan_read(c, CCR); + if (od->plat->errata & DMA_ERRATA_i541 && val & CCR_TRIGGER_SRC) { + uint32_t sysconfig; + + sysconfig = omap_dma_glbl_read(od, OCP_SYSCONFIG); + val = sysconfig & ~DMA_SYSCONFIG_MIDLEMODE_MASK; + val |= DMA_SYSCONFIG_MIDLEMODE(DMA_IDLEMODE_NO_IDLE); + omap_dma_glbl_write(od, OCP_SYSCONFIG, val); + + val = omap_dma_chan_read(c, CCR); + val &= ~CCR_ENABLE; + omap_dma_chan_write(c, CCR, val); + + if (!(c->ccr & CCR_BUFFERING_DISABLE)) + omap_dma_drain_chan(c); + + omap_dma_glbl_write(od, OCP_SYSCONFIG, sysconfig); + } else { + if (!(val & CCR_ENABLE)) + return -EINVAL; + + val &= ~CCR_ENABLE; + omap_dma_chan_write(c, CCR, val); + + if (!(c->ccr & CCR_BUFFERING_DISABLE)) + omap_dma_drain_chan(c); + } + + mb(); + + if (!__dma_omap15xx(od->plat->dma_attr) && c->cyclic) { + val = omap_dma_chan_read(c, CLNK_CTRL); + + if (dma_omap1()) + val |= 1 << 14; /* set the STOP_LNK bit */ + else + val &= ~CLNK_CTRL_ENABLE_LNK; + + omap_dma_chan_write(c, CLNK_CTRL, val); + } + c->running = false; + return 0; +} + +static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d) +{ + struct omap_sg *sg = d->sg + c->sgidx; + unsigned cxsa, cxei, cxfi; + + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { + cxsa = CDSA; + cxei = CDEI; + cxfi = CDFI; + } else { + cxsa = CSSA; + cxei = CSEI; + cxfi = CSFI; + } + + omap_dma_chan_write(c, cxsa, sg->addr); + omap_dma_chan_write(c, cxei, sg->ei); + omap_dma_chan_write(c, cxfi, sg->fi); + omap_dma_chan_write(c, CEN, sg->en); + omap_dma_chan_write(c, CFN, sg->fn); + + omap_dma_start(c, d); + c->sgidx++; +} + +static void omap_dma_start_desc(struct omap_chan *c) +{ + struct virt_dma_desc *vd = vchan_next_desc(&c->vc); + struct omap_desc *d; + unsigned cxsa, cxei, cxfi; + + if (!vd) { + c->desc = NULL; + return; + } + + list_del(&vd->node); + + c->desc = d = to_omap_dma_desc(&vd->tx); + c->sgidx = 0; + + /* + * This provides the necessary barrier to ensure data held in + * DMA coherent memory is visible to the DMA engine prior to + * the transfer starting. + */ + mb(); + + omap_dma_chan_write(c, CCR, d->ccr); + if (dma_omap1()) + omap_dma_chan_write(c, CCR2, d->ccr >> 16); + + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { + cxsa = CSSA; + cxei = CSEI; + cxfi = CSFI; + } else { + cxsa = CDSA; + cxei = CDEI; + cxfi = CDFI; + } + + omap_dma_chan_write(c, cxsa, d->dev_addr); + omap_dma_chan_write(c, cxei, d->ei); + omap_dma_chan_write(c, cxfi, d->fi); + omap_dma_chan_write(c, CSDP, d->csdp); + omap_dma_chan_write(c, CLNK_CTRL, d->clnk_ctrl); + + omap_dma_start_sg(c, d); +} + +static void omap_dma_callback(int ch, u16 status, void *data) +{ + struct omap_chan *c = data; + struct omap_desc *d; + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + d = c->desc; + if (d) { + if (c->cyclic) { + vchan_cyclic_callback(&d->vd); + } else if (d->using_ll || c->sgidx == d->sglen) { + omap_dma_start_desc(c); + vchan_cookie_complete(&d->vd); + } else { + omap_dma_start_sg(c, d); + } + } + spin_unlock_irqrestore(&c->vc.lock, flags); +} + +static irqreturn_t omap_dma_irq(int irq, void *devid) +{ + struct omap_dmadev *od = devid; + unsigned status, channel; + + spin_lock(&od->irq_lock); + + status = omap_dma_glbl_read(od, IRQSTATUS_L1); + status &= od->irq_enable_mask; + if (status == 0) { + spin_unlock(&od->irq_lock); + return IRQ_NONE; + } + + while ((channel = ffs(status)) != 0) { + unsigned mask, csr; + struct omap_chan *c; + + channel -= 1; + mask = BIT(channel); + status &= ~mask; + + c = od->lch_map[channel]; + if (c == NULL) { + /* This should never happen */ + dev_err(od->ddev.dev, "invalid channel %u\n", channel); + continue; + } + + csr = omap_dma_get_csr(c); + omap_dma_glbl_write(od, IRQSTATUS_L1, mask); + + omap_dma_callback(channel, csr, c); + } + + spin_unlock(&od->irq_lock); + + return IRQ_HANDLED; +} + +static int omap_dma_get_lch(struct omap_dmadev *od, int *lch) +{ + int channel; + + mutex_lock(&od->lch_lock); + channel = find_first_zero_bit(od->lch_bitmap, od->lch_count); + if (channel >= od->lch_count) + goto out_busy; + set_bit(channel, od->lch_bitmap); + mutex_unlock(&od->lch_lock); + + omap_dma_clear_lch(od, channel); + *lch = channel; + + return 0; + +out_busy: + mutex_unlock(&od->lch_lock); + *lch = -EINVAL; + + return -EBUSY; +} + +static void omap_dma_put_lch(struct omap_dmadev *od, int lch) +{ + omap_dma_clear_lch(od, lch); + mutex_lock(&od->lch_lock); + clear_bit(lch, od->lch_bitmap); + mutex_unlock(&od->lch_lock); +} + +static inline bool omap_dma_legacy(struct omap_dmadev *od) +{ + return IS_ENABLED(CONFIG_ARCH_OMAP1) && od->legacy; +} + +static int omap_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct omap_dmadev *od = to_omap_dma_dev(chan->device); + struct omap_chan *c = to_omap_dma_chan(chan); + struct device *dev = od->ddev.dev; + int ret; + + if (omap_dma_legacy(od)) { + ret = omap_request_dma(c->dma_sig, "DMA engine", + omap_dma_callback, c, &c->dma_ch); + } else { + ret = omap_dma_get_lch(od, &c->dma_ch); + } + + dev_dbg(dev, "allocating channel %u for %u\n", c->dma_ch, c->dma_sig); + + if (ret >= 0) { + omap_dma_assign(od, c, c->dma_ch); + + if (!omap_dma_legacy(od)) { + unsigned val; + + spin_lock_irq(&od->irq_lock); + val = BIT(c->dma_ch); + omap_dma_glbl_write(od, IRQSTATUS_L1, val); + od->irq_enable_mask |= val; + omap_dma_glbl_write(od, IRQENABLE_L1, od->irq_enable_mask); + + val = omap_dma_glbl_read(od, IRQENABLE_L0); + val &= ~BIT(c->dma_ch); + omap_dma_glbl_write(od, IRQENABLE_L0, val); + spin_unlock_irq(&od->irq_lock); + } + } + + if (dma_omap1()) { + if (__dma_omap16xx(od->plat->dma_attr)) { + c->ccr = CCR_OMAP31_DISABLE; + /* Duplicate what plat-omap/dma.c does */ + c->ccr |= c->dma_ch + 1; + } else { + c->ccr = c->dma_sig & 0x1f; + } + } else { + c->ccr = c->dma_sig & 0x1f; + c->ccr |= (c->dma_sig & ~0x1f) << 14; + } + if (od->plat->errata & DMA_ERRATA_IFRAME_BUFFERING) + c->ccr |= CCR_BUFFERING_DISABLE; + + return ret; +} + +static void omap_dma_free_chan_resources(struct dma_chan *chan) +{ + struct omap_dmadev *od = to_omap_dma_dev(chan->device); + struct omap_chan *c = to_omap_dma_chan(chan); + + if (!omap_dma_legacy(od)) { + spin_lock_irq(&od->irq_lock); + od->irq_enable_mask &= ~BIT(c->dma_ch); + omap_dma_glbl_write(od, IRQENABLE_L1, od->irq_enable_mask); + spin_unlock_irq(&od->irq_lock); + } + + c->channel_base = NULL; + od->lch_map[c->dma_ch] = NULL; + vchan_free_chan_resources(&c->vc); + + if (omap_dma_legacy(od)) + omap_free_dma(c->dma_ch); + else + omap_dma_put_lch(od, c->dma_ch); + + dev_dbg(od->ddev.dev, "freeing channel %u used for %u\n", c->dma_ch, + c->dma_sig); + c->dma_sig = 0; +} + +static size_t omap_dma_sg_size(struct omap_sg *sg) +{ + return sg->en * sg->fn; +} + +static size_t omap_dma_desc_size(struct omap_desc *d) +{ + unsigned i; + size_t size; + + for (size = i = 0; i < d->sglen; i++) + size += omap_dma_sg_size(&d->sg[i]); + + return size * es_bytes[d->es]; +} + +static size_t omap_dma_desc_size_pos(struct omap_desc *d, dma_addr_t addr) +{ + unsigned i; + size_t size, es_size = es_bytes[d->es]; + + for (size = i = 0; i < d->sglen; i++) { + size_t this_size = omap_dma_sg_size(&d->sg[i]) * es_size; + + if (size) + size += this_size; + else if (addr >= d->sg[i].addr && + addr < d->sg[i].addr + this_size) + size += d->sg[i].addr + this_size - addr; + } + return size; +} + +/* + * OMAP 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is + * read before the DMA controller finished disabling the channel. + */ +static uint32_t omap_dma_chan_read_3_3(struct omap_chan *c, unsigned reg) +{ + struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device); + uint32_t val; + + val = omap_dma_chan_read(c, reg); + if (val == 0 && od->plat->errata & DMA_ERRATA_3_3) + val = omap_dma_chan_read(c, reg); + + return val; +} + +static dma_addr_t omap_dma_get_src_pos(struct omap_chan *c) +{ + struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device); + dma_addr_t addr, cdac; + + if (__dma_omap15xx(od->plat->dma_attr)) { + addr = omap_dma_chan_read(c, CPC); + } else { + addr = omap_dma_chan_read_3_3(c, CSAC); + cdac = omap_dma_chan_read_3_3(c, CDAC); + + /* + * CDAC == 0 indicates that the DMA transfer on the channel has + * not been started (no data has been transferred so far). + * Return the programmed source start address in this case. + */ + if (cdac == 0) + addr = omap_dma_chan_read(c, CSSA); + } + + if (dma_omap1()) + addr |= omap_dma_chan_read(c, CSSA) & 0xffff0000; + + return addr; +} + +static dma_addr_t omap_dma_get_dst_pos(struct omap_chan *c) +{ + struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device); + dma_addr_t addr; + + if (__dma_omap15xx(od->plat->dma_attr)) { + addr = omap_dma_chan_read(c, CPC); + } else { + addr = omap_dma_chan_read_3_3(c, CDAC); + + /* + * CDAC == 0 indicates that the DMA transfer on the channel + * has not been started (no data has been transferred so + * far). Return the programmed destination start address in + * this case. + */ + if (addr == 0) + addr = omap_dma_chan_read(c, CDSA); + } + + if (dma_omap1()) + addr |= omap_dma_chan_read(c, CDSA) & 0xffff0000; + + return addr; +} + +static enum dma_status omap_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + enum dma_status ret; + unsigned long flags; + struct omap_desc *d = NULL; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_irqsave(&c->vc.lock, flags); + if (c->desc && c->desc->vd.tx.cookie == cookie) + d = c->desc; + + if (!txstate) + goto out; + + if (d) { + dma_addr_t pos; + + if (d->dir == DMA_MEM_TO_DEV) + pos = omap_dma_get_src_pos(c); + else if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) + pos = omap_dma_get_dst_pos(c); + else + pos = 0; + + txstate->residue = omap_dma_desc_size_pos(d, pos); + } else { + struct virt_dma_desc *vd = vchan_find_desc(&c->vc, cookie); + + if (vd) + txstate->residue = omap_dma_desc_size( + to_omap_dma_desc(&vd->tx)); + else + txstate->residue = 0; + } + +out: + if (ret == DMA_IN_PROGRESS && c->paused) { + ret = DMA_PAUSED; + } else if (d && d->polled && c->running) { + uint32_t ccr = omap_dma_chan_read(c, CCR); + /* + * The channel is no longer active, set the return value + * accordingly and mark it as completed + */ + if (!(ccr & CCR_ENABLE)) { + ret = DMA_COMPLETE; + omap_dma_start_desc(c); + vchan_cookie_complete(&d->vd); + } + } + + spin_unlock_irqrestore(&c->vc.lock, flags); + + return ret; +} + +static void omap_dma_issue_pending(struct dma_chan *chan) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + if (vchan_issue_pending(&c->vc) && !c->desc) + omap_dma_start_desc(c); + spin_unlock_irqrestore(&c->vc.lock, flags); +} + +static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned sglen, + enum dma_transfer_direction dir, unsigned long tx_flags, void *context) +{ + struct omap_dmadev *od = to_omap_dma_dev(chan->device); + struct omap_chan *c = to_omap_dma_chan(chan); + enum dma_slave_buswidth dev_width; + struct scatterlist *sgent; + struct omap_desc *d; + dma_addr_t dev_addr; + unsigned i, es, en, frame_bytes; + bool ll_failed = false; + u32 burst; + u32 port_window, port_window_bytes; + + if (dir == DMA_DEV_TO_MEM) { + dev_addr = c->cfg.src_addr; + dev_width = c->cfg.src_addr_width; + burst = c->cfg.src_maxburst; + port_window = c->cfg.src_port_window_size; + } else if (dir == DMA_MEM_TO_DEV) { + dev_addr = c->cfg.dst_addr; + dev_width = c->cfg.dst_addr_width; + burst = c->cfg.dst_maxburst; + port_window = c->cfg.dst_port_window_size; + } else { + dev_err(chan->device->dev, "%s: bad direction?\n", __func__); + return NULL; + } + + /* Bus width translates to the element size (ES) */ + switch (dev_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + es = CSDP_DATA_TYPE_8; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + es = CSDP_DATA_TYPE_16; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + es = CSDP_DATA_TYPE_32; + break; + default: /* not reached */ + return NULL; + } + + /* Now allocate and setup the descriptor. */ + d = kzalloc(struct_size(d, sg, sglen), GFP_ATOMIC); + if (!d) + return NULL; + + d->dir = dir; + d->dev_addr = dev_addr; + d->es = es; + + /* When the port_window is used, one frame must cover the window */ + if (port_window) { + burst = port_window; + port_window_bytes = port_window * es_bytes[es]; + + d->ei = 1; + /* + * One frame covers the port_window and by configure + * the source frame index to be -1 * (port_window - 1) + * we instruct the sDMA that after a frame is processed + * it should move back to the start of the window. + */ + d->fi = -(port_window_bytes - 1); + } + + d->ccr = c->ccr | CCR_SYNC_FRAME; + if (dir == DMA_DEV_TO_MEM) { + d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED; + + d->ccr |= CCR_DST_AMODE_POSTINC; + if (port_window) { + d->ccr |= CCR_SRC_AMODE_DBLIDX; + + if (port_window_bytes >= 64) + d->csdp |= CSDP_SRC_BURST_64; + else if (port_window_bytes >= 32) + d->csdp |= CSDP_SRC_BURST_32; + else if (port_window_bytes >= 16) + d->csdp |= CSDP_SRC_BURST_16; + + } else { + d->ccr |= CCR_SRC_AMODE_CONSTANT; + } + } else { + d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED; + + d->ccr |= CCR_SRC_AMODE_POSTINC; + if (port_window) { + d->ccr |= CCR_DST_AMODE_DBLIDX; + + if (port_window_bytes >= 64) + d->csdp |= CSDP_DST_BURST_64; + else if (port_window_bytes >= 32) + d->csdp |= CSDP_DST_BURST_32; + else if (port_window_bytes >= 16) + d->csdp |= CSDP_DST_BURST_16; + } else { + d->ccr |= CCR_DST_AMODE_CONSTANT; + } + } + + d->cicr = CICR_DROP_IE | CICR_BLOCK_IE; + d->csdp |= es; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + + if (dir == DMA_DEV_TO_MEM) + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_TIPB; + else + d->csdp |= CSDP_DST_PORT_TIPB | CSDP_SRC_PORT_EMIFF; + } else { + if (dir == DMA_DEV_TO_MEM) + d->ccr |= CCR_TRIGGER_SRC; + + d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + + if (port_window) + d->csdp |= CSDP_WRITE_LAST_NON_POSTED; + } + if (od->plat->errata & DMA_ERRATA_PARALLEL_CHANNELS) + d->clnk_ctrl = c->dma_ch; + + /* + * Build our scatterlist entries: each contains the address, + * the number of elements (EN) in each frame, and the number of + * frames (FN). Number of bytes for this entry = ES * EN * FN. + * + * Burst size translates to number of elements with frame sync. + * Note: DMA engine defines burst to be the number of dev-width + * transfers. + */ + en = burst; + frame_bytes = es_bytes[es] * en; + + if (sglen >= 2) + d->using_ll = od->ll123_supported; + + for_each_sg(sgl, sgent, sglen, i) { + struct omap_sg *osg = &d->sg[i]; + + osg->addr = sg_dma_address(sgent); + osg->en = en; + osg->fn = sg_dma_len(sgent) / frame_bytes; + + if (d->using_ll) { + osg->t2_desc = dma_pool_alloc(od->desc_pool, GFP_ATOMIC, + &osg->t2_desc_paddr); + if (!osg->t2_desc) { + dev_err(chan->device->dev, + "t2_desc[%d] allocation failed\n", i); + ll_failed = true; + d->using_ll = false; + continue; + } + + omap_dma_fill_type2_desc(d, i, dir, (i == sglen - 1)); + } + } + + d->sglen = sglen; + + /* Release the dma_pool entries if one allocation failed */ + if (ll_failed) { + for (i = 0; i < d->sglen; i++) { + struct omap_sg *osg = &d->sg[i]; + + if (osg->t2_desc) { + dma_pool_free(od->desc_pool, osg->t2_desc, + osg->t2_desc_paddr); + osg->t2_desc = NULL; + } + } + } + + return vchan_tx_prep(&c->vc, &d->vd, tx_flags); +} + +static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction dir, unsigned long flags) +{ + struct omap_dmadev *od = to_omap_dma_dev(chan->device); + struct omap_chan *c = to_omap_dma_chan(chan); + enum dma_slave_buswidth dev_width; + struct omap_desc *d; + dma_addr_t dev_addr; + unsigned es; + u32 burst; + + if (dir == DMA_DEV_TO_MEM) { + dev_addr = c->cfg.src_addr; + dev_width = c->cfg.src_addr_width; + burst = c->cfg.src_maxburst; + } else if (dir == DMA_MEM_TO_DEV) { + dev_addr = c->cfg.dst_addr; + dev_width = c->cfg.dst_addr_width; + burst = c->cfg.dst_maxburst; + } else { + dev_err(chan->device->dev, "%s: bad direction?\n", __func__); + return NULL; + } + + /* Bus width translates to the element size (ES) */ + switch (dev_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + es = CSDP_DATA_TYPE_8; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + es = CSDP_DATA_TYPE_16; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + es = CSDP_DATA_TYPE_32; + break; + default: /* not reached */ + return NULL; + } + + /* Now allocate and setup the descriptor. */ + d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC); + if (!d) + return NULL; + + d->dir = dir; + d->dev_addr = dev_addr; + d->fi = burst; + d->es = es; + d->sg[0].addr = buf_addr; + d->sg[0].en = period_len / es_bytes[es]; + d->sg[0].fn = buf_len / period_len; + d->sglen = 1; + + d->ccr = c->ccr; + if (dir == DMA_DEV_TO_MEM) + d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_CONSTANT; + else + d->ccr |= CCR_DST_AMODE_CONSTANT | CCR_SRC_AMODE_POSTINC; + + d->cicr = CICR_DROP_IE; + if (flags & DMA_PREP_INTERRUPT) + d->cicr |= CICR_FRAME_IE; + + d->csdp = es; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + + if (dir == DMA_DEV_TO_MEM) + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_MPUI; + else + d->csdp |= CSDP_DST_PORT_MPUI | CSDP_SRC_PORT_EMIFF; + } else { + if (burst) + d->ccr |= CCR_SYNC_PACKET; + else + d->ccr |= CCR_SYNC_ELEMENT; + + if (dir == DMA_DEV_TO_MEM) { + d->ccr |= CCR_TRIGGER_SRC; + d->csdp |= CSDP_DST_PACKED; + } else { + d->csdp |= CSDP_SRC_PACKED; + } + + d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + + d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64; + } + + if (__dma_omap15xx(od->plat->dma_attr)) + d->ccr |= CCR_AUTO_INIT | CCR_REPEAT; + else + d->clnk_ctrl = c->dma_ch | CLNK_CTRL_ENABLE_LNK; + + c->cyclic = true; + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long tx_flags) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_desc *d; + uint8_t data_type; + + d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC); + if (!d) + return NULL; + + data_type = __ffs((src | dest | len)); + if (data_type > CSDP_DATA_TYPE_32) + data_type = CSDP_DATA_TYPE_32; + + d->dir = DMA_MEM_TO_MEM; + d->dev_addr = src; + d->fi = 0; + d->es = data_type; + d->sg[0].en = len / BIT(data_type); + d->sg[0].fn = 1; + d->sg[0].addr = dest; + d->sglen = 1; + d->ccr = c->ccr; + d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC; + + if (tx_flags & DMA_PREP_INTERRUPT) + d->cicr |= CICR_FRAME_IE; + else + d->polled = true; + + d->csdp = data_type; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF; + } else { + d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED; + d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64; + } + + return vchan_tx_prep(&c->vc, &d->vd, tx_flags); +} + +static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved( + struct dma_chan *chan, struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_desc *d; + struct omap_sg *sg; + uint8_t data_type; + size_t src_icg, dst_icg; + + /* Slave mode is not supported */ + if (is_slave_direction(xt->dir)) + return NULL; + + if (xt->frame_size != 1 || xt->numf == 0) + return NULL; + + d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC); + if (!d) + return NULL; + + data_type = __ffs((xt->src_start | xt->dst_start | xt->sgl[0].size)); + if (data_type > CSDP_DATA_TYPE_32) + data_type = CSDP_DATA_TYPE_32; + + sg = &d->sg[0]; + d->dir = DMA_MEM_TO_MEM; + d->dev_addr = xt->src_start; + d->es = data_type; + sg->en = xt->sgl[0].size / BIT(data_type); + sg->fn = xt->numf; + sg->addr = xt->dst_start; + d->sglen = 1; + d->ccr = c->ccr; + + src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]); + dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]); + if (src_icg) { + d->ccr |= CCR_SRC_AMODE_DBLIDX; + d->ei = 1; + d->fi = src_icg + 1; + } else if (xt->src_inc) { + d->ccr |= CCR_SRC_AMODE_POSTINC; + d->fi = 0; + } else { + dev_err(chan->device->dev, + "%s: SRC constant addressing is not supported\n", + __func__); + kfree(d); + return NULL; + } + + if (dst_icg) { + d->ccr |= CCR_DST_AMODE_DBLIDX; + sg->ei = 1; + sg->fi = dst_icg + 1; + } else if (xt->dst_inc) { + d->ccr |= CCR_DST_AMODE_POSTINC; + sg->fi = 0; + } else { + dev_err(chan->device->dev, + "%s: DST constant addressing is not supported\n", + __func__); + kfree(d); + return NULL; + } + + d->cicr = CICR_DROP_IE | CICR_FRAME_IE; + + d->csdp = data_type; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF; + } else { + d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED; + d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64; + } + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + + if (cfg->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES || + cfg->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) + return -EINVAL; + + if (cfg->src_maxburst > chan->device->max_burst || + cfg->dst_maxburst > chan->device->max_burst) + return -EINVAL; + + memcpy(&c->cfg, cfg, sizeof(c->cfg)); + + return 0; +} + +static int omap_dma_terminate_all(struct dma_chan *chan) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&c->vc.lock, flags); + + /* + * Stop DMA activity: we assume the callback will not be called + * after omap_dma_stop() returns (even if it does, it will see + * c->desc is NULL and exit.) + */ + if (c->desc) { + vchan_terminate_vdesc(&c->desc->vd); + c->desc = NULL; + /* Avoid stopping the dma twice */ + if (!c->paused) + omap_dma_stop(c); + } + + c->cyclic = false; + c->paused = false; + + vchan_get_all_descriptors(&c->vc, &head); + spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_dma_desc_free_list(&c->vc, &head); + + return 0; +} + +static void omap_dma_synchronize(struct dma_chan *chan) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + + vchan_synchronize(&c->vc); +} + +static int omap_dma_pause(struct dma_chan *chan) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_dmadev *od = to_omap_dma_dev(chan->device); + unsigned long flags; + int ret = -EINVAL; + bool can_pause = false; + + spin_lock_irqsave(&od->irq_lock, flags); + + if (!c->desc) + goto out; + + if (c->cyclic) + can_pause = true; + + /* + * We do not allow DMA_MEM_TO_DEV transfers to be paused. + * From the AM572x TRM, 16.1.4.18 Disabling a Channel During Transfer: + * "When a channel is disabled during a transfer, the channel undergoes + * an abort, unless it is hardware-source-synchronized …". + * A source-synchronised channel is one where the fetching of data is + * under control of the device. In other words, a device-to-memory + * transfer. So, a destination-synchronised channel (which would be a + * memory-to-device transfer) undergoes an abort if the CCR_ENABLE + * bit is cleared. + * From 16.1.4.20.4.6.2 Abort: "If an abort trigger occurs, the channel + * aborts immediately after completion of current read/write + * transactions and then the FIFO is cleaned up." The term "cleaned up" + * is not defined. TI recommends to check that RD_ACTIVE and WR_ACTIVE + * are both clear _before_ disabling the channel, otherwise data loss + * will occur. + * The problem is that if the channel is active, then device activity + * can result in DMA activity starting between reading those as both + * clear and the write to DMA_CCR to clear the enable bit hitting the + * hardware. If the DMA hardware can't drain the data in its FIFO to the + * destination, then data loss "might" occur (say if we write to an UART + * and the UART is not accepting any further data). + */ + else if (c->desc->dir == DMA_DEV_TO_MEM) + can_pause = true; + + if (can_pause && !c->paused) { + ret = omap_dma_stop(c); + if (!ret) + c->paused = true; + } +out: + spin_unlock_irqrestore(&od->irq_lock, flags); + + return ret; +} + +static int omap_dma_resume(struct dma_chan *chan) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_dmadev *od = to_omap_dma_dev(chan->device); + unsigned long flags; + int ret = -EINVAL; + + spin_lock_irqsave(&od->irq_lock, flags); + + if (c->paused && c->desc) { + mb(); + + /* Restore channel link register */ + omap_dma_chan_write(c, CLNK_CTRL, c->desc->clnk_ctrl); + + omap_dma_start(c, c->desc); + c->paused = false; + ret = 0; + } + spin_unlock_irqrestore(&od->irq_lock, flags); + + return ret; +} + +static int omap_dma_chan_init(struct omap_dmadev *od) +{ + struct omap_chan *c; + + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + + c->reg_map = od->reg_map; + c->vc.desc_free = omap_dma_desc_free; + vchan_init(&c->vc, &od->ddev); + + return 0; +} + +static void omap_dma_free(struct omap_dmadev *od) +{ + while (!list_empty(&od->ddev.channels)) { + struct omap_chan *c = list_first_entry(&od->ddev.channels, + struct omap_chan, vc.chan.device_node); + + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + kfree(c); + } +} + +/* Currently used by omap2 & 3 to block deeper SoC idle states */ +static bool omap_dma_busy(struct omap_dmadev *od) +{ + struct omap_chan *c; + int lch = -1; + + while (1) { + lch = find_next_bit(od->lch_bitmap, od->lch_count, lch + 1); + if (lch >= od->lch_count) + break; + c = od->lch_map[lch]; + if (!c) + continue; + if (omap_dma_chan_read(c, CCR) & CCR_ENABLE) + return true; + } + + return false; +} + +/* Currently only used for omap2. For omap1, also a check for lcd_dma is needed */ +static int omap_dma_busy_notifier(struct notifier_block *nb, + unsigned long cmd, void *v) +{ + struct omap_dmadev *od; + + od = container_of(nb, struct omap_dmadev, nb); + + switch (cmd) { + case CPU_CLUSTER_PM_ENTER: + if (omap_dma_busy(od)) + return NOTIFY_BAD; + break; + case CPU_CLUSTER_PM_ENTER_FAILED: + case CPU_CLUSTER_PM_EXIT: + break; + } + + return NOTIFY_OK; +} + +/* + * We are using IRQENABLE_L1, and legacy DMA code was using IRQENABLE_L0. + * As the DSP may be using IRQENABLE_L2 and L3, let's not touch those for + * now. Context save seems to be only currently needed on omap3. + */ +static void omap_dma_context_save(struct omap_dmadev *od) +{ + od->context.irqenable_l0 = omap_dma_glbl_read(od, IRQENABLE_L0); + od->context.irqenable_l1 = omap_dma_glbl_read(od, IRQENABLE_L1); + od->context.ocp_sysconfig = omap_dma_glbl_read(od, OCP_SYSCONFIG); + od->context.gcr = omap_dma_glbl_read(od, GCR); +} + +static void omap_dma_context_restore(struct omap_dmadev *od) +{ + int i; + + omap_dma_glbl_write(od, GCR, od->context.gcr); + omap_dma_glbl_write(od, OCP_SYSCONFIG, od->context.ocp_sysconfig); + omap_dma_glbl_write(od, IRQENABLE_L0, od->context.irqenable_l0); + omap_dma_glbl_write(od, IRQENABLE_L1, od->context.irqenable_l1); + + /* Clear IRQSTATUS_L0 as legacy DMA code is no longer doing it */ + if (od->plat->errata & DMA_ROMCODE_BUG) + omap_dma_glbl_write(od, IRQSTATUS_L0, 0); + + /* Clear dma channels */ + for (i = 0; i < od->lch_count; i++) + omap_dma_clear_lch(od, i); +} + +/* Currently only used for omap3 */ +static int omap_dma_context_notifier(struct notifier_block *nb, + unsigned long cmd, void *v) +{ + struct omap_dmadev *od; + + od = container_of(nb, struct omap_dmadev, nb); + + switch (cmd) { + case CPU_CLUSTER_PM_ENTER: + if (omap_dma_busy(od)) + return NOTIFY_BAD; + omap_dma_context_save(od); + break; + case CPU_CLUSTER_PM_ENTER_FAILED: /* No need to restore context */ + break; + case CPU_CLUSTER_PM_EXIT: + omap_dma_context_restore(od); + break; + } + + return NOTIFY_OK; +} + +static void omap_dma_init_gcr(struct omap_dmadev *od, int arb_rate, + int max_fifo_depth, int tparams) +{ + u32 val; + + /* Set only for omap2430 and later */ + if (!od->cfg->rw_priority) + return; + + if (max_fifo_depth == 0) + max_fifo_depth = 1; + if (arb_rate == 0) + arb_rate = 1; + + val = 0xff & max_fifo_depth; + val |= (0x3 & tparams) << 12; + val |= (arb_rate & 0xff) << 16; + + omap_dma_glbl_write(od, GCR, val); +} + +#define OMAP_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +/* + * No flags currently set for default configuration as omap1 is still + * using platform data. + */ +static const struct omap_dma_config default_cfg; + +static int omap_dma_probe(struct platform_device *pdev) +{ + const struct omap_dma_config *conf; + struct omap_dmadev *od; + int rc, i, irq; + u32 val; + + od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL); + if (!od) + return -ENOMEM; + + od->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(od->base)) + return PTR_ERR(od->base); + + conf = of_device_get_match_data(&pdev->dev); + if (conf) { + od->cfg = conf; + od->plat = dev_get_platdata(&pdev->dev); + if (!od->plat) { + dev_err(&pdev->dev, "omap_system_dma_plat_info is missing"); + return -ENODEV; + } + } else if (IS_ENABLED(CONFIG_ARCH_OMAP1)) { + od->cfg = &default_cfg; + + od->plat = omap_get_plat_info(); + if (!od->plat) + return -EPROBE_DEFER; + } else { + return -ENODEV; + } + + od->reg_map = od->plat->reg_map; + + dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); + dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); + dma_cap_set(DMA_INTERLEAVE, od->ddev.cap_mask); + od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources; + od->ddev.device_free_chan_resources = omap_dma_free_chan_resources; + od->ddev.device_tx_status = omap_dma_tx_status; + od->ddev.device_issue_pending = omap_dma_issue_pending; + od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg; + od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic; + od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy; + od->ddev.device_prep_interleaved_dma = omap_dma_prep_dma_interleaved; + od->ddev.device_config = omap_dma_slave_config; + od->ddev.device_pause = omap_dma_pause; + od->ddev.device_resume = omap_dma_resume; + od->ddev.device_terminate_all = omap_dma_terminate_all; + od->ddev.device_synchronize = omap_dma_synchronize; + od->ddev.src_addr_widths = OMAP_DMA_BUSWIDTHS; + od->ddev.dst_addr_widths = OMAP_DMA_BUSWIDTHS; + od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + if (__dma_omap15xx(od->plat->dma_attr)) + od->ddev.residue_granularity = + DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + else + od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + od->ddev.max_burst = SZ_16M - 1; /* CCEN: 24bit unsigned */ + od->ddev.dev = &pdev->dev; + INIT_LIST_HEAD(&od->ddev.channels); + mutex_init(&od->lch_lock); + spin_lock_init(&od->lock); + spin_lock_init(&od->irq_lock); + + /* Number of DMA requests */ + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { + dev_info(&pdev->dev, + "Missing dma-requests property, using %u.\n", + OMAP_SDMA_REQUESTS); + } + + /* Number of available logical channels */ + if (!pdev->dev.of_node) { + od->lch_count = od->plat->dma_attr->lch_count; + if (unlikely(!od->lch_count)) + od->lch_count = OMAP_SDMA_CHANNELS; + } else if (of_property_read_u32(pdev->dev.of_node, "dma-channels", + &od->lch_count)) { + dev_info(&pdev->dev, + "Missing dma-channels property, using %u.\n", + OMAP_SDMA_CHANNELS); + od->lch_count = OMAP_SDMA_CHANNELS; + } + + /* Mask of allowed logical channels */ + if (pdev->dev.of_node && !of_property_read_u32(pdev->dev.of_node, + "dma-channel-mask", + &val)) { + /* Tag channels not in mask as reserved */ + val = ~val; + bitmap_from_arr32(od->lch_bitmap, &val, od->lch_count); + } + if (od->plat->dma_attr->dev_caps & HS_CHANNELS_RESERVED) + bitmap_set(od->lch_bitmap, 0, 2); + + od->lch_map = devm_kcalloc(&pdev->dev, od->lch_count, + sizeof(*od->lch_map), + GFP_KERNEL); + if (!od->lch_map) + return -ENOMEM; + + for (i = 0; i < od->dma_requests; i++) { + rc = omap_dma_chan_init(od); + if (rc) { + omap_dma_free(od); + return rc; + } + } + + irq = platform_get_irq(pdev, 1); + if (irq <= 0) { + dev_info(&pdev->dev, "failed to get L1 IRQ: %d\n", irq); + od->legacy = true; + } else { + /* Disable all interrupts */ + od->irq_enable_mask = 0; + omap_dma_glbl_write(od, IRQENABLE_L1, 0); + + rc = devm_request_irq(&pdev->dev, irq, omap_dma_irq, + IRQF_SHARED, "omap-dma-engine", od); + if (rc) { + omap_dma_free(od); + return rc; + } + } + + if (omap_dma_glbl_read(od, CAPS_0) & CAPS_0_SUPPORT_LL123) + od->ll123_supported = true; + + od->ddev.filter.map = od->plat->slave_map; + od->ddev.filter.mapcnt = od->plat->slavecnt; + od->ddev.filter.fn = omap_dma_filter_fn; + + if (od->ll123_supported) { + od->desc_pool = dma_pool_create(dev_name(&pdev->dev), + &pdev->dev, + sizeof(struct omap_type2_desc), + 4, 0); + if (!od->desc_pool) { + dev_err(&pdev->dev, + "unable to allocate descriptor pool\n"); + od->ll123_supported = false; + } + } + + rc = dma_async_device_register(&od->ddev); + if (rc) { + pr_warn("OMAP-DMA: failed to register slave DMA engine device: %d\n", + rc); + omap_dma_free(od); + return rc; + } + + platform_set_drvdata(pdev, od); + + if (pdev->dev.of_node) { + omap_dma_info.dma_cap = od->ddev.cap_mask; + + /* Device-tree DMA controller registration */ + rc = of_dma_controller_register(pdev->dev.of_node, + of_dma_simple_xlate, &omap_dma_info); + if (rc) { + pr_warn("OMAP-DMA: failed to register DMA controller\n"); + dma_async_device_unregister(&od->ddev); + omap_dma_free(od); + } + } + + omap_dma_init_gcr(od, DMA_DEFAULT_ARB_RATE, DMA_DEFAULT_FIFO_DEPTH, 0); + + if (od->cfg->needs_busy_check) { + od->nb.notifier_call = omap_dma_busy_notifier; + cpu_pm_register_notifier(&od->nb); + } else if (od->cfg->may_lose_context) { + od->nb.notifier_call = omap_dma_context_notifier; + cpu_pm_register_notifier(&od->nb); + } + + dev_info(&pdev->dev, "OMAP DMA engine driver%s\n", + od->ll123_supported ? " (LinkedList1/2/3 supported)" : ""); + + return rc; +} + +static int omap_dma_remove(struct platform_device *pdev) +{ + struct omap_dmadev *od = platform_get_drvdata(pdev); + int irq; + + if (od->cfg->may_lose_context) + cpu_pm_unregister_notifier(&od->nb); + + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + + irq = platform_get_irq(pdev, 1); + devm_free_irq(&pdev->dev, irq, od); + + dma_async_device_unregister(&od->ddev); + + if (!omap_dma_legacy(od)) { + /* Disable all interrupts */ + omap_dma_glbl_write(od, IRQENABLE_L0, 0); + } + + if (od->ll123_supported) + dma_pool_destroy(od->desc_pool); + + omap_dma_free(od); + + return 0; +} + +static const struct omap_dma_config omap2420_data = { + .lch_end = CCFN, + .rw_priority = true, + .needs_lch_clear = true, + .needs_busy_check = true, +}; + +static const struct omap_dma_config omap2430_data = { + .lch_end = CCFN, + .rw_priority = true, + .needs_lch_clear = true, +}; + +static const struct omap_dma_config omap3430_data = { + .lch_end = CCFN, + .rw_priority = true, + .needs_lch_clear = true, + .may_lose_context = true, +}; + +static const struct omap_dma_config omap3630_data = { + .lch_end = CCDN, + .rw_priority = true, + .needs_lch_clear = true, + .may_lose_context = true, +}; + +static const struct omap_dma_config omap4_data = { + .lch_end = CCDN, + .rw_priority = true, + .needs_lch_clear = true, +}; + +static const struct of_device_id omap_dma_match[] = { + { .compatible = "ti,omap2420-sdma", .data = &omap2420_data, }, + { .compatible = "ti,omap2430-sdma", .data = &omap2430_data, }, + { .compatible = "ti,omap3430-sdma", .data = &omap3430_data, }, + { .compatible = "ti,omap3630-sdma", .data = &omap3630_data, }, + { .compatible = "ti,omap4430-sdma", .data = &omap4_data, }, + {}, +}; +MODULE_DEVICE_TABLE(of, omap_dma_match); + +static struct platform_driver omap_dma_driver = { + .probe = omap_dma_probe, + .remove = omap_dma_remove, + .driver = { + .name = "omap-dma-engine", + .of_match_table = omap_dma_match, + }, +}; + +static bool omap_dma_filter_fn(struct dma_chan *chan, void *param) +{ + if (chan->device->dev->driver == &omap_dma_driver.driver) { + struct omap_dmadev *od = to_omap_dma_dev(chan->device); + struct omap_chan *c = to_omap_dma_chan(chan); + unsigned req = *(unsigned *)param; + + if (req <= od->dma_requests) { + c->dma_sig = req; + return true; + } + } + return false; +} + +static int omap_dma_init(void) +{ + return platform_driver_register(&omap_dma_driver); +} +subsys_initcall(omap_dma_init); + +static void __exit omap_dma_exit(void) +{ + platform_driver_unregister(&omap_dma_driver); +} +module_exit(omap_dma_exit); + +MODULE_AUTHOR("Russell King"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c new file mode 100644 index 0000000000..3f524be69e --- /dev/null +++ b/drivers/dma/timb_dma.c @@ -0,0 +1,773 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * timb_dma.c timberdale FPGA DMA driver + * Copyright (c) 2010 Intel Corporation + */ + +/* Supports: + * Timberdale FPGA DMA engine + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dmaengine.h" + +#define DRIVER_NAME "timb-dma" + +/* Global DMA registers */ +#define TIMBDMA_ACR 0x34 +#define TIMBDMA_32BIT_ADDR 0x01 + +#define TIMBDMA_ISR 0x080000 +#define TIMBDMA_IPR 0x080004 +#define TIMBDMA_IER 0x080008 + +/* Channel specific registers */ +/* RX instances base addresses are 0x00, 0x40, 0x80 ... + * TX instances base addresses are 0x18, 0x58, 0x98 ... + */ +#define TIMBDMA_INSTANCE_OFFSET 0x40 +#define TIMBDMA_INSTANCE_TX_OFFSET 0x18 + +/* RX registers, relative the instance base */ +#define TIMBDMA_OFFS_RX_DHAR 0x00 +#define TIMBDMA_OFFS_RX_DLAR 0x04 +#define TIMBDMA_OFFS_RX_LR 0x0C +#define TIMBDMA_OFFS_RX_BLR 0x10 +#define TIMBDMA_OFFS_RX_ER 0x14 +#define TIMBDMA_RX_EN 0x01 +/* bytes per Row, video specific register + * which is placed after the TX registers... + */ +#define TIMBDMA_OFFS_RX_BPRR 0x30 + +/* TX registers, relative the instance base */ +#define TIMBDMA_OFFS_TX_DHAR 0x00 +#define TIMBDMA_OFFS_TX_DLAR 0x04 +#define TIMBDMA_OFFS_TX_BLR 0x0C +#define TIMBDMA_OFFS_TX_LR 0x14 + + +#define TIMB_DMA_DESC_SIZE 8 + +struct timb_dma_desc { + struct list_head desc_node; + struct dma_async_tx_descriptor txd; + u8 *desc_list; + unsigned int desc_list_len; + bool interrupt; +}; + +struct timb_dma_chan { + struct dma_chan chan; + void __iomem *membase; + spinlock_t lock; /* Used to protect data structures, + especially the lists and descriptors, + from races between the tasklet and calls + from above */ + bool ongoing; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int bytes_per_line; + enum dma_transfer_direction direction; + unsigned int descs; /* Descriptors to allocate */ + unsigned int desc_elems; /* number of elems per descriptor */ +}; + +struct timb_dma { + struct dma_device dma; + void __iomem *membase; + struct tasklet_struct tasklet; + struct timb_dma_chan channels[]; +}; + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} +static struct device *chan2dmadev(struct dma_chan *chan) +{ + return chan2dev(chan)->parent->parent; +} + +static struct timb_dma *tdchantotd(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + return (struct timb_dma *)((u8 *)td_chan - + id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma)); +} + +/* Must be called with the spinlock held */ +static void __td_enable_chan_irq(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + struct timb_dma *td = tdchantotd(td_chan); + u32 ier; + + /* enable interrupt for this channel */ + ier = ioread32(td->membase + TIMBDMA_IER); + ier |= 1 << id; + dev_dbg(chan2dev(&td_chan->chan), "Enabling irq: %d, IER: 0x%x\n", id, + ier); + iowrite32(ier, td->membase + TIMBDMA_IER); +} + +/* Should be called with the spinlock held */ +static bool __td_dma_done_ack(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + struct timb_dma *td = (struct timb_dma *)((u8 *)td_chan - + id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma)); + u32 isr; + bool done = false; + + dev_dbg(chan2dev(&td_chan->chan), "Checking irq: %d, td: %p\n", id, td); + + isr = ioread32(td->membase + TIMBDMA_ISR) & (1 << id); + if (isr) { + iowrite32(isr, td->membase + TIMBDMA_ISR); + done = true; + } + + return done; +} + +static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc, + struct scatterlist *sg, bool last) +{ + if (sg_dma_len(sg) > USHRT_MAX) { + dev_err(chan2dev(&td_chan->chan), "Too big sg element\n"); + return -EINVAL; + } + + /* length must be word aligned */ + if (sg_dma_len(sg) % sizeof(u32)) { + dev_err(chan2dev(&td_chan->chan), "Incorrect length: %d\n", + sg_dma_len(sg)); + return -EINVAL; + } + + dev_dbg(chan2dev(&td_chan->chan), "desc: %p, addr: 0x%llx\n", + dma_desc, (unsigned long long)sg_dma_address(sg)); + + dma_desc[7] = (sg_dma_address(sg) >> 24) & 0xff; + dma_desc[6] = (sg_dma_address(sg) >> 16) & 0xff; + dma_desc[5] = (sg_dma_address(sg) >> 8) & 0xff; + dma_desc[4] = (sg_dma_address(sg) >> 0) & 0xff; + + dma_desc[3] = (sg_dma_len(sg) >> 8) & 0xff; + dma_desc[2] = (sg_dma_len(sg) >> 0) & 0xff; + + dma_desc[1] = 0x00; + dma_desc[0] = 0x21 | (last ? 0x02 : 0); /* tran, valid */ + + return 0; +} + +/* Must be called with the spinlock held */ +static void __td_start_dma(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc; + + if (td_chan->ongoing) { + dev_err(chan2dev(&td_chan->chan), + "Transfer already ongoing\n"); + return; + } + + td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc, + desc_node); + + dev_dbg(chan2dev(&td_chan->chan), + "td_chan: %p, chan: %d, membase: %p\n", + td_chan, td_chan->chan.chan_id, td_chan->membase); + + if (td_chan->direction == DMA_DEV_TO_MEM) { + + /* descriptor address */ + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR); + iowrite32(td_desc->txd.phys, td_chan->membase + + TIMBDMA_OFFS_RX_DLAR); + /* Bytes per line */ + iowrite32(td_chan->bytes_per_line, td_chan->membase + + TIMBDMA_OFFS_RX_BPRR); + /* enable RX */ + iowrite32(TIMBDMA_RX_EN, td_chan->membase + TIMBDMA_OFFS_RX_ER); + } else { + /* address high */ + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DHAR); + iowrite32(td_desc->txd.phys, td_chan->membase + + TIMBDMA_OFFS_TX_DLAR); + } + + td_chan->ongoing = true; + + if (td_desc->interrupt) + __td_enable_chan_irq(td_chan); +} + +static void __td_finish(struct timb_dma_chan *td_chan) +{ + struct dmaengine_desc_callback cb; + struct dma_async_tx_descriptor *txd; + struct timb_dma_desc *td_desc; + + /* can happen if the descriptor is canceled */ + if (list_empty(&td_chan->active_list)) + return; + + td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc, + desc_node); + txd = &td_desc->txd; + + dev_dbg(chan2dev(&td_chan->chan), "descriptor %u complete\n", + txd->cookie); + + /* make sure to stop the transfer */ + if (td_chan->direction == DMA_DEV_TO_MEM) + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER); +/* Currently no support for stopping DMA transfers + else + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DLAR); +*/ + dma_cookie_complete(txd); + td_chan->ongoing = false; + + dmaengine_desc_get_callback(txd, &cb); + + list_move(&td_desc->desc_node, &td_chan->free_list); + + dma_descriptor_unmap(txd); + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + dmaengine_desc_callback_invoke(&cb, NULL); +} + +static u32 __td_ier_mask(struct timb_dma *td) +{ + int i; + u32 ret = 0; + + for (i = 0; i < td->dma.chancnt; i++) { + struct timb_dma_chan *td_chan = td->channels + i; + if (td_chan->ongoing) { + struct timb_dma_desc *td_desc = + list_entry(td_chan->active_list.next, + struct timb_dma_desc, desc_node); + if (td_desc->interrupt) + ret |= 1 << i; + } + } + + return ret; +} + +static void __td_start_next(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc; + + BUG_ON(list_empty(&td_chan->queue)); + BUG_ON(td_chan->ongoing); + + td_desc = list_entry(td_chan->queue.next, struct timb_dma_desc, + desc_node); + + dev_dbg(chan2dev(&td_chan->chan), "%s: started %u\n", + __func__, td_desc->txd.cookie); + + list_move(&td_desc->desc_node, &td_chan->active_list); + __td_start_dma(td_chan); +} + +static dma_cookie_t td_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct timb_dma_desc *td_desc = container_of(txd, struct timb_dma_desc, + txd); + struct timb_dma_chan *td_chan = container_of(txd->chan, + struct timb_dma_chan, chan); + dma_cookie_t cookie; + + spin_lock_bh(&td_chan->lock); + cookie = dma_cookie_assign(txd); + + if (list_empty(&td_chan->active_list)) { + dev_dbg(chan2dev(txd->chan), "%s: started %u\n", __func__, + txd->cookie); + list_add_tail(&td_desc->desc_node, &td_chan->active_list); + __td_start_dma(td_chan); + } else { + dev_dbg(chan2dev(txd->chan), "tx_submit: queued %u\n", + txd->cookie); + + list_add_tail(&td_desc->desc_node, &td_chan->queue); + } + + spin_unlock_bh(&td_chan->lock); + + return cookie; +} + +static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan) +{ + struct dma_chan *chan = &td_chan->chan; + struct timb_dma_desc *td_desc; + int err; + + td_desc = kzalloc(sizeof(struct timb_dma_desc), GFP_KERNEL); + if (!td_desc) + goto out; + + td_desc->desc_list_len = td_chan->desc_elems * TIMB_DMA_DESC_SIZE; + + td_desc->desc_list = kzalloc(td_desc->desc_list_len, GFP_KERNEL); + if (!td_desc->desc_list) + goto err; + + dma_async_tx_descriptor_init(&td_desc->txd, chan); + td_desc->txd.tx_submit = td_tx_submit; + td_desc->txd.flags = DMA_CTRL_ACK; + + td_desc->txd.phys = dma_map_single(chan2dmadev(chan), + td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE); + + err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys); + if (err) { + dev_err(chan2dev(chan), "DMA mapping error: %d\n", err); + goto err; + } + + return td_desc; +err: + kfree(td_desc->desc_list); + kfree(td_desc); +out: + return NULL; + +} + +static void td_free_desc(struct timb_dma_desc *td_desc) +{ + dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc); + dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys, + td_desc->desc_list_len, DMA_TO_DEVICE); + + kfree(td_desc->desc_list); + kfree(td_desc); +} + +static void td_desc_put(struct timb_dma_chan *td_chan, + struct timb_dma_desc *td_desc) +{ + dev_dbg(chan2dev(&td_chan->chan), "Putting desc: %p\n", td_desc); + + spin_lock_bh(&td_chan->lock); + list_add(&td_desc->desc_node, &td_chan->free_list); + spin_unlock_bh(&td_chan->lock); +} + +static struct timb_dma_desc *td_desc_get(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc, *_td_desc; + struct timb_dma_desc *ret = NULL; + + spin_lock_bh(&td_chan->lock); + list_for_each_entry_safe(td_desc, _td_desc, &td_chan->free_list, + desc_node) { + if (async_tx_test_ack(&td_desc->txd)) { + list_del(&td_desc->desc_node); + ret = td_desc; + break; + } + dev_dbg(chan2dev(&td_chan->chan), "desc %p not ACKed\n", + td_desc); + } + spin_unlock_bh(&td_chan->lock); + + return ret; +} + +static int td_alloc_chan_resources(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + int i; + + dev_dbg(chan2dev(chan), "%s: entry\n", __func__); + + BUG_ON(!list_empty(&td_chan->free_list)); + for (i = 0; i < td_chan->descs; i++) { + struct timb_dma_desc *td_desc = td_alloc_init_desc(td_chan); + if (!td_desc) { + if (i) + break; + else { + dev_err(chan2dev(chan), + "Couldn't allocate any descriptors\n"); + return -ENOMEM; + } + } + + td_desc_put(td_chan, td_desc); + } + + spin_lock_bh(&td_chan->lock); + dma_cookie_init(chan); + spin_unlock_bh(&td_chan->lock); + + return 0; +} + +static void td_free_chan_resources(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc, *_td_desc; + LIST_HEAD(list); + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + /* check that all descriptors are free */ + BUG_ON(!list_empty(&td_chan->active_list)); + BUG_ON(!list_empty(&td_chan->queue)); + + spin_lock_bh(&td_chan->lock); + list_splice_init(&td_chan->free_list, &list); + spin_unlock_bh(&td_chan->lock); + + list_for_each_entry_safe(td_desc, _td_desc, &list, desc_node) { + dev_dbg(chan2dev(chan), "%s: Freeing desc: %p\n", __func__, + td_desc); + td_free_desc(td_desc); + } +} + +static enum dma_status td_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + enum dma_status ret; + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + ret = dma_cookie_status(chan, cookie, txstate); + + dev_dbg(chan2dev(chan), "%s: exit, ret: %d\n", __func__, ret); + + return ret; +} + +static void td_issue_pending(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + spin_lock_bh(&td_chan->lock); + + if (!list_empty(&td_chan->active_list)) + /* transfer ongoing */ + if (__td_dma_done_ack(td_chan)) + __td_finish(td_chan); + + if (list_empty(&td_chan->active_list) && !list_empty(&td_chan->queue)) + __td_start_next(td_chan); + + spin_unlock_bh(&td_chan->lock); +} + +static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc; + struct scatterlist *sg; + unsigned int i; + unsigned int desc_usage = 0; + + if (!sgl || !sg_len) { + dev_err(chan2dev(chan), "%s: No SG list\n", __func__); + return NULL; + } + + /* even channels are for RX, odd for TX */ + if (td_chan->direction != direction) { + dev_err(chan2dev(chan), + "Requesting channel in wrong direction\n"); + return NULL; + } + + td_desc = td_desc_get(td_chan); + if (!td_desc) { + dev_err(chan2dev(chan), "Not enough descriptors available\n"); + return NULL; + } + + td_desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0; + + for_each_sg(sgl, sg, sg_len, i) { + int err; + if (desc_usage > td_desc->desc_list_len) { + dev_err(chan2dev(chan), "No descriptor space\n"); + return NULL; + } + + err = td_fill_desc(td_chan, td_desc->desc_list + desc_usage, sg, + i == (sg_len - 1)); + if (err) { + dev_err(chan2dev(chan), "Failed to update desc: %d\n", + err); + td_desc_put(td_chan, td_desc); + return NULL; + } + desc_usage += TIMB_DMA_DESC_SIZE; + } + + dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys, + td_desc->desc_list_len, DMA_TO_DEVICE); + + return &td_desc->txd; +} + +static int td_terminate_all(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc, *_td_desc; + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + /* first the easy part, put the queue into the free list */ + spin_lock_bh(&td_chan->lock); + list_for_each_entry_safe(td_desc, _td_desc, &td_chan->queue, + desc_node) + list_move(&td_desc->desc_node, &td_chan->free_list); + + /* now tear down the running */ + __td_finish(td_chan); + spin_unlock_bh(&td_chan->lock); + + return 0; +} + +static void td_tasklet(struct tasklet_struct *t) +{ + struct timb_dma *td = from_tasklet(td, t, tasklet); + u32 isr; + u32 ipr; + u32 ier; + int i; + + isr = ioread32(td->membase + TIMBDMA_ISR); + ipr = isr & __td_ier_mask(td); + + /* ack the interrupts */ + iowrite32(ipr, td->membase + TIMBDMA_ISR); + + for (i = 0; i < td->dma.chancnt; i++) + if (ipr & (1 << i)) { + struct timb_dma_chan *td_chan = td->channels + i; + spin_lock(&td_chan->lock); + __td_finish(td_chan); + if (!list_empty(&td_chan->queue)) + __td_start_next(td_chan); + spin_unlock(&td_chan->lock); + } + + ier = __td_ier_mask(td); + iowrite32(ier, td->membase + TIMBDMA_IER); +} + + +static irqreturn_t td_irq(int irq, void *devid) +{ + struct timb_dma *td = devid; + u32 ipr = ioread32(td->membase + TIMBDMA_IPR); + + if (ipr) { + /* disable interrupts, will be re-enabled in tasklet */ + iowrite32(0, td->membase + TIMBDMA_IER); + + tasklet_schedule(&td->tasklet); + + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + + +static int td_probe(struct platform_device *pdev) +{ + struct timb_dma_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct timb_dma *td; + struct resource *iomem; + int irq; + int err; + int i; + + if (!pdata) { + dev_err(&pdev->dev, "No platform data\n"); + return -EINVAL; + } + + iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!iomem) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + if (!request_mem_region(iomem->start, resource_size(iomem), + DRIVER_NAME)) + return -EBUSY; + + td = kzalloc(struct_size(td, channels, pdata->nr_channels), + GFP_KERNEL); + if (!td) { + err = -ENOMEM; + goto err_release_region; + } + + dev_dbg(&pdev->dev, "Allocated TD: %p\n", td); + + td->membase = ioremap(iomem->start, resource_size(iomem)); + if (!td->membase) { + dev_err(&pdev->dev, "Failed to remap I/O memory\n"); + err = -ENOMEM; + goto err_free_mem; + } + + /* 32bit addressing */ + iowrite32(TIMBDMA_32BIT_ADDR, td->membase + TIMBDMA_ACR); + + /* disable and clear any interrupts */ + iowrite32(0x0, td->membase + TIMBDMA_IER); + iowrite32(0xFFFFFFFF, td->membase + TIMBDMA_ISR); + + tasklet_setup(&td->tasklet, td_tasklet); + + err = request_irq(irq, td_irq, IRQF_SHARED, DRIVER_NAME, td); + if (err) { + dev_err(&pdev->dev, "Failed to request IRQ\n"); + goto err_tasklet_kill; + } + + td->dma.device_alloc_chan_resources = td_alloc_chan_resources; + td->dma.device_free_chan_resources = td_free_chan_resources; + td->dma.device_tx_status = td_tx_status; + td->dma.device_issue_pending = td_issue_pending; + + dma_cap_set(DMA_SLAVE, td->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, td->dma.cap_mask); + td->dma.device_prep_slave_sg = td_prep_slave_sg; + td->dma.device_terminate_all = td_terminate_all; + + td->dma.dev = &pdev->dev; + + INIT_LIST_HEAD(&td->dma.channels); + + for (i = 0; i < pdata->nr_channels; i++) { + struct timb_dma_chan *td_chan = &td->channels[i]; + struct timb_dma_platform_data_channel *pchan = + pdata->channels + i; + + /* even channels are RX, odd are TX */ + if ((i % 2) == pchan->rx) { + dev_err(&pdev->dev, "Wrong channel configuration\n"); + err = -EINVAL; + goto err_free_irq; + } + + td_chan->chan.device = &td->dma; + dma_cookie_init(&td_chan->chan); + spin_lock_init(&td_chan->lock); + INIT_LIST_HEAD(&td_chan->active_list); + INIT_LIST_HEAD(&td_chan->queue); + INIT_LIST_HEAD(&td_chan->free_list); + + td_chan->descs = pchan->descriptors; + td_chan->desc_elems = pchan->descriptor_elements; + td_chan->bytes_per_line = pchan->bytes_per_line; + td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM : + DMA_MEM_TO_DEV; + + td_chan->membase = td->membase + + (i / 2) * TIMBDMA_INSTANCE_OFFSET + + (pchan->rx ? 0 : TIMBDMA_INSTANCE_TX_OFFSET); + + dev_dbg(&pdev->dev, "Chan: %d, membase: %p\n", + i, td_chan->membase); + + list_add_tail(&td_chan->chan.device_node, &td->dma.channels); + } + + err = dma_async_device_register(&td->dma); + if (err) { + dev_err(&pdev->dev, "Failed to register async device\n"); + goto err_free_irq; + } + + platform_set_drvdata(pdev, td); + + dev_dbg(&pdev->dev, "Probe result: %d\n", err); + return err; + +err_free_irq: + free_irq(irq, td); +err_tasklet_kill: + tasklet_kill(&td->tasklet); + iounmap(td->membase); +err_free_mem: + kfree(td); +err_release_region: + release_mem_region(iomem->start, resource_size(iomem)); + + return err; + +} + +static int td_remove(struct platform_device *pdev) +{ + struct timb_dma *td = platform_get_drvdata(pdev); + struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int irq = platform_get_irq(pdev, 0); + + dma_async_device_unregister(&td->dma); + free_irq(irq, td); + tasklet_kill(&td->tasklet); + iounmap(td->membase); + kfree(td); + release_mem_region(iomem->start, resource_size(iomem)); + + dev_dbg(&pdev->dev, "Removed...\n"); + return 0; +} + +static struct platform_driver td_driver = { + .driver = { + .name = DRIVER_NAME, + }, + .probe = td_probe, + .remove = td_remove, +}; + +module_platform_driver(td_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Timberdale DMA controller driver"); +MODULE_AUTHOR("Pelagicore AB "); +MODULE_ALIAS("platform:"DRIVER_NAME); diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c new file mode 100644 index 0000000000..5b6b375a25 --- /dev/null +++ b/drivers/dma/txx9dmac.c @@ -0,0 +1,1306 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the TXx9 SoC DMA Controller + * + * Copyright (C) 2009 Atsushi Nemoto + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "txx9dmac.h" + +static struct txx9dmac_chan *to_txx9dmac_chan(struct dma_chan *chan) +{ + return container_of(chan, struct txx9dmac_chan, chan); +} + +static struct txx9dmac_cregs __iomem *__dma_regs(const struct txx9dmac_chan *dc) +{ + return dc->ch_regs; +} + +static struct txx9dmac_cregs32 __iomem *__dma_regs32( + const struct txx9dmac_chan *dc) +{ + return dc->ch_regs; +} + +#define channel64_readq(dc, name) \ + __raw_readq(&(__dma_regs(dc)->name)) +#define channel64_writeq(dc, name, val) \ + __raw_writeq((val), &(__dma_regs(dc)->name)) +#define channel64_readl(dc, name) \ + __raw_readl(&(__dma_regs(dc)->name)) +#define channel64_writel(dc, name, val) \ + __raw_writel((val), &(__dma_regs(dc)->name)) + +#define channel32_readl(dc, name) \ + __raw_readl(&(__dma_regs32(dc)->name)) +#define channel32_writel(dc, name, val) \ + __raw_writel((val), &(__dma_regs32(dc)->name)) + +#define channel_readq(dc, name) channel64_readq(dc, name) +#define channel_writeq(dc, name, val) channel64_writeq(dc, name, val) +#define channel_readl(dc, name) \ + (is_dmac64(dc) ? \ + channel64_readl(dc, name) : channel32_readl(dc, name)) +#define channel_writel(dc, name, val) \ + (is_dmac64(dc) ? \ + channel64_writel(dc, name, val) : channel32_writel(dc, name, val)) + +static dma_addr_t channel64_read_CHAR(const struct txx9dmac_chan *dc) +{ + if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64)) + return channel64_readq(dc, CHAR); + else + return channel64_readl(dc, CHAR); +} + +static void channel64_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val) +{ + if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64)) + channel64_writeq(dc, CHAR, val); + else + channel64_writel(dc, CHAR, val); +} + +static void channel64_clear_CHAR(const struct txx9dmac_chan *dc) +{ +#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT) + channel64_writel(dc, CHAR, 0); + channel64_writel(dc, __pad_CHAR, 0); +#else + channel64_writeq(dc, CHAR, 0); +#endif +} + +static dma_addr_t channel_read_CHAR(const struct txx9dmac_chan *dc) +{ + if (is_dmac64(dc)) + return channel64_read_CHAR(dc); + else + return channel32_readl(dc, CHAR); +} + +static void channel_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val) +{ + if (is_dmac64(dc)) + channel64_write_CHAR(dc, val); + else + channel32_writel(dc, CHAR, val); +} + +static struct txx9dmac_regs __iomem *__txx9dmac_regs( + const struct txx9dmac_dev *ddev) +{ + return ddev->regs; +} + +static struct txx9dmac_regs32 __iomem *__txx9dmac_regs32( + const struct txx9dmac_dev *ddev) +{ + return ddev->regs; +} + +#define dma64_readl(ddev, name) \ + __raw_readl(&(__txx9dmac_regs(ddev)->name)) +#define dma64_writel(ddev, name, val) \ + __raw_writel((val), &(__txx9dmac_regs(ddev)->name)) + +#define dma32_readl(ddev, name) \ + __raw_readl(&(__txx9dmac_regs32(ddev)->name)) +#define dma32_writel(ddev, name, val) \ + __raw_writel((val), &(__txx9dmac_regs32(ddev)->name)) + +#define dma_readl(ddev, name) \ + (__is_dmac64(ddev) ? \ + dma64_readl(ddev, name) : dma32_readl(ddev, name)) +#define dma_writel(ddev, name, val) \ + (__is_dmac64(ddev) ? \ + dma64_writel(ddev, name, val) : dma32_writel(ddev, name, val)) + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} +static struct device *chan2parent(struct dma_chan *chan) +{ + return chan->dev->device.parent; +} + +static struct txx9dmac_desc * +txd_to_txx9dmac_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct txx9dmac_desc, txd); +} + +static dma_addr_t desc_read_CHAR(const struct txx9dmac_chan *dc, + const struct txx9dmac_desc *desc) +{ + return is_dmac64(dc) ? desc->hwdesc.CHAR : desc->hwdesc32.CHAR; +} + +static void desc_write_CHAR(const struct txx9dmac_chan *dc, + struct txx9dmac_desc *desc, dma_addr_t val) +{ + if (is_dmac64(dc)) + desc->hwdesc.CHAR = val; + else + desc->hwdesc32.CHAR = val; +} + +#define TXX9_DMA_MAX_COUNT 0x04000000 + +#define TXX9_DMA_INITIAL_DESC_COUNT 64 + +static struct txx9dmac_desc *txx9dmac_first_active(struct txx9dmac_chan *dc) +{ + return list_entry(dc->active_list.next, + struct txx9dmac_desc, desc_node); +} + +static struct txx9dmac_desc *txx9dmac_last_active(struct txx9dmac_chan *dc) +{ + return list_entry(dc->active_list.prev, + struct txx9dmac_desc, desc_node); +} + +static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc) +{ + return list_entry(dc->queue.next, struct txx9dmac_desc, desc_node); +} + +static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) +{ + if (!list_empty(&desc->tx_list)) + desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node); + return desc; +} + +static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx); + +static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc, + gfp_t flags) +{ + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc; + + desc = kzalloc(sizeof(*desc), flags); + if (!desc) + return NULL; + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, &dc->chan); + desc->txd.tx_submit = txx9dmac_tx_submit; + /* txd.flags will be overwritten in prep funcs */ + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = dma_map_single(chan2parent(&dc->chan), &desc->hwdesc, + ddev->descsize, DMA_TO_DEVICE); + return desc; +} + +static struct txx9dmac_desc *txx9dmac_desc_get(struct txx9dmac_chan *dc) +{ + struct txx9dmac_desc *desc, *_desc; + struct txx9dmac_desc *ret = NULL; + unsigned int i = 0; + + spin_lock_bh(&dc->lock); + list_for_each_entry_safe(desc, _desc, &dc->free_list, desc_node) { + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(chan2dev(&dc->chan), "desc %p not ACKed\n", desc); + i++; + } + spin_unlock_bh(&dc->lock); + + dev_vdbg(chan2dev(&dc->chan), "scanned %u descriptors on freelist\n", + i); + if (!ret) { + ret = txx9dmac_desc_alloc(dc, GFP_ATOMIC); + if (ret) { + spin_lock_bh(&dc->lock); + dc->descs_allocated++; + spin_unlock_bh(&dc->lock); + } else + dev_err(chan2dev(&dc->chan), + "not enough descriptors available\n"); + } + return ret; +} + +static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc, + struct txx9dmac_desc *desc) +{ + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *child; + + list_for_each_entry(child, &desc->tx_list, desc_node) + dma_sync_single_for_cpu(chan2parent(&dc->chan), + child->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + dma_sync_single_for_cpu(chan2parent(&dc->chan), + desc->txd.phys, ddev->descsize, + DMA_TO_DEVICE); +} + +/* + * Move a descriptor, including any children, to the free list. + * `desc' must not be on any lists. + */ +static void txx9dmac_desc_put(struct txx9dmac_chan *dc, + struct txx9dmac_desc *desc) +{ + if (desc) { + struct txx9dmac_desc *child; + + txx9dmac_sync_desc_for_cpu(dc, desc); + + spin_lock_bh(&dc->lock); + list_for_each_entry(child, &desc->tx_list, desc_node) + dev_vdbg(chan2dev(&dc->chan), + "moving child desc %p to freelist\n", + child); + list_splice_init(&desc->tx_list, &dc->free_list); + dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", + desc); + list_add(&desc->desc_node, &dc->free_list); + spin_unlock_bh(&dc->lock); + } +} + +/*----------------------------------------------------------------------*/ + +static void txx9dmac_dump_regs(struct txx9dmac_chan *dc) +{ + if (is_dmac64(dc)) + dev_err(chan2dev(&dc->chan), + " CHAR: %#llx SAR: %#llx DAR: %#llx CNTR: %#x" + " SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n", + (u64)channel64_read_CHAR(dc), + channel64_readq(dc, SAR), + channel64_readq(dc, DAR), + channel64_readl(dc, CNTR), + channel64_readl(dc, SAIR), + channel64_readl(dc, DAIR), + channel64_readl(dc, CCR), + channel64_readl(dc, CSR)); + else + dev_err(chan2dev(&dc->chan), + " CHAR: %#x SAR: %#x DAR: %#x CNTR: %#x" + " SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n", + channel32_readl(dc, CHAR), + channel32_readl(dc, SAR), + channel32_readl(dc, DAR), + channel32_readl(dc, CNTR), + channel32_readl(dc, SAIR), + channel32_readl(dc, DAIR), + channel32_readl(dc, CCR), + channel32_readl(dc, CSR)); +} + +static void txx9dmac_reset_chan(struct txx9dmac_chan *dc) +{ + channel_writel(dc, CCR, TXX9_DMA_CCR_CHRST); + if (is_dmac64(dc)) { + channel64_clear_CHAR(dc); + channel_writeq(dc, SAR, 0); + channel_writeq(dc, DAR, 0); + } else { + channel_writel(dc, CHAR, 0); + channel_writel(dc, SAR, 0); + channel_writel(dc, DAR, 0); + } + channel_writel(dc, CNTR, 0); + channel_writel(dc, SAIR, 0); + channel_writel(dc, DAIR, 0); + channel_writel(dc, CCR, 0); +} + +/* Called with dc->lock held and bh disabled */ +static void txx9dmac_dostart(struct txx9dmac_chan *dc, + struct txx9dmac_desc *first) +{ + struct txx9dmac_slave *ds = dc->chan.private; + u32 sai, dai; + + dev_vdbg(chan2dev(&dc->chan), "dostart %u %p\n", + first->txd.cookie, first); + /* ASSERT: channel is idle */ + if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) { + dev_err(chan2dev(&dc->chan), + "BUG: Attempted to start non-idle channel\n"); + txx9dmac_dump_regs(dc); + /* The tasklet will hopefully advance the queue... */ + return; + } + + if (is_dmac64(dc)) { + channel64_writel(dc, CNTR, 0); + channel64_writel(dc, CSR, 0xffffffff); + if (ds) { + if (ds->tx_reg) { + sai = ds->reg_width; + dai = 0; + } else { + sai = 0; + dai = ds->reg_width; + } + } else { + sai = 8; + dai = 8; + } + channel64_writel(dc, SAIR, sai); + channel64_writel(dc, DAIR, dai); + /* All 64-bit DMAC supports SMPCHN */ + channel64_writel(dc, CCR, dc->ccr); + /* Writing a non zero value to CHAR will assert XFACT */ + channel64_write_CHAR(dc, first->txd.phys); + } else { + channel32_writel(dc, CNTR, 0); + channel32_writel(dc, CSR, 0xffffffff); + if (ds) { + if (ds->tx_reg) { + sai = ds->reg_width; + dai = 0; + } else { + sai = 0; + dai = ds->reg_width; + } + } else { + sai = 4; + dai = 4; + } + channel32_writel(dc, SAIR, sai); + channel32_writel(dc, DAIR, dai); + if (txx9_dma_have_SMPCHN()) { + channel32_writel(dc, CCR, dc->ccr); + /* Writing a non zero value to CHAR will assert XFACT */ + channel32_writel(dc, CHAR, first->txd.phys); + } else { + channel32_writel(dc, CHAR, first->txd.phys); + channel32_writel(dc, CCR, dc->ccr); + } + } +} + +/*----------------------------------------------------------------------*/ + +static void +txx9dmac_descriptor_complete(struct txx9dmac_chan *dc, + struct txx9dmac_desc *desc) +{ + struct dmaengine_desc_callback cb; + struct dma_async_tx_descriptor *txd = &desc->txd; + + dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n", + txd->cookie, desc); + + dma_cookie_complete(txd); + dmaengine_desc_get_callback(txd, &cb); + + txx9dmac_sync_desc_for_cpu(dc, desc); + list_splice_init(&desc->tx_list, &dc->free_list); + list_move(&desc->desc_node, &dc->free_list); + + dma_descriptor_unmap(txd); + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + dmaengine_desc_callback_invoke(&cb, NULL); + dma_run_dependencies(txd); +} + +static void txx9dmac_dequeue(struct txx9dmac_chan *dc, struct list_head *list) +{ + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc; + struct txx9dmac_desc *prev = NULL; + + BUG_ON(!list_empty(list)); + do { + desc = txx9dmac_first_queued(dc); + if (prev) { + desc_write_CHAR(dc, prev, desc->txd.phys); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + } + prev = txx9dmac_last_child(desc); + list_move_tail(&desc->desc_node, list); + /* Make chain-completion interrupt happen */ + if ((desc->txd.flags & DMA_PREP_INTERRUPT) && + !txx9dmac_chan_INTENT(dc)) + break; + } while (!list_empty(&dc->queue)); +} + +static void txx9dmac_complete_all(struct txx9dmac_chan *dc) +{ + struct txx9dmac_desc *desc, *_desc; + LIST_HEAD(list); + + /* + * Submit queued descriptors ASAP, i.e. before we go through + * the completed ones. + */ + list_splice_init(&dc->active_list, &list); + if (!list_empty(&dc->queue)) { + txx9dmac_dequeue(dc, &dc->active_list); + txx9dmac_dostart(dc, txx9dmac_first_active(dc)); + } + + list_for_each_entry_safe(desc, _desc, &list, desc_node) + txx9dmac_descriptor_complete(dc, desc); +} + +static void txx9dmac_dump_desc(struct txx9dmac_chan *dc, + struct txx9dmac_hwdesc *desc) +{ + if (is_dmac64(dc)) { +#ifdef TXX9_DMA_USE_SIMPLE_CHAIN + dev_crit(chan2dev(&dc->chan), + " desc: ch%#llx s%#llx d%#llx c%#x\n", + (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR); +#else + dev_crit(chan2dev(&dc->chan), + " desc: ch%#llx s%#llx d%#llx c%#x" + " si%#x di%#x cc%#x cs%#x\n", + (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR, + desc->SAIR, desc->DAIR, desc->CCR, desc->CSR); +#endif + } else { + struct txx9dmac_hwdesc32 *d = (struct txx9dmac_hwdesc32 *)desc; +#ifdef TXX9_DMA_USE_SIMPLE_CHAIN + dev_crit(chan2dev(&dc->chan), + " desc: ch%#x s%#x d%#x c%#x\n", + d->CHAR, d->SAR, d->DAR, d->CNTR); +#else + dev_crit(chan2dev(&dc->chan), + " desc: ch%#x s%#x d%#x c%#x" + " si%#x di%#x cc%#x cs%#x\n", + d->CHAR, d->SAR, d->DAR, d->CNTR, + d->SAIR, d->DAIR, d->CCR, d->CSR); +#endif + } +} + +static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr) +{ + struct txx9dmac_desc *bad_desc; + struct txx9dmac_desc *child; + u32 errors; + + /* + * The descriptor currently at the head of the active list is + * borked. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to carry on. + */ + dev_crit(chan2dev(&dc->chan), "Abnormal Chain Completion\n"); + txx9dmac_dump_regs(dc); + + bad_desc = txx9dmac_first_active(dc); + list_del_init(&bad_desc->desc_node); + + /* Clear all error flags and try to restart the controller */ + errors = csr & (TXX9_DMA_CSR_ABCHC | + TXX9_DMA_CSR_CFERR | TXX9_DMA_CSR_CHERR | + TXX9_DMA_CSR_DESERR | TXX9_DMA_CSR_SORERR); + channel_writel(dc, CSR, errors); + + if (list_empty(&dc->active_list) && !list_empty(&dc->queue)) + txx9dmac_dequeue(dc, &dc->active_list); + if (!list_empty(&dc->active_list)) + txx9dmac_dostart(dc, txx9dmac_first_active(dc)); + + dev_crit(chan2dev(&dc->chan), + "Bad descriptor submitted for DMA! (cookie: %d)\n", + bad_desc->txd.cookie); + txx9dmac_dump_desc(dc, &bad_desc->hwdesc); + list_for_each_entry(child, &bad_desc->tx_list, desc_node) + txx9dmac_dump_desc(dc, &child->hwdesc); + /* Pretend the descriptor completed successfully */ + txx9dmac_descriptor_complete(dc, bad_desc); +} + +static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc) +{ + dma_addr_t chain; + struct txx9dmac_desc *desc, *_desc; + struct txx9dmac_desc *child; + u32 csr; + + if (is_dmac64(dc)) { + chain = channel64_read_CHAR(dc); + csr = channel64_readl(dc, CSR); + channel64_writel(dc, CSR, csr); + } else { + chain = channel32_readl(dc, CHAR); + csr = channel32_readl(dc, CSR); + channel32_writel(dc, CSR, csr); + } + /* For dynamic chain, we should look at XFACT instead of NCHNC */ + if (!(csr & (TXX9_DMA_CSR_XFACT | TXX9_DMA_CSR_ABCHC))) { + /* Everything we've submitted is done */ + txx9dmac_complete_all(dc); + return; + } + if (!(csr & TXX9_DMA_CSR_CHNEN)) + chain = 0; /* last descriptor of this chain */ + + dev_vdbg(chan2dev(&dc->chan), "scan_descriptors: char=%#llx\n", + (u64)chain); + + list_for_each_entry_safe(desc, _desc, &dc->active_list, desc_node) { + if (desc_read_CHAR(dc, desc) == chain) { + /* This one is currently in progress */ + if (csr & TXX9_DMA_CSR_ABCHC) + goto scan_done; + return; + } + + list_for_each_entry(child, &desc->tx_list, desc_node) + if (desc_read_CHAR(dc, child) == chain) { + /* Currently in progress */ + if (csr & TXX9_DMA_CSR_ABCHC) + goto scan_done; + return; + } + + /* + * No descriptors so far seem to be in progress, i.e. + * this one must be done. + */ + txx9dmac_descriptor_complete(dc, desc); + } +scan_done: + if (csr & TXX9_DMA_CSR_ABCHC) { + txx9dmac_handle_error(dc, csr); + return; + } + + dev_err(chan2dev(&dc->chan), + "BUG: All descriptors done, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + txx9dmac_reset_chan(dc); + + if (!list_empty(&dc->queue)) { + txx9dmac_dequeue(dc, &dc->active_list); + txx9dmac_dostart(dc, txx9dmac_first_active(dc)); + } +} + +static void txx9dmac_chan_tasklet(struct tasklet_struct *t) +{ + int irq; + u32 csr; + struct txx9dmac_chan *dc; + + dc = from_tasklet(dc, t, tasklet); + csr = channel_readl(dc, CSR); + dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n", csr); + + spin_lock(&dc->lock); + if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC | + TXX9_DMA_CSR_NTRNFC)) + txx9dmac_scan_descriptors(dc); + spin_unlock(&dc->lock); + irq = dc->irq; + + enable_irq(irq); +} + +static irqreturn_t txx9dmac_chan_interrupt(int irq, void *dev_id) +{ + struct txx9dmac_chan *dc = dev_id; + + dev_vdbg(chan2dev(&dc->chan), "interrupt: status=%#x\n", + channel_readl(dc, CSR)); + + tasklet_schedule(&dc->tasklet); + /* + * Just disable the interrupts. We'll turn them back on in the + * softirq handler. + */ + disable_irq_nosync(irq); + + return IRQ_HANDLED; +} + +static void txx9dmac_tasklet(struct tasklet_struct *t) +{ + int irq; + u32 csr; + struct txx9dmac_chan *dc; + + struct txx9dmac_dev *ddev = from_tasklet(ddev, t, tasklet); + u32 mcr; + int i; + + mcr = dma_readl(ddev, MCR); + dev_vdbg(ddev->chan[0]->dma.dev, "tasklet: mcr=%x\n", mcr); + for (i = 0; i < TXX9_DMA_MAX_NR_CHANNELS; i++) { + if ((mcr >> (24 + i)) & 0x11) { + dc = ddev->chan[i]; + csr = channel_readl(dc, CSR); + dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n", + csr); + spin_lock(&dc->lock); + if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC | + TXX9_DMA_CSR_NTRNFC)) + txx9dmac_scan_descriptors(dc); + spin_unlock(&dc->lock); + } + } + irq = ddev->irq; + + enable_irq(irq); +} + +static irqreturn_t txx9dmac_interrupt(int irq, void *dev_id) +{ + struct txx9dmac_dev *ddev = dev_id; + + dev_vdbg(ddev->chan[0]->dma.dev, "interrupt: status=%#x\n", + dma_readl(ddev, MCR)); + + tasklet_schedule(&ddev->tasklet); + /* + * Just disable the interrupts. We'll turn them back on in the + * softirq handler. + */ + disable_irq_nosync(irq); + + return IRQ_HANDLED; +} + +/*----------------------------------------------------------------------*/ + +static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct txx9dmac_desc *desc = txd_to_txx9dmac_desc(tx); + struct txx9dmac_chan *dc = to_txx9dmac_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&dc->lock); + cookie = dma_cookie_assign(tx); + + dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u %p\n", + desc->txd.cookie, desc); + + list_add_tail(&desc->desc_node, &dc->queue); + spin_unlock_bh(&dc->lock); + + return cookie; +} + +static struct dma_async_tx_descriptor * +txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc; + struct txx9dmac_desc *first; + struct txx9dmac_desc *prev; + size_t xfer_count; + size_t offset; + + dev_vdbg(chan2dev(chan), "prep_dma_memcpy d%#llx s%#llx l%#zx f%#lx\n", + (u64)dest, (u64)src, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + prev = first = NULL; + + for (offset = 0; offset < len; offset += xfer_count) { + xfer_count = min_t(size_t, len - offset, TXX9_DMA_MAX_COUNT); + /* + * Workaround for ERT-TX49H2-033, ERT-TX49H3-020, + * ERT-TX49H4-016 (slightly conservative) + */ + if (__is_dmac64(ddev)) { + if (xfer_count > 0x100 && + (xfer_count & 0xff) >= 0xfa && + (xfer_count & 0xff) <= 0xff) + xfer_count -= 0x20; + } else { + if (xfer_count > 0x80 && + (xfer_count & 0x7f) >= 0x7e && + (xfer_count & 0x7f) <= 0x7f) + xfer_count -= 0x20; + } + + desc = txx9dmac_desc_get(dc); + if (!desc) { + txx9dmac_desc_put(dc, first); + return NULL; + } + + if (__is_dmac64(ddev)) { + desc->hwdesc.SAR = src + offset; + desc->hwdesc.DAR = dest + offset; + desc->hwdesc.CNTR = xfer_count; + txx9dmac_desc_set_nosimple(ddev, desc, 8, 8, + dc->ccr | TXX9_DMA_CCR_XFACT); + } else { + desc->hwdesc32.SAR = src + offset; + desc->hwdesc32.DAR = dest + offset; + desc->hwdesc32.CNTR = xfer_count; + txx9dmac_desc_set_nosimple(ddev, desc, 4, 4, + dc->ccr | TXX9_DMA_CCR_XFACT); + } + + /* + * The descriptors on tx_list are not reachable from + * the dc->queue list or dc->active_list after a + * submit. If we put all descriptors on active_list, + * calling of callback on the completion will be more + * complex. + */ + if (!first) { + first = desc; + } else { + desc_write_CHAR(dc, prev, desc->txd.phys); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + } + + /* Trigger interrupt after last block */ + if (flags & DMA_PREP_INTERRUPT) + txx9dmac_desc_set_INTENT(ddev, prev); + + desc_write_CHAR(dc, prev, 0); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + + first->txd.flags = flags; + first->len = len; + + return &first->txd; +} + +static struct dma_async_tx_descriptor * +txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_slave *ds = chan->private; + struct txx9dmac_desc *prev; + struct txx9dmac_desc *first; + unsigned int i; + struct scatterlist *sg; + + dev_vdbg(chan2dev(chan), "prep_dma_slave\n"); + + BUG_ON(!ds || !ds->reg_width); + if (ds->tx_reg) + BUG_ON(direction != DMA_MEM_TO_DEV); + else + BUG_ON(direction != DMA_DEV_TO_MEM); + if (unlikely(!sg_len)) + return NULL; + + prev = first = NULL; + + for_each_sg(sgl, sg, sg_len, i) { + struct txx9dmac_desc *desc; + dma_addr_t mem; + u32 sai, dai; + + desc = txx9dmac_desc_get(dc); + if (!desc) { + txx9dmac_desc_put(dc, first); + return NULL; + } + + mem = sg_dma_address(sg); + + if (__is_dmac64(ddev)) { + if (direction == DMA_MEM_TO_DEV) { + desc->hwdesc.SAR = mem; + desc->hwdesc.DAR = ds->tx_reg; + } else { + desc->hwdesc.SAR = ds->rx_reg; + desc->hwdesc.DAR = mem; + } + desc->hwdesc.CNTR = sg_dma_len(sg); + } else { + if (direction == DMA_MEM_TO_DEV) { + desc->hwdesc32.SAR = mem; + desc->hwdesc32.DAR = ds->tx_reg; + } else { + desc->hwdesc32.SAR = ds->rx_reg; + desc->hwdesc32.DAR = mem; + } + desc->hwdesc32.CNTR = sg_dma_len(sg); + } + if (direction == DMA_MEM_TO_DEV) { + sai = ds->reg_width; + dai = 0; + } else { + sai = 0; + dai = ds->reg_width; + } + txx9dmac_desc_set_nosimple(ddev, desc, sai, dai, + dc->ccr | TXX9_DMA_CCR_XFACT); + + if (!first) { + first = desc; + } else { + desc_write_CHAR(dc, prev, desc->txd.phys); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, + ddev->descsize, + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + } + + /* Trigger interrupt after last block */ + if (flags & DMA_PREP_INTERRUPT) + txx9dmac_desc_set_INTENT(ddev, prev); + + desc_write_CHAR(dc, prev, 0); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + + first->txd.flags = flags; + first->len = 0; + + return &first->txd; +} + +static int txx9dmac_terminate_all(struct dma_chan *chan) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_desc *desc, *_desc; + LIST_HEAD(list); + + dev_vdbg(chan2dev(chan), "terminate_all\n"); + spin_lock_bh(&dc->lock); + + txx9dmac_reset_chan(dc); + + /* active_list entries will end up before queued entries */ + list_splice_init(&dc->queue, &list); + list_splice_init(&dc->active_list, &list); + + spin_unlock_bh(&dc->lock); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + txx9dmac_descriptor_complete(dc, desc); + + return 0; +} + +static enum dma_status +txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE) + return DMA_COMPLETE; + + spin_lock_bh(&dc->lock); + txx9dmac_scan_descriptors(dc); + spin_unlock_bh(&dc->lock); + + return dma_cookie_status(chan, cookie, txstate); +} + +static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc, + struct txx9dmac_desc *prev) +{ + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc; + LIST_HEAD(list); + + prev = txx9dmac_last_child(prev); + txx9dmac_dequeue(dc, &list); + desc = list_entry(list.next, struct txx9dmac_desc, desc_node); + desc_write_CHAR(dc, prev, desc->txd.phys); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + if (!(channel_readl(dc, CSR) & TXX9_DMA_CSR_CHNEN) && + channel_read_CHAR(dc) == prev->txd.phys) + /* Restart chain DMA */ + channel_write_CHAR(dc, desc->txd.phys); + list_splice_tail(&list, &dc->active_list); +} + +static void txx9dmac_issue_pending(struct dma_chan *chan) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + + spin_lock_bh(&dc->lock); + + if (!list_empty(&dc->active_list)) + txx9dmac_scan_descriptors(dc); + if (!list_empty(&dc->queue)) { + if (list_empty(&dc->active_list)) { + txx9dmac_dequeue(dc, &dc->active_list); + txx9dmac_dostart(dc, txx9dmac_first_active(dc)); + } else if (txx9_dma_have_SMPCHN()) { + struct txx9dmac_desc *prev = txx9dmac_last_active(dc); + + if (!(prev->txd.flags & DMA_PREP_INTERRUPT) || + txx9dmac_chan_INTENT(dc)) + txx9dmac_chain_dynamic(dc, prev); + } + } + + spin_unlock_bh(&dc->lock); +} + +static int txx9dmac_alloc_chan_resources(struct dma_chan *chan) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_slave *ds = chan->private; + struct txx9dmac_desc *desc; + int i; + + dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); + + /* ASSERT: channel is idle */ + if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) { + dev_dbg(chan2dev(chan), "DMA channel not idle?\n"); + return -EIO; + } + + dma_cookie_init(chan); + + dc->ccr = TXX9_DMA_CCR_IMMCHN | TXX9_DMA_CCR_INTENE | CCR_LE; + txx9dmac_chan_set_SMPCHN(dc); + if (!txx9_dma_have_SMPCHN() || (dc->ccr & TXX9_DMA_CCR_SMPCHN)) + dc->ccr |= TXX9_DMA_CCR_INTENC; + if (chan->device->device_prep_dma_memcpy) { + if (ds) + return -EINVAL; + dc->ccr |= TXX9_DMA_CCR_XFSZ_X8; + } else { + if (!ds || + (ds->tx_reg && ds->rx_reg) || (!ds->tx_reg && !ds->rx_reg)) + return -EINVAL; + dc->ccr |= TXX9_DMA_CCR_EXTRQ | + TXX9_DMA_CCR_XFSZ(__ffs(ds->reg_width)); + txx9dmac_chan_set_INTENT(dc); + } + + spin_lock_bh(&dc->lock); + i = dc->descs_allocated; + while (dc->descs_allocated < TXX9_DMA_INITIAL_DESC_COUNT) { + spin_unlock_bh(&dc->lock); + + desc = txx9dmac_desc_alloc(dc, GFP_KERNEL); + if (!desc) { + dev_info(chan2dev(chan), + "only allocated %d descriptors\n", i); + spin_lock_bh(&dc->lock); + break; + } + txx9dmac_desc_put(dc, desc); + + spin_lock_bh(&dc->lock); + i = ++dc->descs_allocated; + } + spin_unlock_bh(&dc->lock); + + dev_dbg(chan2dev(chan), + "alloc_chan_resources allocated %d descriptors\n", i); + + return i; +} + +static void txx9dmac_free_chan_resources(struct dma_chan *chan) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc, *_desc; + LIST_HEAD(list); + + dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n", + dc->descs_allocated); + + /* ASSERT: channel is idle */ + BUG_ON(!list_empty(&dc->active_list)); + BUG_ON(!list_empty(&dc->queue)); + BUG_ON(channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT); + + spin_lock_bh(&dc->lock); + list_splice_init(&dc->free_list, &list); + dc->descs_allocated = 0; + spin_unlock_bh(&dc->lock); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) { + dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); + dma_unmap_single(chan2parent(chan), desc->txd.phys, + ddev->descsize, DMA_TO_DEVICE); + kfree(desc); + } + + dev_vdbg(chan2dev(chan), "free_chan_resources done\n"); +} + +/*----------------------------------------------------------------------*/ + +static void txx9dmac_off(struct txx9dmac_dev *ddev) +{ + dma_writel(ddev, MCR, 0); +} + +static int __init txx9dmac_chan_probe(struct platform_device *pdev) +{ + struct txx9dmac_chan_platform_data *cpdata = + dev_get_platdata(&pdev->dev); + struct platform_device *dmac_dev = cpdata->dmac_dev; + struct txx9dmac_platform_data *pdata = dev_get_platdata(&dmac_dev->dev); + struct txx9dmac_chan *dc; + int err; + int ch = pdev->id % TXX9_DMA_MAX_NR_CHANNELS; + int irq; + + dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL); + if (!dc) + return -ENOMEM; + + dc->dma.dev = &pdev->dev; + dc->dma.device_alloc_chan_resources = txx9dmac_alloc_chan_resources; + dc->dma.device_free_chan_resources = txx9dmac_free_chan_resources; + dc->dma.device_terminate_all = txx9dmac_terminate_all; + dc->dma.device_tx_status = txx9dmac_tx_status; + dc->dma.device_issue_pending = txx9dmac_issue_pending; + if (pdata && pdata->memcpy_chan == ch) { + dc->dma.device_prep_dma_memcpy = txx9dmac_prep_dma_memcpy; + dma_cap_set(DMA_MEMCPY, dc->dma.cap_mask); + } else { + dc->dma.device_prep_slave_sg = txx9dmac_prep_slave_sg; + dma_cap_set(DMA_SLAVE, dc->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, dc->dma.cap_mask); + } + + INIT_LIST_HEAD(&dc->dma.channels); + dc->ddev = platform_get_drvdata(dmac_dev); + if (dc->ddev->irq < 0) { + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + tasklet_setup(&dc->tasklet, txx9dmac_chan_tasklet); + dc->irq = irq; + err = devm_request_irq(&pdev->dev, dc->irq, + txx9dmac_chan_interrupt, 0, dev_name(&pdev->dev), dc); + if (err) + return err; + } else + dc->irq = -1; + dc->ddev->chan[ch] = dc; + dc->chan.device = &dc->dma; + list_add_tail(&dc->chan.device_node, &dc->chan.device->channels); + dma_cookie_init(&dc->chan); + + if (is_dmac64(dc)) + dc->ch_regs = &__txx9dmac_regs(dc->ddev)->CHAN[ch]; + else + dc->ch_regs = &__txx9dmac_regs32(dc->ddev)->CHAN[ch]; + spin_lock_init(&dc->lock); + + INIT_LIST_HEAD(&dc->active_list); + INIT_LIST_HEAD(&dc->queue); + INIT_LIST_HEAD(&dc->free_list); + + txx9dmac_reset_chan(dc); + + platform_set_drvdata(pdev, dc); + + err = dma_async_device_register(&dc->dma); + if (err) + return err; + dev_dbg(&pdev->dev, "TXx9 DMA Channel (dma%d%s%s)\n", + dc->dma.dev_id, + dma_has_cap(DMA_MEMCPY, dc->dma.cap_mask) ? " memcpy" : "", + dma_has_cap(DMA_SLAVE, dc->dma.cap_mask) ? " slave" : ""); + + return 0; +} + +static int txx9dmac_chan_remove(struct platform_device *pdev) +{ + struct txx9dmac_chan *dc = platform_get_drvdata(pdev); + + + dma_async_device_unregister(&dc->dma); + if (dc->irq >= 0) { + devm_free_irq(&pdev->dev, dc->irq, dc); + tasklet_kill(&dc->tasklet); + } + dc->ddev->chan[pdev->id % TXX9_DMA_MAX_NR_CHANNELS] = NULL; + return 0; +} + +static int __init txx9dmac_probe(struct platform_device *pdev) +{ + struct txx9dmac_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct resource *io; + struct txx9dmac_dev *ddev; + u32 mcr; + int err; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + return -EINVAL; + + ddev = devm_kzalloc(&pdev->dev, sizeof(*ddev), GFP_KERNEL); + if (!ddev) + return -ENOMEM; + + if (!devm_request_mem_region(&pdev->dev, io->start, resource_size(io), + dev_name(&pdev->dev))) + return -EBUSY; + + ddev->regs = devm_ioremap(&pdev->dev, io->start, resource_size(io)); + if (!ddev->regs) + return -ENOMEM; + ddev->have_64bit_regs = pdata->have_64bit_regs; + if (__is_dmac64(ddev)) + ddev->descsize = sizeof(struct txx9dmac_hwdesc); + else + ddev->descsize = sizeof(struct txx9dmac_hwdesc32); + + /* force dma off, just in case */ + txx9dmac_off(ddev); + + ddev->irq = platform_get_irq(pdev, 0); + if (ddev->irq >= 0) { + tasklet_setup(&ddev->tasklet, txx9dmac_tasklet); + err = devm_request_irq(&pdev->dev, ddev->irq, + txx9dmac_interrupt, 0, dev_name(&pdev->dev), ddev); + if (err) + return err; + } + + mcr = TXX9_DMA_MCR_MSTEN | MCR_LE; + if (pdata && pdata->memcpy_chan >= 0) + mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan); + dma_writel(ddev, MCR, mcr); + + platform_set_drvdata(pdev, ddev); + return 0; +} + +static int txx9dmac_remove(struct platform_device *pdev) +{ + struct txx9dmac_dev *ddev = platform_get_drvdata(pdev); + + txx9dmac_off(ddev); + if (ddev->irq >= 0) { + devm_free_irq(&pdev->dev, ddev->irq, ddev); + tasklet_kill(&ddev->tasklet); + } + return 0; +} + +static void txx9dmac_shutdown(struct platform_device *pdev) +{ + struct txx9dmac_dev *ddev = platform_get_drvdata(pdev); + + txx9dmac_off(ddev); +} + +static int txx9dmac_suspend_noirq(struct device *dev) +{ + struct txx9dmac_dev *ddev = dev_get_drvdata(dev); + + txx9dmac_off(ddev); + return 0; +} + +static int txx9dmac_resume_noirq(struct device *dev) +{ + struct txx9dmac_dev *ddev = dev_get_drvdata(dev); + struct txx9dmac_platform_data *pdata = dev_get_platdata(dev); + u32 mcr; + + mcr = TXX9_DMA_MCR_MSTEN | MCR_LE; + if (pdata && pdata->memcpy_chan >= 0) + mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan); + dma_writel(ddev, MCR, mcr); + return 0; + +} + +static const struct dev_pm_ops txx9dmac_dev_pm_ops = { + .suspend_noirq = txx9dmac_suspend_noirq, + .resume_noirq = txx9dmac_resume_noirq, +}; + +static struct platform_driver txx9dmac_chan_driver = { + .remove = txx9dmac_chan_remove, + .driver = { + .name = "txx9dmac-chan", + }, +}; + +static struct platform_driver txx9dmac_driver = { + .remove = txx9dmac_remove, + .shutdown = txx9dmac_shutdown, + .driver = { + .name = "txx9dmac", + .pm = &txx9dmac_dev_pm_ops, + }, +}; + +static int __init txx9dmac_init(void) +{ + int rc; + + rc = platform_driver_probe(&txx9dmac_driver, txx9dmac_probe); + if (!rc) { + rc = platform_driver_probe(&txx9dmac_chan_driver, + txx9dmac_chan_probe); + if (rc) + platform_driver_unregister(&txx9dmac_driver); + } + return rc; +} +module_init(txx9dmac_init); + +static void __exit txx9dmac_exit(void) +{ + platform_driver_unregister(&txx9dmac_chan_driver); + platform_driver_unregister(&txx9dmac_driver); +} +module_exit(txx9dmac_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TXx9 DMA Controller driver"); +MODULE_AUTHOR("Atsushi Nemoto "); +MODULE_ALIAS("platform:txx9dmac"); +MODULE_ALIAS("platform:txx9dmac-chan"); diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h new file mode 100644 index 0000000000..aa53eafb15 --- /dev/null +++ b/drivers/dma/txx9dmac.h @@ -0,0 +1,304 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Driver for the TXx9 SoC DMA Controller + * + * Copyright (C) 2009 Atsushi Nemoto + */ +#ifndef TXX9DMAC_H +#define TXX9DMAC_H + +#include +#include + +/* + * Design Notes: + * + * This DMAC have four channels and one FIFO buffer. Each channel can + * be configured for memory-memory or device-memory transfer, but only + * one channel can do alignment-free memory-memory transfer at a time + * while the channel should occupy the FIFO buffer for effective + * transfers. + * + * Instead of dynamically assign the FIFO buffer to channels, I chose + * make one dedicated channel for memory-memory transfer. The + * dedicated channel is public. Other channels are private and used + * for slave transfer. Some devices in the SoC are wired to certain + * DMA channel. + */ + +#ifdef CONFIG_MACH_TX49XX +static inline bool txx9_dma_have_SMPCHN(void) +{ + return true; +} +#define TXX9_DMA_USE_SIMPLE_CHAIN +#else +static inline bool txx9_dma_have_SMPCHN(void) +{ + return false; +} +#endif + +#ifdef __LITTLE_ENDIAN +#ifdef CONFIG_MACH_TX49XX +#define CCR_LE TXX9_DMA_CCR_LE +#define MCR_LE 0 +#else +#define CCR_LE 0 +#define MCR_LE TXX9_DMA_MCR_LE +#endif +#else +#define CCR_LE 0 +#define MCR_LE 0 +#endif + +/* + * Redefine this macro to handle differences between 32- and 64-bit + * addressing, big vs. little endian, etc. + */ +#ifdef __BIG_ENDIAN +#define TXX9_DMA_REG32(name) u32 __pad_##name; u32 name +#else +#define TXX9_DMA_REG32(name) u32 name; u32 __pad_##name +#endif + +/* Hardware register definitions. */ +struct txx9dmac_cregs { +#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT) + TXX9_DMA_REG32(CHAR); /* Chain Address Register */ +#else + u64 CHAR; /* Chain Address Register */ +#endif + u64 SAR; /* Source Address Register */ + u64 DAR; /* Destination Address Register */ + TXX9_DMA_REG32(CNTR); /* Count Register */ + TXX9_DMA_REG32(SAIR); /* Source Address Increment Register */ + TXX9_DMA_REG32(DAIR); /* Destination Address Increment Register */ + TXX9_DMA_REG32(CCR); /* Channel Control Register */ + TXX9_DMA_REG32(CSR); /* Channel Status Register */ +}; +struct txx9dmac_cregs32 { + u32 CHAR; + u32 SAR; + u32 DAR; + u32 CNTR; + u32 SAIR; + u32 DAIR; + u32 CCR; + u32 CSR; +}; + +struct txx9dmac_regs { + /* per-channel registers */ + struct txx9dmac_cregs CHAN[TXX9_DMA_MAX_NR_CHANNELS]; + u64 __pad[9]; + u64 MFDR; /* Memory Fill Data Register */ + TXX9_DMA_REG32(MCR); /* Master Control Register */ +}; +struct txx9dmac_regs32 { + struct txx9dmac_cregs32 CHAN[TXX9_DMA_MAX_NR_CHANNELS]; + u32 __pad[9]; + u32 MFDR; + u32 MCR; +}; + +/* bits for MCR */ +#define TXX9_DMA_MCR_EIS(ch) (0x10000000<<(ch)) +#define TXX9_DMA_MCR_DIS(ch) (0x01000000<<(ch)) +#define TXX9_DMA_MCR_RSFIF 0x00000080 +#define TXX9_DMA_MCR_FIFUM(ch) (0x00000008<<(ch)) +#define TXX9_DMA_MCR_LE 0x00000004 +#define TXX9_DMA_MCR_RPRT 0x00000002 +#define TXX9_DMA_MCR_MSTEN 0x00000001 + +/* bits for CCRn */ +#define TXX9_DMA_CCR_IMMCHN 0x20000000 +#define TXX9_DMA_CCR_USEXFSZ 0x10000000 +#define TXX9_DMA_CCR_LE 0x08000000 +#define TXX9_DMA_CCR_DBINH 0x04000000 +#define TXX9_DMA_CCR_SBINH 0x02000000 +#define TXX9_DMA_CCR_CHRST 0x01000000 +#define TXX9_DMA_CCR_RVBYTE 0x00800000 +#define TXX9_DMA_CCR_ACKPOL 0x00400000 +#define TXX9_DMA_CCR_REQPL 0x00200000 +#define TXX9_DMA_CCR_EGREQ 0x00100000 +#define TXX9_DMA_CCR_CHDN 0x00080000 +#define TXX9_DMA_CCR_DNCTL 0x00060000 +#define TXX9_DMA_CCR_EXTRQ 0x00010000 +#define TXX9_DMA_CCR_INTRQD 0x0000e000 +#define TXX9_DMA_CCR_INTENE 0x00001000 +#define TXX9_DMA_CCR_INTENC 0x00000800 +#define TXX9_DMA_CCR_INTENT 0x00000400 +#define TXX9_DMA_CCR_CHNEN 0x00000200 +#define TXX9_DMA_CCR_XFACT 0x00000100 +#define TXX9_DMA_CCR_SMPCHN 0x00000020 +#define TXX9_DMA_CCR_XFSZ(order) (((order) << 2) & 0x0000001c) +#define TXX9_DMA_CCR_XFSZ_1 TXX9_DMA_CCR_XFSZ(0) +#define TXX9_DMA_CCR_XFSZ_2 TXX9_DMA_CCR_XFSZ(1) +#define TXX9_DMA_CCR_XFSZ_4 TXX9_DMA_CCR_XFSZ(2) +#define TXX9_DMA_CCR_XFSZ_8 TXX9_DMA_CCR_XFSZ(3) +#define TXX9_DMA_CCR_XFSZ_X4 TXX9_DMA_CCR_XFSZ(4) +#define TXX9_DMA_CCR_XFSZ_X8 TXX9_DMA_CCR_XFSZ(5) +#define TXX9_DMA_CCR_XFSZ_X16 TXX9_DMA_CCR_XFSZ(6) +#define TXX9_DMA_CCR_XFSZ_X32 TXX9_DMA_CCR_XFSZ(7) +#define TXX9_DMA_CCR_MEMIO 0x00000002 +#define TXX9_DMA_CCR_SNGAD 0x00000001 + +/* bits for CSRn */ +#define TXX9_DMA_CSR_CHNEN 0x00000400 +#define TXX9_DMA_CSR_STLXFER 0x00000200 +#define TXX9_DMA_CSR_XFACT 0x00000100 +#define TXX9_DMA_CSR_ABCHC 0x00000080 +#define TXX9_DMA_CSR_NCHNC 0x00000040 +#define TXX9_DMA_CSR_NTRNFC 0x00000020 +#define TXX9_DMA_CSR_EXTDN 0x00000010 +#define TXX9_DMA_CSR_CFERR 0x00000008 +#define TXX9_DMA_CSR_CHERR 0x00000004 +#define TXX9_DMA_CSR_DESERR 0x00000002 +#define TXX9_DMA_CSR_SORERR 0x00000001 + +struct txx9dmac_chan { + struct dma_chan chan; + struct dma_device dma; + struct txx9dmac_dev *ddev; + void __iomem *ch_regs; + struct tasklet_struct tasklet; + int irq; + u32 ccr; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + + unsigned int descs_allocated; +}; + +struct txx9dmac_dev { + void __iomem *regs; + struct tasklet_struct tasklet; + int irq; + struct txx9dmac_chan *chan[TXX9_DMA_MAX_NR_CHANNELS]; + bool have_64bit_regs; + unsigned int descsize; +}; + +static inline bool __is_dmac64(const struct txx9dmac_dev *ddev) +{ + return ddev->have_64bit_regs; +} + +static inline bool is_dmac64(const struct txx9dmac_chan *dc) +{ + return __is_dmac64(dc->ddev); +} + +#ifdef TXX9_DMA_USE_SIMPLE_CHAIN +/* Hardware descriptor definition. (for simple-chain) */ +struct txx9dmac_hwdesc { +#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT) + TXX9_DMA_REG32(CHAR); +#else + u64 CHAR; +#endif + u64 SAR; + u64 DAR; + TXX9_DMA_REG32(CNTR); +}; +struct txx9dmac_hwdesc32 { + u32 CHAR; + u32 SAR; + u32 DAR; + u32 CNTR; +}; +#else +#define txx9dmac_hwdesc txx9dmac_cregs +#define txx9dmac_hwdesc32 txx9dmac_cregs32 +#endif + +struct txx9dmac_desc { + /* FIRST values the hardware uses */ + union { + struct txx9dmac_hwdesc hwdesc; + struct txx9dmac_hwdesc32 hwdesc32; + }; + + /* THEN values for driver housekeeping */ + struct list_head desc_node ____cacheline_aligned; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + size_t len; +}; + +#ifdef TXX9_DMA_USE_SIMPLE_CHAIN + +static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc) +{ + return (dc->ccr & TXX9_DMA_CCR_INTENT) != 0; +} + +static inline void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc) +{ + dc->ccr |= TXX9_DMA_CCR_INTENT; +} + +static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev, + struct txx9dmac_desc *desc) +{ +} + +static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc) +{ + dc->ccr |= TXX9_DMA_CCR_SMPCHN; +} + +static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev, + struct txx9dmac_desc *desc, + u32 sair, u32 dair, u32 ccr) +{ +} + +#else /* TXX9_DMA_USE_SIMPLE_CHAIN */ + +static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc) +{ + return true; +} + +static void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc) +{ +} + +static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev, + struct txx9dmac_desc *desc) +{ + if (__is_dmac64(ddev)) + desc->hwdesc.CCR |= TXX9_DMA_CCR_INTENT; + else + desc->hwdesc32.CCR |= TXX9_DMA_CCR_INTENT; +} + +static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc) +{ +} + +static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev, + struct txx9dmac_desc *desc, + u32 sai, u32 dai, u32 ccr) +{ + if (__is_dmac64(ddev)) { + desc->hwdesc.SAIR = sai; + desc->hwdesc.DAIR = dai; + desc->hwdesc.CCR = ccr; + } else { + desc->hwdesc32.SAIR = sai; + desc->hwdesc32.DAIR = dai; + desc->hwdesc32.CCR = ccr; + } +} + +#endif /* TXX9_DMA_USE_SIMPLE_CHAIN */ + +#endif /* TXX9DMAC_H */ diff --git a/drivers/dma/uniphier-mdmac.c b/drivers/dma/uniphier-mdmac.c new file mode 100644 index 0000000000..618839df07 --- /dev/null +++ b/drivers/dma/uniphier-mdmac.c @@ -0,0 +1,501 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (C) 2018 Socionext Inc. +// Author: Masahiro Yamada + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +/* registers common for all channels */ +#define UNIPHIER_MDMAC_CMD 0x000 /* issue DMA start/abort */ +#define UNIPHIER_MDMAC_CMD_ABORT BIT(31) /* 1: abort, 0: start */ + +/* per-channel registers */ +#define UNIPHIER_MDMAC_CH_OFFSET 0x100 +#define UNIPHIER_MDMAC_CH_STRIDE 0x040 + +#define UNIPHIER_MDMAC_CH_IRQ_STAT 0x010 /* current hw status (RO) */ +#define UNIPHIER_MDMAC_CH_IRQ_REQ 0x014 /* latched STAT (WOC) */ +#define UNIPHIER_MDMAC_CH_IRQ_EN 0x018 /* IRQ enable mask */ +#define UNIPHIER_MDMAC_CH_IRQ_DET 0x01c /* REQ & EN (RO) */ +#define UNIPHIER_MDMAC_CH_IRQ__ABORT BIT(13) +#define UNIPHIER_MDMAC_CH_IRQ__DONE BIT(1) +#define UNIPHIER_MDMAC_CH_SRC_MODE 0x020 /* mode of source */ +#define UNIPHIER_MDMAC_CH_DEST_MODE 0x024 /* mode of destination */ +#define UNIPHIER_MDMAC_CH_MODE__ADDR_INC (0 << 4) +#define UNIPHIER_MDMAC_CH_MODE__ADDR_DEC (1 << 4) +#define UNIPHIER_MDMAC_CH_MODE__ADDR_FIXED (2 << 4) +#define UNIPHIER_MDMAC_CH_SRC_ADDR 0x028 /* source address */ +#define UNIPHIER_MDMAC_CH_DEST_ADDR 0x02c /* destination address */ +#define UNIPHIER_MDMAC_CH_SIZE 0x030 /* transfer bytes */ + +#define UNIPHIER_MDMAC_SLAVE_BUSWIDTHS \ + (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +struct uniphier_mdmac_desc { + struct virt_dma_desc vd; + struct scatterlist *sgl; + unsigned int sg_len; + unsigned int sg_cur; + enum dma_transfer_direction dir; +}; + +struct uniphier_mdmac_chan { + struct virt_dma_chan vc; + struct uniphier_mdmac_device *mdev; + struct uniphier_mdmac_desc *md; + void __iomem *reg_ch_base; + unsigned int chan_id; +}; + +struct uniphier_mdmac_device { + struct dma_device ddev; + struct clk *clk; + void __iomem *reg_base; + struct uniphier_mdmac_chan channels[]; +}; + +static struct uniphier_mdmac_chan * +to_uniphier_mdmac_chan(struct virt_dma_chan *vc) +{ + return container_of(vc, struct uniphier_mdmac_chan, vc); +} + +static struct uniphier_mdmac_desc * +to_uniphier_mdmac_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct uniphier_mdmac_desc, vd); +} + +/* mc->vc.lock must be held by caller */ +static struct uniphier_mdmac_desc * +uniphier_mdmac_next_desc(struct uniphier_mdmac_chan *mc) +{ + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&mc->vc); + if (!vd) { + mc->md = NULL; + return NULL; + } + + list_del(&vd->node); + + mc->md = to_uniphier_mdmac_desc(vd); + + return mc->md; +} + +/* mc->vc.lock must be held by caller */ +static void uniphier_mdmac_handle(struct uniphier_mdmac_chan *mc, + struct uniphier_mdmac_desc *md) +{ + struct uniphier_mdmac_device *mdev = mc->mdev; + struct scatterlist *sg; + u32 irq_flag = UNIPHIER_MDMAC_CH_IRQ__DONE; + u32 src_mode, src_addr, dest_mode, dest_addr, chunk_size; + + sg = &md->sgl[md->sg_cur]; + + if (md->dir == DMA_MEM_TO_DEV) { + src_mode = UNIPHIER_MDMAC_CH_MODE__ADDR_INC; + src_addr = sg_dma_address(sg); + dest_mode = UNIPHIER_MDMAC_CH_MODE__ADDR_FIXED; + dest_addr = 0; + } else { + src_mode = UNIPHIER_MDMAC_CH_MODE__ADDR_FIXED; + src_addr = 0; + dest_mode = UNIPHIER_MDMAC_CH_MODE__ADDR_INC; + dest_addr = sg_dma_address(sg); + } + + chunk_size = sg_dma_len(sg); + + writel(src_mode, mc->reg_ch_base + UNIPHIER_MDMAC_CH_SRC_MODE); + writel(dest_mode, mc->reg_ch_base + UNIPHIER_MDMAC_CH_DEST_MODE); + writel(src_addr, mc->reg_ch_base + UNIPHIER_MDMAC_CH_SRC_ADDR); + writel(dest_addr, mc->reg_ch_base + UNIPHIER_MDMAC_CH_DEST_ADDR); + writel(chunk_size, mc->reg_ch_base + UNIPHIER_MDMAC_CH_SIZE); + + /* write 1 to clear */ + writel(irq_flag, mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_REQ); + + writel(irq_flag, mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_EN); + + writel(BIT(mc->chan_id), mdev->reg_base + UNIPHIER_MDMAC_CMD); +} + +/* mc->vc.lock must be held by caller */ +static void uniphier_mdmac_start(struct uniphier_mdmac_chan *mc) +{ + struct uniphier_mdmac_desc *md; + + md = uniphier_mdmac_next_desc(mc); + if (md) + uniphier_mdmac_handle(mc, md); +} + +/* mc->vc.lock must be held by caller */ +static int uniphier_mdmac_abort(struct uniphier_mdmac_chan *mc) +{ + struct uniphier_mdmac_device *mdev = mc->mdev; + u32 irq_flag = UNIPHIER_MDMAC_CH_IRQ__ABORT; + u32 val; + + /* write 1 to clear */ + writel(irq_flag, mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_REQ); + + writel(UNIPHIER_MDMAC_CMD_ABORT | BIT(mc->chan_id), + mdev->reg_base + UNIPHIER_MDMAC_CMD); + + /* + * Abort should be accepted soon. We poll the bit here instead of + * waiting for the interrupt. + */ + return readl_poll_timeout(mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_REQ, + val, val & irq_flag, 0, 20); +} + +static irqreturn_t uniphier_mdmac_interrupt(int irq, void *dev_id) +{ + struct uniphier_mdmac_chan *mc = dev_id; + struct uniphier_mdmac_desc *md; + irqreturn_t ret = IRQ_HANDLED; + u32 irq_stat; + + spin_lock(&mc->vc.lock); + + irq_stat = readl(mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_DET); + + /* + * Some channels share a single interrupt line. If the IRQ status is 0, + * this is probably triggered by a different channel. + */ + if (!irq_stat) { + ret = IRQ_NONE; + goto out; + } + + /* write 1 to clear */ + writel(irq_stat, mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_REQ); + + /* + * UNIPHIER_MDMAC_CH_IRQ__DONE interrupt is asserted even when the DMA + * is aborted. To distinguish the normal completion and the abort, + * check mc->md. If it is NULL, we are aborting. + */ + md = mc->md; + if (!md) + goto out; + + md->sg_cur++; + + if (md->sg_cur >= md->sg_len) { + vchan_cookie_complete(&md->vd); + md = uniphier_mdmac_next_desc(mc); + if (!md) + goto out; + } + + uniphier_mdmac_handle(mc, md); + +out: + spin_unlock(&mc->vc.lock); + + return ret; +} + +static void uniphier_mdmac_free_chan_resources(struct dma_chan *chan) +{ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +static struct dma_async_tx_descriptor * +uniphier_mdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct uniphier_mdmac_desc *md; + + if (!is_slave_direction(direction)) + return NULL; + + md = kzalloc(sizeof(*md), GFP_NOWAIT); + if (!md) + return NULL; + + md->sgl = sgl; + md->sg_len = sg_len; + md->dir = direction; + + return vchan_tx_prep(vc, &md->vd, flags); +} + +static int uniphier_mdmac_terminate_all(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct uniphier_mdmac_chan *mc = to_uniphier_mdmac_chan(vc); + unsigned long flags; + int ret = 0; + LIST_HEAD(head); + + spin_lock_irqsave(&vc->lock, flags); + + if (mc->md) { + vchan_terminate_vdesc(&mc->md->vd); + mc->md = NULL; + ret = uniphier_mdmac_abort(mc); + } + vchan_get_all_descriptors(vc, &head); + + spin_unlock_irqrestore(&vc->lock, flags); + + vchan_dma_desc_free_list(vc, &head); + + return ret; +} + +static void uniphier_mdmac_synchronize(struct dma_chan *chan) +{ + vchan_synchronize(to_virt_chan(chan)); +} + +static enum dma_status uniphier_mdmac_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct virt_dma_chan *vc; + struct virt_dma_desc *vd; + struct uniphier_mdmac_chan *mc; + struct uniphier_mdmac_desc *md = NULL; + enum dma_status stat; + unsigned long flags; + int i; + + stat = dma_cookie_status(chan, cookie, txstate); + /* Return immediately if we do not need to compute the residue. */ + if (stat == DMA_COMPLETE || !txstate) + return stat; + + vc = to_virt_chan(chan); + + spin_lock_irqsave(&vc->lock, flags); + + mc = to_uniphier_mdmac_chan(vc); + + if (mc->md && mc->md->vd.tx.cookie == cookie) { + /* residue from the on-flight chunk */ + txstate->residue = readl(mc->reg_ch_base + + UNIPHIER_MDMAC_CH_SIZE); + md = mc->md; + } + + if (!md) { + vd = vchan_find_desc(vc, cookie); + if (vd) + md = to_uniphier_mdmac_desc(vd); + } + + if (md) { + /* residue from the queued chunks */ + for (i = md->sg_cur; i < md->sg_len; i++) + txstate->residue += sg_dma_len(&md->sgl[i]); + } + + spin_unlock_irqrestore(&vc->lock, flags); + + return stat; +} + +static void uniphier_mdmac_issue_pending(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct uniphier_mdmac_chan *mc = to_uniphier_mdmac_chan(vc); + unsigned long flags; + + spin_lock_irqsave(&vc->lock, flags); + + if (vchan_issue_pending(vc) && !mc->md) + uniphier_mdmac_start(mc); + + spin_unlock_irqrestore(&vc->lock, flags); +} + +static void uniphier_mdmac_desc_free(struct virt_dma_desc *vd) +{ + kfree(to_uniphier_mdmac_desc(vd)); +} + +static int uniphier_mdmac_chan_init(struct platform_device *pdev, + struct uniphier_mdmac_device *mdev, + int chan_id) +{ + struct device *dev = &pdev->dev; + struct uniphier_mdmac_chan *mc = &mdev->channels[chan_id]; + char *irq_name; + int irq, ret; + + irq = platform_get_irq(pdev, chan_id); + if (irq < 0) + return irq; + + irq_name = devm_kasprintf(dev, GFP_KERNEL, "uniphier-mio-dmac-ch%d", + chan_id); + if (!irq_name) + return -ENOMEM; + + ret = devm_request_irq(dev, irq, uniphier_mdmac_interrupt, + IRQF_SHARED, irq_name, mc); + if (ret) + return ret; + + mc->mdev = mdev; + mc->reg_ch_base = mdev->reg_base + UNIPHIER_MDMAC_CH_OFFSET + + UNIPHIER_MDMAC_CH_STRIDE * chan_id; + mc->chan_id = chan_id; + mc->vc.desc_free = uniphier_mdmac_desc_free; + vchan_init(&mc->vc, &mdev->ddev); + + return 0; +} + +static int uniphier_mdmac_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct uniphier_mdmac_device *mdev; + struct dma_device *ddev; + int nr_chans, ret, i; + + nr_chans = platform_irq_count(pdev); + if (nr_chans < 0) + return nr_chans; + + ret = dma_set_mask(dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + mdev = devm_kzalloc(dev, struct_size(mdev, channels, nr_chans), + GFP_KERNEL); + if (!mdev) + return -ENOMEM; + + mdev->reg_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mdev->reg_base)) + return PTR_ERR(mdev->reg_base); + + mdev->clk = devm_clk_get(dev, NULL); + if (IS_ERR(mdev->clk)) { + dev_err(dev, "failed to get clock\n"); + return PTR_ERR(mdev->clk); + } + + ret = clk_prepare_enable(mdev->clk); + if (ret) + return ret; + + ddev = &mdev->ddev; + ddev->dev = dev; + dma_cap_set(DMA_PRIVATE, ddev->cap_mask); + ddev->src_addr_widths = UNIPHIER_MDMAC_SLAVE_BUSWIDTHS; + ddev->dst_addr_widths = UNIPHIER_MDMAC_SLAVE_BUSWIDTHS; + ddev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + ddev->device_free_chan_resources = uniphier_mdmac_free_chan_resources; + ddev->device_prep_slave_sg = uniphier_mdmac_prep_slave_sg; + ddev->device_terminate_all = uniphier_mdmac_terminate_all; + ddev->device_synchronize = uniphier_mdmac_synchronize; + ddev->device_tx_status = uniphier_mdmac_tx_status; + ddev->device_issue_pending = uniphier_mdmac_issue_pending; + INIT_LIST_HEAD(&ddev->channels); + + for (i = 0; i < nr_chans; i++) { + ret = uniphier_mdmac_chan_init(pdev, mdev, i); + if (ret) + goto disable_clk; + } + + ret = dma_async_device_register(ddev); + if (ret) + goto disable_clk; + + ret = of_dma_controller_register(dev->of_node, of_dma_xlate_by_chan_id, + ddev); + if (ret) + goto unregister_dmac; + + platform_set_drvdata(pdev, mdev); + + return 0; + +unregister_dmac: + dma_async_device_unregister(ddev); +disable_clk: + clk_disable_unprepare(mdev->clk); + + return ret; +} + +static int uniphier_mdmac_remove(struct platform_device *pdev) +{ + struct uniphier_mdmac_device *mdev = platform_get_drvdata(pdev); + struct dma_chan *chan; + int ret; + + /* + * Before reaching here, almost all descriptors have been freed by the + * ->device_free_chan_resources() hook. However, each channel might + * be still holding one descriptor that was on-flight at that moment. + * Terminate it to make sure this hardware is no longer running. Then, + * free the channel resources once again to avoid memory leak. + */ + list_for_each_entry(chan, &mdev->ddev.channels, device_node) { + ret = dmaengine_terminate_sync(chan); + if (ret) + return ret; + uniphier_mdmac_free_chan_resources(chan); + } + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&mdev->ddev); + clk_disable_unprepare(mdev->clk); + + return 0; +} + +static const struct of_device_id uniphier_mdmac_match[] = { + { .compatible = "socionext,uniphier-mio-dmac" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, uniphier_mdmac_match); + +static struct platform_driver uniphier_mdmac_driver = { + .probe = uniphier_mdmac_probe, + .remove = uniphier_mdmac_remove, + .driver = { + .name = "uniphier-mio-dmac", + .of_match_table = uniphier_mdmac_match, + }, +}; +module_platform_driver(uniphier_mdmac_driver); + +MODULE_AUTHOR("Masahiro Yamada "); +MODULE_DESCRIPTION("UniPhier MIO DMAC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c new file mode 100644 index 0000000000..290836b7e1 --- /dev/null +++ b/drivers/dma/uniphier-xdmac.c @@ -0,0 +1,611 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * External DMA controller driver for UniPhier SoCs + * Copyright 2019 Socionext Inc. + * Author: Kunihiko Hayashi + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +#define XDMAC_CH_WIDTH 0x100 + +#define XDMAC_TFA 0x08 +#define XDMAC_TFA_MCNT_MASK GENMASK(23, 16) +#define XDMAC_TFA_MASK GENMASK(5, 0) +#define XDMAC_SADM 0x10 +#define XDMAC_SADM_STW_MASK GENMASK(25, 24) +#define XDMAC_SADM_SAM BIT(4) +#define XDMAC_SADM_SAM_FIXED XDMAC_SADM_SAM +#define XDMAC_SADM_SAM_INC 0 +#define XDMAC_DADM 0x14 +#define XDMAC_DADM_DTW_MASK XDMAC_SADM_STW_MASK +#define XDMAC_DADM_DAM XDMAC_SADM_SAM +#define XDMAC_DADM_DAM_FIXED XDMAC_SADM_SAM_FIXED +#define XDMAC_DADM_DAM_INC XDMAC_SADM_SAM_INC +#define XDMAC_EXSAD 0x18 +#define XDMAC_EXDAD 0x1c +#define XDMAC_SAD 0x20 +#define XDMAC_DAD 0x24 +#define XDMAC_ITS 0x28 +#define XDMAC_ITS_MASK GENMASK(25, 0) +#define XDMAC_TNUM 0x2c +#define XDMAC_TNUM_MASK GENMASK(15, 0) +#define XDMAC_TSS 0x30 +#define XDMAC_TSS_REQ BIT(0) +#define XDMAC_IEN 0x34 +#define XDMAC_IEN_ERRIEN BIT(1) +#define XDMAC_IEN_ENDIEN BIT(0) +#define XDMAC_STAT 0x40 +#define XDMAC_STAT_TENF BIT(0) +#define XDMAC_IR 0x44 +#define XDMAC_IR_ERRF BIT(1) +#define XDMAC_IR_ENDF BIT(0) +#define XDMAC_ID 0x48 +#define XDMAC_ID_ERRIDF BIT(1) +#define XDMAC_ID_ENDIDF BIT(0) + +#define XDMAC_MAX_CHANS 16 +#define XDMAC_INTERVAL_CLKS 20 +#define XDMAC_MAX_WORDS XDMAC_TNUM_MASK + +/* cut lower bit for maintain alignment of maximum transfer size */ +#define XDMAC_MAX_WORD_SIZE (XDMAC_ITS_MASK & ~GENMASK(3, 0)) + +#define UNIPHIER_XDMAC_BUSWIDTHS \ + (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +struct uniphier_xdmac_desc_node { + dma_addr_t src; + dma_addr_t dst; + u32 burst_size; + u32 nr_burst; +}; + +struct uniphier_xdmac_desc { + struct virt_dma_desc vd; + + unsigned int nr_node; + unsigned int cur_node; + enum dma_transfer_direction dir; + struct uniphier_xdmac_desc_node nodes[]; +}; + +struct uniphier_xdmac_chan { + struct virt_dma_chan vc; + struct uniphier_xdmac_device *xdev; + struct uniphier_xdmac_desc *xd; + void __iomem *reg_ch_base; + struct dma_slave_config sconfig; + int id; + unsigned int req_factor; +}; + +struct uniphier_xdmac_device { + struct dma_device ddev; + void __iomem *reg_base; + int nr_chans; + struct uniphier_xdmac_chan channels[]; +}; + +static struct uniphier_xdmac_chan * +to_uniphier_xdmac_chan(struct virt_dma_chan *vc) +{ + return container_of(vc, struct uniphier_xdmac_chan, vc); +} + +static struct uniphier_xdmac_desc * +to_uniphier_xdmac_desc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct uniphier_xdmac_desc, vd); +} + +/* xc->vc.lock must be held by caller */ +static struct uniphier_xdmac_desc * +uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc) +{ + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&xc->vc); + if (!vd) + return NULL; + + list_del(&vd->node); + + return to_uniphier_xdmac_desc(vd); +} + +/* xc->vc.lock must be held by caller */ +static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc, + struct uniphier_xdmac_desc *xd) +{ + u32 src_mode, src_width; + u32 dst_mode, dst_width; + dma_addr_t src_addr, dst_addr; + u32 val, its, tnum; + enum dma_slave_buswidth buswidth; + + src_addr = xd->nodes[xd->cur_node].src; + dst_addr = xd->nodes[xd->cur_node].dst; + its = xd->nodes[xd->cur_node].burst_size; + tnum = xd->nodes[xd->cur_node].nr_burst; + + /* + * The width of MEM side must be 4 or 8 bytes, that does not + * affect that of DEV side and transfer size. + */ + if (xd->dir == DMA_DEV_TO_MEM) { + src_mode = XDMAC_SADM_SAM_FIXED; + buswidth = xc->sconfig.src_addr_width; + } else { + src_mode = XDMAC_SADM_SAM_INC; + buswidth = DMA_SLAVE_BUSWIDTH_8_BYTES; + } + src_width = FIELD_PREP(XDMAC_SADM_STW_MASK, __ffs(buswidth)); + + if (xd->dir == DMA_MEM_TO_DEV) { + dst_mode = XDMAC_DADM_DAM_FIXED; + buswidth = xc->sconfig.dst_addr_width; + } else { + dst_mode = XDMAC_DADM_DAM_INC; + buswidth = DMA_SLAVE_BUSWIDTH_8_BYTES; + } + dst_width = FIELD_PREP(XDMAC_DADM_DTW_MASK, __ffs(buswidth)); + + /* setup transfer factor */ + val = FIELD_PREP(XDMAC_TFA_MCNT_MASK, XDMAC_INTERVAL_CLKS); + val |= FIELD_PREP(XDMAC_TFA_MASK, xc->req_factor); + writel(val, xc->reg_ch_base + XDMAC_TFA); + + /* setup the channel */ + writel(lower_32_bits(src_addr), xc->reg_ch_base + XDMAC_SAD); + writel(upper_32_bits(src_addr), xc->reg_ch_base + XDMAC_EXSAD); + + writel(lower_32_bits(dst_addr), xc->reg_ch_base + XDMAC_DAD); + writel(upper_32_bits(dst_addr), xc->reg_ch_base + XDMAC_EXDAD); + + src_mode |= src_width; + dst_mode |= dst_width; + writel(src_mode, xc->reg_ch_base + XDMAC_SADM); + writel(dst_mode, xc->reg_ch_base + XDMAC_DADM); + + writel(its, xc->reg_ch_base + XDMAC_ITS); + writel(tnum, xc->reg_ch_base + XDMAC_TNUM); + + /* enable interrupt */ + writel(XDMAC_IEN_ENDIEN | XDMAC_IEN_ERRIEN, + xc->reg_ch_base + XDMAC_IEN); + + /* start XDMAC */ + val = readl(xc->reg_ch_base + XDMAC_TSS); + val |= XDMAC_TSS_REQ; + writel(val, xc->reg_ch_base + XDMAC_TSS); +} + +/* xc->vc.lock must be held by caller */ +static int uniphier_xdmac_chan_stop(struct uniphier_xdmac_chan *xc) +{ + u32 val; + + /* disable interrupt */ + val = readl(xc->reg_ch_base + XDMAC_IEN); + val &= ~(XDMAC_IEN_ENDIEN | XDMAC_IEN_ERRIEN); + writel(val, xc->reg_ch_base + XDMAC_IEN); + + /* stop XDMAC */ + val = readl(xc->reg_ch_base + XDMAC_TSS); + val &= ~XDMAC_TSS_REQ; + writel(0, xc->reg_ch_base + XDMAC_TSS); + + /* wait until transfer is stopped */ + return readl_poll_timeout_atomic(xc->reg_ch_base + XDMAC_STAT, val, + !(val & XDMAC_STAT_TENF), 100, 1000); +} + +/* xc->vc.lock must be held by caller */ +static void uniphier_xdmac_start(struct uniphier_xdmac_chan *xc) +{ + struct uniphier_xdmac_desc *xd; + + xd = uniphier_xdmac_next_desc(xc); + if (xd) + uniphier_xdmac_chan_start(xc, xd); + + /* set desc to chan regardless of xd is null */ + xc->xd = xd; +} + +static void uniphier_xdmac_chan_irq(struct uniphier_xdmac_chan *xc) +{ + u32 stat; + int ret; + + spin_lock(&xc->vc.lock); + + stat = readl(xc->reg_ch_base + XDMAC_ID); + + if (stat & XDMAC_ID_ERRIDF) { + ret = uniphier_xdmac_chan_stop(xc); + if (ret) + dev_err(xc->xdev->ddev.dev, + "DMA transfer error with aborting issue\n"); + else + dev_err(xc->xdev->ddev.dev, + "DMA transfer error\n"); + + } else if ((stat & XDMAC_ID_ENDIDF) && xc->xd) { + xc->xd->cur_node++; + if (xc->xd->cur_node >= xc->xd->nr_node) { + vchan_cookie_complete(&xc->xd->vd); + uniphier_xdmac_start(xc); + } else { + uniphier_xdmac_chan_start(xc, xc->xd); + } + } + + /* write bits to clear */ + writel(stat, xc->reg_ch_base + XDMAC_IR); + + spin_unlock(&xc->vc.lock); +} + +static irqreturn_t uniphier_xdmac_irq_handler(int irq, void *dev_id) +{ + struct uniphier_xdmac_device *xdev = dev_id; + int i; + + for (i = 0; i < xdev->nr_chans; i++) + uniphier_xdmac_chan_irq(&xdev->channels[i]); + + return IRQ_HANDLED; +} + +static void uniphier_xdmac_free_chan_resources(struct dma_chan *chan) +{ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +static struct dma_async_tx_descriptor * +uniphier_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct uniphier_xdmac_desc *xd; + unsigned int nr; + size_t burst_size, tlen; + int i; + + if (len > XDMAC_MAX_WORD_SIZE * XDMAC_MAX_WORDS) + return NULL; + + nr = 1 + len / XDMAC_MAX_WORD_SIZE; + + xd = kzalloc(struct_size(xd, nodes, nr), GFP_NOWAIT); + if (!xd) + return NULL; + + for (i = 0; i < nr; i++) { + burst_size = min_t(size_t, len, XDMAC_MAX_WORD_SIZE); + xd->nodes[i].src = src; + xd->nodes[i].dst = dst; + xd->nodes[i].burst_size = burst_size; + xd->nodes[i].nr_burst = len / burst_size; + tlen = rounddown(len, burst_size); + src += tlen; + dst += tlen; + len -= tlen; + } + + xd->dir = DMA_MEM_TO_MEM; + xd->nr_node = nr; + xd->cur_node = 0; + + return vchan_tx_prep(vc, &xd->vd, flags); +} + +static struct dma_async_tx_descriptor * +uniphier_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct uniphier_xdmac_chan *xc = to_uniphier_xdmac_chan(vc); + struct uniphier_xdmac_desc *xd; + struct scatterlist *sg; + enum dma_slave_buswidth buswidth; + u32 maxburst; + int i; + + if (!is_slave_direction(direction)) + return NULL; + + if (direction == DMA_DEV_TO_MEM) { + buswidth = xc->sconfig.src_addr_width; + maxburst = xc->sconfig.src_maxburst; + } else { + buswidth = xc->sconfig.dst_addr_width; + maxburst = xc->sconfig.dst_maxburst; + } + + if (!maxburst) + maxburst = 1; + if (maxburst > xc->xdev->ddev.max_burst) { + dev_err(xc->xdev->ddev.dev, + "Exceed maximum number of burst words\n"); + return NULL; + } + + xd = kzalloc(struct_size(xd, nodes, sg_len), GFP_NOWAIT); + if (!xd) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + xd->nodes[i].src = (direction == DMA_DEV_TO_MEM) + ? xc->sconfig.src_addr : sg_dma_address(sg); + xd->nodes[i].dst = (direction == DMA_MEM_TO_DEV) + ? xc->sconfig.dst_addr : sg_dma_address(sg); + xd->nodes[i].burst_size = maxburst * buswidth; + xd->nodes[i].nr_burst = + sg_dma_len(sg) / xd->nodes[i].burst_size; + + /* + * Currently transfer that size doesn't align the unit size + * (the number of burst words * bus-width) is not allowed, + * because the driver does not support the way to transfer + * residue size. As a matter of fact, in order to transfer + * arbitrary size, 'src_maxburst' or 'dst_maxburst' of + * dma_slave_config must be 1. + */ + if (sg_dma_len(sg) % xd->nodes[i].burst_size) { + dev_err(xc->xdev->ddev.dev, + "Unaligned transfer size: %d", sg_dma_len(sg)); + kfree(xd); + return NULL; + } + + if (xd->nodes[i].nr_burst > XDMAC_MAX_WORDS) { + dev_err(xc->xdev->ddev.dev, + "Exceed maximum transfer size"); + kfree(xd); + return NULL; + } + } + + xd->dir = direction; + xd->nr_node = sg_len; + xd->cur_node = 0; + + return vchan_tx_prep(vc, &xd->vd, flags); +} + +static int uniphier_xdmac_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct uniphier_xdmac_chan *xc = to_uniphier_xdmac_chan(vc); + + memcpy(&xc->sconfig, config, sizeof(*config)); + + return 0; +} + +static int uniphier_xdmac_terminate_all(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct uniphier_xdmac_chan *xc = to_uniphier_xdmac_chan(vc); + unsigned long flags; + int ret = 0; + LIST_HEAD(head); + + spin_lock_irqsave(&vc->lock, flags); + + if (xc->xd) { + vchan_terminate_vdesc(&xc->xd->vd); + xc->xd = NULL; + ret = uniphier_xdmac_chan_stop(xc); + } + + vchan_get_all_descriptors(vc, &head); + + spin_unlock_irqrestore(&vc->lock, flags); + + vchan_dma_desc_free_list(vc, &head); + + return ret; +} + +static void uniphier_xdmac_synchronize(struct dma_chan *chan) +{ + vchan_synchronize(to_virt_chan(chan)); +} + +static void uniphier_xdmac_issue_pending(struct dma_chan *chan) +{ + struct virt_dma_chan *vc = to_virt_chan(chan); + struct uniphier_xdmac_chan *xc = to_uniphier_xdmac_chan(vc); + unsigned long flags; + + spin_lock_irqsave(&vc->lock, flags); + + if (vchan_issue_pending(vc) && !xc->xd) + uniphier_xdmac_start(xc); + + spin_unlock_irqrestore(&vc->lock, flags); +} + +static void uniphier_xdmac_desc_free(struct virt_dma_desc *vd) +{ + kfree(to_uniphier_xdmac_desc(vd)); +} + +static void uniphier_xdmac_chan_init(struct uniphier_xdmac_device *xdev, + int ch) +{ + struct uniphier_xdmac_chan *xc = &xdev->channels[ch]; + + xc->xdev = xdev; + xc->reg_ch_base = xdev->reg_base + XDMAC_CH_WIDTH * ch; + xc->vc.desc_free = uniphier_xdmac_desc_free; + + vchan_init(&xc->vc, &xdev->ddev); +} + +static struct dma_chan *of_dma_uniphier_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct uniphier_xdmac_device *xdev = ofdma->of_dma_data; + int chan_id = dma_spec->args[0]; + + if (chan_id >= xdev->nr_chans) + return NULL; + + xdev->channels[chan_id].id = chan_id; + xdev->channels[chan_id].req_factor = dma_spec->args[1]; + + return dma_get_slave_channel(&xdev->channels[chan_id].vc.chan); +} + +static int uniphier_xdmac_probe(struct platform_device *pdev) +{ + struct uniphier_xdmac_device *xdev; + struct device *dev = &pdev->dev; + struct dma_device *ddev; + int irq; + int nr_chans; + int i, ret; + + if (of_property_read_u32(dev->of_node, "dma-channels", &nr_chans)) + return -EINVAL; + if (nr_chans > XDMAC_MAX_CHANS) + nr_chans = XDMAC_MAX_CHANS; + + xdev = devm_kzalloc(dev, struct_size(xdev, channels, nr_chans), + GFP_KERNEL); + if (!xdev) + return -ENOMEM; + + xdev->nr_chans = nr_chans; + xdev->reg_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(xdev->reg_base)) + return PTR_ERR(xdev->reg_base); + + ddev = &xdev->ddev; + ddev->dev = dev; + dma_cap_zero(ddev->cap_mask); + dma_cap_set(DMA_MEMCPY, ddev->cap_mask); + dma_cap_set(DMA_SLAVE, ddev->cap_mask); + ddev->src_addr_widths = UNIPHIER_XDMAC_BUSWIDTHS; + ddev->dst_addr_widths = UNIPHIER_XDMAC_BUSWIDTHS; + ddev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | + BIT(DMA_MEM_TO_MEM); + ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + ddev->max_burst = XDMAC_MAX_WORDS; + ddev->device_free_chan_resources = uniphier_xdmac_free_chan_resources; + ddev->device_prep_dma_memcpy = uniphier_xdmac_prep_dma_memcpy; + ddev->device_prep_slave_sg = uniphier_xdmac_prep_slave_sg; + ddev->device_config = uniphier_xdmac_slave_config; + ddev->device_terminate_all = uniphier_xdmac_terminate_all; + ddev->device_synchronize = uniphier_xdmac_synchronize; + ddev->device_tx_status = dma_cookie_status; + ddev->device_issue_pending = uniphier_xdmac_issue_pending; + INIT_LIST_HEAD(&ddev->channels); + + for (i = 0; i < nr_chans; i++) + uniphier_xdmac_chan_init(xdev, i); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(dev, irq, uniphier_xdmac_irq_handler, + IRQF_SHARED, "xdmac", xdev); + if (ret) { + dev_err(dev, "Failed to request IRQ\n"); + return ret; + } + + ret = dma_async_device_register(ddev); + if (ret) { + dev_err(dev, "Failed to register XDMA device\n"); + return ret; + } + + ret = of_dma_controller_register(dev->of_node, + of_dma_uniphier_xlate, xdev); + if (ret) { + dev_err(dev, "Failed to register XDMA controller\n"); + goto out_unregister_dmac; + } + + platform_set_drvdata(pdev, xdev); + + dev_info(&pdev->dev, "UniPhier XDMAC driver (%d channels)\n", + nr_chans); + + return 0; + +out_unregister_dmac: + dma_async_device_unregister(ddev); + + return ret; +} + +static int uniphier_xdmac_remove(struct platform_device *pdev) +{ + struct uniphier_xdmac_device *xdev = platform_get_drvdata(pdev); + struct dma_device *ddev = &xdev->ddev; + struct dma_chan *chan; + int ret; + + /* + * Before reaching here, almost all descriptors have been freed by the + * ->device_free_chan_resources() hook. However, each channel might + * be still holding one descriptor that was on-flight at that moment. + * Terminate it to make sure this hardware is no longer running. Then, + * free the channel resources once again to avoid memory leak. + */ + list_for_each_entry(chan, &ddev->channels, device_node) { + ret = dmaengine_terminate_sync(chan); + if (ret) + return ret; + uniphier_xdmac_free_chan_resources(chan); + } + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(ddev); + + return 0; +} + +static const struct of_device_id uniphier_xdmac_match[] = { + { .compatible = "socionext,uniphier-xdmac" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, uniphier_xdmac_match); + +static struct platform_driver uniphier_xdmac_driver = { + .probe = uniphier_xdmac_probe, + .remove = uniphier_xdmac_remove, + .driver = { + .name = "uniphier-xdmac", + .of_match_table = uniphier_xdmac_match, + }, +}; +module_platform_driver(uniphier_xdmac_driver); + +MODULE_AUTHOR("Kunihiko Hayashi "); +MODULE_DESCRIPTION("UniPhier external DMA controller driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c new file mode 100644 index 0000000000..a6f4265be0 --- /dev/null +++ b/drivers/dma/virt-dma.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Virtual DMA channel support for DMAengine + * + * Copyright (C) 2012 Russell King + */ +#include +#include +#include +#include + +#include "virt-dma.h" + +static struct virt_dma_desc *to_virt_desc(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct virt_dma_desc, tx); +} + +dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct virt_dma_chan *vc = to_virt_chan(tx->chan); + struct virt_dma_desc *vd = to_virt_desc(tx); + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&vc->lock, flags); + cookie = dma_cookie_assign(tx); + + list_move_tail(&vd->node, &vc->desc_submitted); + spin_unlock_irqrestore(&vc->lock, flags); + + dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n", + vc, vd, cookie); + + return cookie; +} +EXPORT_SYMBOL_GPL(vchan_tx_submit); + +/** + * vchan_tx_desc_free - free a reusable descriptor + * @tx: the transfer + * + * This function frees a previously allocated reusable descriptor. The only + * other way is to clear the DMA_CTRL_REUSE flag and submit one last time the + * transfer. + * + * Returns 0 upon success + */ +int vchan_tx_desc_free(struct dma_async_tx_descriptor *tx) +{ + struct virt_dma_chan *vc = to_virt_chan(tx->chan); + struct virt_dma_desc *vd = to_virt_desc(tx); + unsigned long flags; + + spin_lock_irqsave(&vc->lock, flags); + list_del(&vd->node); + spin_unlock_irqrestore(&vc->lock, flags); + + dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n", + vc, vd, vd->tx.cookie); + vc->desc_free(vd); + return 0; +} +EXPORT_SYMBOL_GPL(vchan_tx_desc_free); + +struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *vc, + dma_cookie_t cookie) +{ + struct virt_dma_desc *vd; + + list_for_each_entry(vd, &vc->desc_issued, node) + if (vd->tx.cookie == cookie) + return vd; + + return NULL; +} +EXPORT_SYMBOL_GPL(vchan_find_desc); + +/* + * This tasklet handles the completion of a DMA descriptor by + * calling its callback and freeing it. + */ +static void vchan_complete(struct tasklet_struct *t) +{ + struct virt_dma_chan *vc = from_tasklet(vc, t, task); + struct virt_dma_desc *vd, *_vd; + struct dmaengine_desc_callback cb; + LIST_HEAD(head); + + spin_lock_irq(&vc->lock); + list_splice_tail_init(&vc->desc_completed, &head); + vd = vc->cyclic; + if (vd) { + vc->cyclic = NULL; + dmaengine_desc_get_callback(&vd->tx, &cb); + } else { + memset(&cb, 0, sizeof(cb)); + } + spin_unlock_irq(&vc->lock); + + dmaengine_desc_callback_invoke(&cb, &vd->tx_result); + + list_for_each_entry_safe(vd, _vd, &head, node) { + dmaengine_desc_get_callback(&vd->tx, &cb); + + list_del(&vd->node); + dmaengine_desc_callback_invoke(&cb, &vd->tx_result); + vchan_vdesc_fini(vd); + } +} + +void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head) +{ + struct virt_dma_desc *vd, *_vd; + + list_for_each_entry_safe(vd, _vd, head, node) { + list_del(&vd->node); + vchan_vdesc_fini(vd); + } +} +EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list); + +void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev) +{ + dma_cookie_init(&vc->chan); + + spin_lock_init(&vc->lock); + INIT_LIST_HEAD(&vc->desc_allocated); + INIT_LIST_HEAD(&vc->desc_submitted); + INIT_LIST_HEAD(&vc->desc_issued); + INIT_LIST_HEAD(&vc->desc_completed); + INIT_LIST_HEAD(&vc->desc_terminated); + + tasklet_setup(&vc->task, vchan_complete); + + vc->chan.device = dmadev; + list_add_tail(&vc->chan.device_node, &dmadev->channels); +} +EXPORT_SYMBOL_GPL(vchan_init); + +MODULE_AUTHOR("Russell King"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h new file mode 100644 index 0000000000..e9f5250fbe --- /dev/null +++ b/drivers/dma/virt-dma.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Virtual DMA channel support for DMAengine + * + * Copyright (C) 2012 Russell King + */ +#ifndef VIRT_DMA_H +#define VIRT_DMA_H + +#include +#include + +#include "dmaengine.h" + +struct virt_dma_desc { + struct dma_async_tx_descriptor tx; + struct dmaengine_result tx_result; + /* protected by vc.lock */ + struct list_head node; +}; + +struct virt_dma_chan { + struct dma_chan chan; + struct tasklet_struct task; + void (*desc_free)(struct virt_dma_desc *); + + spinlock_t lock; + + /* protected by vc.lock */ + struct list_head desc_allocated; + struct list_head desc_submitted; + struct list_head desc_issued; + struct list_head desc_completed; + struct list_head desc_terminated; + + struct virt_dma_desc *cyclic; +}; + +static inline struct virt_dma_chan *to_virt_chan(struct dma_chan *chan) +{ + return container_of(chan, struct virt_dma_chan, chan); +} + +void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head); +void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev); +struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *, dma_cookie_t); +extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *); +extern int vchan_tx_desc_free(struct dma_async_tx_descriptor *); + +/** + * vchan_tx_prep - prepare a descriptor + * @vc: virtual channel allocating this descriptor + * @vd: virtual descriptor to prepare + * @tx_flags: flags argument passed in to prepare function + */ +static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan *vc, + struct virt_dma_desc *vd, unsigned long tx_flags) +{ + unsigned long flags; + + dma_async_tx_descriptor_init(&vd->tx, &vc->chan); + vd->tx.flags = tx_flags; + vd->tx.tx_submit = vchan_tx_submit; + vd->tx.desc_free = vchan_tx_desc_free; + + vd->tx_result.result = DMA_TRANS_NOERROR; + vd->tx_result.residue = 0; + + spin_lock_irqsave(&vc->lock, flags); + list_add_tail(&vd->node, &vc->desc_allocated); + spin_unlock_irqrestore(&vc->lock, flags); + + return &vd->tx; +} + +/** + * vchan_issue_pending - move submitted descriptors to issued list + * @vc: virtual channel to update + * + * vc.lock must be held by caller + */ +static inline bool vchan_issue_pending(struct virt_dma_chan *vc) +{ + list_splice_tail_init(&vc->desc_submitted, &vc->desc_issued); + return !list_empty(&vc->desc_issued); +} + +/** + * vchan_cookie_complete - report completion of a descriptor + * @vd: virtual descriptor to update + * + * vc.lock must be held by caller + */ +static inline void vchan_cookie_complete(struct virt_dma_desc *vd) +{ + struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan); + dma_cookie_t cookie; + + cookie = vd->tx.cookie; + dma_cookie_complete(&vd->tx); + dev_vdbg(vc->chan.device->dev, "txd %p[%x]: marked complete\n", + vd, cookie); + list_add_tail(&vd->node, &vc->desc_completed); + + tasklet_schedule(&vc->task); +} + +/** + * vchan_vdesc_fini - Free or reuse a descriptor + * @vd: virtual descriptor to free/reuse + */ +static inline void vchan_vdesc_fini(struct virt_dma_desc *vd) +{ + struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan); + + if (dmaengine_desc_test_reuse(&vd->tx)) { + unsigned long flags; + + spin_lock_irqsave(&vc->lock, flags); + list_add(&vd->node, &vc->desc_allocated); + spin_unlock_irqrestore(&vc->lock, flags); + } else { + vc->desc_free(vd); + } +} + +/** + * vchan_cyclic_callback - report the completion of a period + * @vd: virtual descriptor + */ +static inline void vchan_cyclic_callback(struct virt_dma_desc *vd) +{ + struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan); + + vc->cyclic = vd; + tasklet_schedule(&vc->task); +} + +/** + * vchan_terminate_vdesc - Disable pending cyclic callback + * @vd: virtual descriptor to be terminated + * + * vc.lock must be held by caller + */ +static inline void vchan_terminate_vdesc(struct virt_dma_desc *vd) +{ + struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan); + + list_add_tail(&vd->node, &vc->desc_terminated); + + if (vc->cyclic == vd) + vc->cyclic = NULL; +} + +/** + * vchan_next_desc - peek at the next descriptor to be processed + * @vc: virtual channel to obtain descriptor from + * + * vc.lock must be held by caller + */ +static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) +{ + return list_first_entry_or_null(&vc->desc_issued, + struct virt_dma_desc, node); +} + +/** + * vchan_get_all_descriptors - obtain all submitted and issued descriptors + * @vc: virtual channel to get descriptors from + * @head: list of descriptors found + * + * vc.lock must be held by caller + * + * Removes all submitted and issued descriptors from internal lists, and + * provides a list of all descriptors found + */ +static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, + struct list_head *head) +{ + list_splice_tail_init(&vc->desc_allocated, head); + list_splice_tail_init(&vc->desc_submitted, head); + list_splice_tail_init(&vc->desc_issued, head); + list_splice_tail_init(&vc->desc_completed, head); + list_splice_tail_init(&vc->desc_terminated, head); +} + +static inline void vchan_free_chan_resources(struct virt_dma_chan *vc) +{ + struct virt_dma_desc *vd; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&vc->lock, flags); + vchan_get_all_descriptors(vc, &head); + list_for_each_entry(vd, &head, node) + dmaengine_desc_clear_reuse(&vd->tx); + spin_unlock_irqrestore(&vc->lock, flags); + + vchan_dma_desc_free_list(vc, &head); +} + +/** + * vchan_synchronize() - synchronize callback execution to the current context + * @vc: virtual channel to synchronize + * + * Makes sure that all scheduled or active callbacks have finished running. For + * proper operation the caller has to ensure that no new callbacks are scheduled + * after the invocation of this function started. + * Free up the terminated cyclic descriptor to prevent memory leakage. + */ +static inline void vchan_synchronize(struct virt_dma_chan *vc) +{ + LIST_HEAD(head); + unsigned long flags; + + tasklet_kill(&vc->task); + + spin_lock_irqsave(&vc->lock, flags); + + list_splice_tail_init(&vc->desc_terminated, &head); + + spin_unlock_irqrestore(&vc->lock, flags); + + vchan_dma_desc_free_list(vc, &head); +} + +#endif diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c new file mode 100644 index 0000000000..bb4ff8c867 --- /dev/null +++ b/drivers/dma/xgene-dma.c @@ -0,0 +1,1834 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Applied Micro X-Gene SoC DMA engine Driver + * + * Copyright (c) 2015, Applied Micro Circuits Corporation + * Authors: Rameshwar Prasad Sahu + * Loc Ho + * + * NOTE: PM support is currently not available. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" + +/* X-Gene DMA ring csr registers and bit definations */ +#define XGENE_DMA_RING_CONFIG 0x04 +#define XGENE_DMA_RING_ENABLE BIT(31) +#define XGENE_DMA_RING_ID 0x08 +#define XGENE_DMA_RING_ID_SETUP(v) ((v) | BIT(31)) +#define XGENE_DMA_RING_ID_BUF 0x0C +#define XGENE_DMA_RING_ID_BUF_SETUP(v) (((v) << 9) | BIT(21)) +#define XGENE_DMA_RING_THRESLD0_SET1 0x30 +#define XGENE_DMA_RING_THRESLD0_SET1_VAL 0X64 +#define XGENE_DMA_RING_THRESLD1_SET1 0x34 +#define XGENE_DMA_RING_THRESLD1_SET1_VAL 0xC8 +#define XGENE_DMA_RING_HYSTERESIS 0x68 +#define XGENE_DMA_RING_HYSTERESIS_VAL 0xFFFFFFFF +#define XGENE_DMA_RING_STATE 0x6C +#define XGENE_DMA_RING_STATE_WR_BASE 0x70 +#define XGENE_DMA_RING_NE_INT_MODE 0x017C +#define XGENE_DMA_RING_NE_INT_MODE_SET(m, v) \ + ((m) = ((m) & ~BIT(31 - (v))) | BIT(31 - (v))) +#define XGENE_DMA_RING_NE_INT_MODE_RESET(m, v) \ + ((m) &= (~BIT(31 - (v)))) +#define XGENE_DMA_RING_CLKEN 0xC208 +#define XGENE_DMA_RING_SRST 0xC200 +#define XGENE_DMA_RING_MEM_RAM_SHUTDOWN 0xD070 +#define XGENE_DMA_RING_BLK_MEM_RDY 0xD074 +#define XGENE_DMA_RING_BLK_MEM_RDY_VAL 0xFFFFFFFF +#define XGENE_DMA_RING_ID_GET(owner, num) (((owner) << 6) | (num)) +#define XGENE_DMA_RING_DST_ID(v) ((1 << 10) | (v)) +#define XGENE_DMA_RING_CMD_OFFSET 0x2C +#define XGENE_DMA_RING_CMD_BASE_OFFSET(v) ((v) << 6) +#define XGENE_DMA_RING_COHERENT_SET(m) \ + (((u32 *)(m))[2] |= BIT(4)) +#define XGENE_DMA_RING_ADDRL_SET(m, v) \ + (((u32 *)(m))[2] |= (((v) >> 8) << 5)) +#define XGENE_DMA_RING_ADDRH_SET(m, v) \ + (((u32 *)(m))[3] |= ((v) >> 35)) +#define XGENE_DMA_RING_ACCEPTLERR_SET(m) \ + (((u32 *)(m))[3] |= BIT(19)) +#define XGENE_DMA_RING_SIZE_SET(m, v) \ + (((u32 *)(m))[3] |= ((v) << 23)) +#define XGENE_DMA_RING_RECOMBBUF_SET(m) \ + (((u32 *)(m))[3] |= BIT(27)) +#define XGENE_DMA_RING_RECOMTIMEOUTL_SET(m) \ + (((u32 *)(m))[3] |= (0x7 << 28)) +#define XGENE_DMA_RING_RECOMTIMEOUTH_SET(m) \ + (((u32 *)(m))[4] |= 0x3) +#define XGENE_DMA_RING_SELTHRSH_SET(m) \ + (((u32 *)(m))[4] |= BIT(3)) +#define XGENE_DMA_RING_TYPE_SET(m, v) \ + (((u32 *)(m))[4] |= ((v) << 19)) + +/* X-Gene DMA device csr registers and bit definitions */ +#define XGENE_DMA_IPBRR 0x0 +#define XGENE_DMA_DEV_ID_RD(v) ((v) & 0x00000FFF) +#define XGENE_DMA_BUS_ID_RD(v) (((v) >> 12) & 3) +#define XGENE_DMA_REV_NO_RD(v) (((v) >> 14) & 3) +#define XGENE_DMA_GCR 0x10 +#define XGENE_DMA_CH_SETUP(v) \ + ((v) = ((v) & ~0x000FFFFF) | 0x000AAFFF) +#define XGENE_DMA_ENABLE(v) ((v) |= BIT(31)) +#define XGENE_DMA_DISABLE(v) ((v) &= ~BIT(31)) +#define XGENE_DMA_RAID6_CONT 0x14 +#define XGENE_DMA_RAID6_MULTI_CTRL(v) ((v) << 24) +#define XGENE_DMA_INT 0x70 +#define XGENE_DMA_INT_MASK 0x74 +#define XGENE_DMA_INT_ALL_MASK 0xFFFFFFFF +#define XGENE_DMA_INT_ALL_UNMASK 0x0 +#define XGENE_DMA_INT_MASK_SHIFT 0x14 +#define XGENE_DMA_RING_INT0_MASK 0x90A0 +#define XGENE_DMA_RING_INT1_MASK 0x90A8 +#define XGENE_DMA_RING_INT2_MASK 0x90B0 +#define XGENE_DMA_RING_INT3_MASK 0x90B8 +#define XGENE_DMA_RING_INT4_MASK 0x90C0 +#define XGENE_DMA_CFG_RING_WQ_ASSOC 0x90E0 +#define XGENE_DMA_ASSOC_RING_MNGR1 0xFFFFFFFF +#define XGENE_DMA_MEM_RAM_SHUTDOWN 0xD070 +#define XGENE_DMA_BLK_MEM_RDY 0xD074 +#define XGENE_DMA_BLK_MEM_RDY_VAL 0xFFFFFFFF +#define XGENE_DMA_RING_CMD_SM_OFFSET 0x8000 + +/* X-Gene SoC EFUSE csr register and bit defination */ +#define XGENE_SOC_JTAG1_SHADOW 0x18 +#define XGENE_DMA_PQ_DISABLE_MASK BIT(13) + +/* X-Gene DMA Descriptor format */ +#define XGENE_DMA_DESC_NV_BIT BIT_ULL(50) +#define XGENE_DMA_DESC_IN_BIT BIT_ULL(55) +#define XGENE_DMA_DESC_C_BIT BIT_ULL(63) +#define XGENE_DMA_DESC_DR_BIT BIT_ULL(61) +#define XGENE_DMA_DESC_ELERR_POS 46 +#define XGENE_DMA_DESC_RTYPE_POS 56 +#define XGENE_DMA_DESC_LERR_POS 60 +#define XGENE_DMA_DESC_BUFLEN_POS 48 +#define XGENE_DMA_DESC_HOENQ_NUM_POS 48 +#define XGENE_DMA_DESC_ELERR_RD(m) \ + (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3) +#define XGENE_DMA_DESC_LERR_RD(m) \ + (((m) >> XGENE_DMA_DESC_LERR_POS) & 0x7) +#define XGENE_DMA_DESC_STATUS(elerr, lerr) \ + (((elerr) << 4) | (lerr)) + +/* X-Gene DMA descriptor empty s/w signature */ +#define XGENE_DMA_DESC_EMPTY_SIGNATURE ~0ULL + +/* X-Gene DMA configurable parameters defines */ +#define XGENE_DMA_RING_NUM 512 +#define XGENE_DMA_BUFNUM 0x0 +#define XGENE_DMA_CPU_BUFNUM 0x18 +#define XGENE_DMA_RING_OWNER_DMA 0x03 +#define XGENE_DMA_RING_OWNER_CPU 0x0F +#define XGENE_DMA_RING_TYPE_REGULAR 0x01 +#define XGENE_DMA_RING_WQ_DESC_SIZE 32 /* 32 Bytes */ +#define XGENE_DMA_RING_NUM_CONFIG 5 +#define XGENE_DMA_MAX_CHANNEL 4 +#define XGENE_DMA_XOR_CHANNEL 0 +#define XGENE_DMA_PQ_CHANNEL 1 +#define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */ +#define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */ +#define XGENE_DMA_MAX_XOR_SRC 5 +#define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 +#define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL + +/* X-Gene DMA descriptor error codes */ +#define ERR_DESC_AXI 0x01 +#define ERR_BAD_DESC 0x02 +#define ERR_READ_DATA_AXI 0x03 +#define ERR_WRITE_DATA_AXI 0x04 +#define ERR_FBP_TIMEOUT 0x05 +#define ERR_ECC 0x06 +#define ERR_DIFF_SIZE 0x08 +#define ERR_SCT_GAT_LEN 0x09 +#define ERR_CRC_ERR 0x11 +#define ERR_CHKSUM 0x12 +#define ERR_DIF 0x13 + +/* X-Gene DMA error interrupt codes */ +#define ERR_DIF_SIZE_INT 0x0 +#define ERR_GS_ERR_INT 0x1 +#define ERR_FPB_TIMEO_INT 0x2 +#define ERR_WFIFO_OVF_INT 0x3 +#define ERR_RFIFO_OVF_INT 0x4 +#define ERR_WR_TIMEO_INT 0x5 +#define ERR_RD_TIMEO_INT 0x6 +#define ERR_WR_ERR_INT 0x7 +#define ERR_RD_ERR_INT 0x8 +#define ERR_BAD_DESC_INT 0x9 +#define ERR_DESC_DST_INT 0xA +#define ERR_DESC_SRC_INT 0xB + +/* X-Gene DMA flyby operation code */ +#define FLYBY_2SRC_XOR 0x80 +#define FLYBY_3SRC_XOR 0x90 +#define FLYBY_4SRC_XOR 0xA0 +#define FLYBY_5SRC_XOR 0xB0 + +/* X-Gene DMA SW descriptor flags */ +#define XGENE_DMA_FLAG_64B_DESC BIT(0) + +/* Define to dump X-Gene DMA descriptor */ +#define XGENE_DMA_DESC_DUMP(desc, m) \ + print_hex_dump(KERN_ERR, (m), \ + DUMP_PREFIX_ADDRESS, 16, 8, (desc), 32, 0) + +#define to_dma_desc_sw(tx) \ + container_of(tx, struct xgene_dma_desc_sw, tx) +#define to_dma_chan(dchan) \ + container_of(dchan, struct xgene_dma_chan, dma_chan) + +#define chan_dbg(chan, fmt, arg...) \ + dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg) +#define chan_err(chan, fmt, arg...) \ + dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) + +struct xgene_dma_desc_hw { + __le64 m0; + __le64 m1; + __le64 m2; + __le64 m3; +}; + +enum xgene_dma_ring_cfgsize { + XGENE_DMA_RING_CFG_SIZE_512B, + XGENE_DMA_RING_CFG_SIZE_2KB, + XGENE_DMA_RING_CFG_SIZE_16KB, + XGENE_DMA_RING_CFG_SIZE_64KB, + XGENE_DMA_RING_CFG_SIZE_512KB, + XGENE_DMA_RING_CFG_SIZE_INVALID +}; + +struct xgene_dma_ring { + struct xgene_dma *pdma; + u8 buf_num; + u16 id; + u16 num; + u16 head; + u16 owner; + u16 slots; + u16 dst_ring_num; + u32 size; + void __iomem *cmd; + void __iomem *cmd_base; + dma_addr_t desc_paddr; + u32 state[XGENE_DMA_RING_NUM_CONFIG]; + enum xgene_dma_ring_cfgsize cfgsize; + union { + void *desc_vaddr; + struct xgene_dma_desc_hw *desc_hw; + }; +}; + +struct xgene_dma_desc_sw { + struct xgene_dma_desc_hw desc1; + struct xgene_dma_desc_hw desc2; + u32 flags; + struct list_head node; + struct list_head tx_list; + struct dma_async_tx_descriptor tx; +}; + +/** + * struct xgene_dma_chan - internal representation of an X-Gene DMA channel + * @dma_chan: dmaengine channel object member + * @pdma: X-Gene DMA device structure reference + * @dev: struct device reference for dma mapping api + * @id: raw id of this channel + * @rx_irq: channel IRQ + * @name: name of X-Gene DMA channel + * @lock: serializes enqueue/dequeue operations to the descriptor pool + * @pending: number of transaction request pushed to DMA controller for + * execution, but still waiting for completion, + * @max_outstanding: max number of outstanding request we can push to channel + * @ld_pending: descriptors which are queued to run, but have not yet been + * submitted to the hardware for execution + * @ld_running: descriptors which are currently being executing by the hardware + * @ld_completed: descriptors which have finished execution by the hardware. + * These descriptors have already had their cleanup actions run. They + * are waiting for the ACK bit to be set by the async tx API. + * @desc_pool: descriptor pool for DMA operations + * @tasklet: bottom half where all completed descriptors cleans + * @tx_ring: transmit ring descriptor that we use to prepare actual + * descriptors for further executions + * @rx_ring: receive ring descriptor that we use to get completed DMA + * descriptors during cleanup time + */ +struct xgene_dma_chan { + struct dma_chan dma_chan; + struct xgene_dma *pdma; + struct device *dev; + int id; + int rx_irq; + char name[10]; + spinlock_t lock; + int pending; + int max_outstanding; + struct list_head ld_pending; + struct list_head ld_running; + struct list_head ld_completed; + struct dma_pool *desc_pool; + struct tasklet_struct tasklet; + struct xgene_dma_ring tx_ring; + struct xgene_dma_ring rx_ring; +}; + +/** + * struct xgene_dma - internal representation of an X-Gene DMA device + * @dev: reference to this device's struct device + * @clk: reference to this device's clock + * @err_irq: DMA error irq number + * @ring_num: start id number for DMA ring + * @csr_dma: base for DMA register access + * @csr_ring: base for DMA ring register access + * @csr_ring_cmd: base for DMA ring command register access + * @csr_efuse: base for efuse register access + * @dma_dev: embedded struct dma_device + * @chan: reference to X-Gene DMA channels + */ +struct xgene_dma { + struct device *dev; + struct clk *clk; + int err_irq; + int ring_num; + void __iomem *csr_dma; + void __iomem *csr_ring; + void __iomem *csr_ring_cmd; + void __iomem *csr_efuse; + struct dma_device dma_dev[XGENE_DMA_MAX_CHANNEL]; + struct xgene_dma_chan chan[XGENE_DMA_MAX_CHANNEL]; +}; + +static const char * const xgene_dma_desc_err[] = { + [ERR_DESC_AXI] = "AXI error when reading src/dst link list", + [ERR_BAD_DESC] = "ERR or El_ERR fields not set to zero in desc", + [ERR_READ_DATA_AXI] = "AXI error when reading data", + [ERR_WRITE_DATA_AXI] = "AXI error when writing data", + [ERR_FBP_TIMEOUT] = "Timeout on bufpool fetch", + [ERR_ECC] = "ECC double bit error", + [ERR_DIFF_SIZE] = "Bufpool too small to hold all the DIF result", + [ERR_SCT_GAT_LEN] = "Gather and scatter data length not same", + [ERR_CRC_ERR] = "CRC error", + [ERR_CHKSUM] = "Checksum error", + [ERR_DIF] = "DIF error", +}; + +static const char * const xgene_dma_err[] = { + [ERR_DIF_SIZE_INT] = "DIF size error", + [ERR_GS_ERR_INT] = "Gather scatter not same size error", + [ERR_FPB_TIMEO_INT] = "Free pool time out error", + [ERR_WFIFO_OVF_INT] = "Write FIFO over flow error", + [ERR_RFIFO_OVF_INT] = "Read FIFO over flow error", + [ERR_WR_TIMEO_INT] = "Write time out error", + [ERR_RD_TIMEO_INT] = "Read time out error", + [ERR_WR_ERR_INT] = "HBF bus write error", + [ERR_RD_ERR_INT] = "HBF bus read error", + [ERR_BAD_DESC_INT] = "Ring descriptor HE0 not set error", + [ERR_DESC_DST_INT] = "HFB reading dst link address error", + [ERR_DESC_SRC_INT] = "HFB reading src link address error", +}; + +static bool is_pq_enabled(struct xgene_dma *pdma) +{ + u32 val; + + val = ioread32(pdma->csr_efuse + XGENE_SOC_JTAG1_SHADOW); + return !(val & XGENE_DMA_PQ_DISABLE_MASK); +} + +static u64 xgene_dma_encode_len(size_t len) +{ + return (len < XGENE_DMA_MAX_BYTE_CNT) ? + ((u64)len << XGENE_DMA_DESC_BUFLEN_POS) : + XGENE_DMA_16K_BUFFER_LEN_CODE; +} + +static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) +{ + static u8 flyby_type[] = { + FLYBY_2SRC_XOR, /* Dummy */ + FLYBY_2SRC_XOR, /* Dummy */ + FLYBY_2SRC_XOR, + FLYBY_3SRC_XOR, + FLYBY_4SRC_XOR, + FLYBY_5SRC_XOR + }; + + return flyby_type[src_cnt]; +} + +static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len, + dma_addr_t *paddr) +{ + size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ? + *len : XGENE_DMA_MAX_BYTE_CNT; + + *ext8 |= cpu_to_le64(*paddr); + *ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes)); + *len -= nbytes; + *paddr += nbytes; +} + +static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx) +{ + switch (idx) { + case 0: + return &desc->m1; + case 1: + return &desc->m0; + case 2: + return &desc->m3; + case 3: + return &desc->m2; + default: + pr_err("Invalid dma descriptor index\n"); + } + + return NULL; +} + +static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc, + u16 dst_ring_num) +{ + desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT); + desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA << + XGENE_DMA_DESC_RTYPE_POS); + desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT); + desc->m3 |= cpu_to_le64((u64)dst_ring_num << + XGENE_DMA_DESC_HOENQ_NUM_POS); +} + +static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, + struct xgene_dma_desc_sw *desc_sw, + dma_addr_t *dst, dma_addr_t *src, + u32 src_cnt, size_t *nbytes, + const u8 *scf) +{ + struct xgene_dma_desc_hw *desc1, *desc2; + size_t len = *nbytes; + int i; + + desc1 = &desc_sw->desc1; + desc2 = &desc_sw->desc2; + + /* Initialize DMA descriptor */ + xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); + + /* Set destination address */ + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(*dst); + + /* We have multiple source addresses, so need to set NV bit*/ + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); + + /* Set flyby opcode */ + desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt)); + + /* Set 1st to 5th source addresses */ + for (i = 0; i < src_cnt; i++) { + len = *nbytes; + xgene_dma_set_src_buffer((i == 0) ? &desc1->m1 : + xgene_dma_lookup_ext8(desc2, i - 1), + &len, &src[i]); + desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8))); + } + + /* Update meta data */ + *nbytes = len; + *dst += XGENE_DMA_MAX_BYTE_CNT; + + /* We need always 64B descriptor to perform xor or pq operations */ + desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; +} + +static dma_cookie_t xgene_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct xgene_dma_desc_sw *desc; + struct xgene_dma_chan *chan; + dma_cookie_t cookie; + + if (unlikely(!tx)) + return -EINVAL; + + chan = to_dma_chan(tx->chan); + desc = to_dma_desc_sw(tx); + + spin_lock_bh(&chan->lock); + + cookie = dma_cookie_assign(tx); + + /* Add this transaction list onto the tail of the pending queue */ + list_splice_tail_init(&desc->tx_list, &chan->ld_pending); + + spin_unlock_bh(&chan->lock); + + return cookie; +} + +static void xgene_dma_clean_descriptor(struct xgene_dma_chan *chan, + struct xgene_dma_desc_sw *desc) +{ + list_del(&desc->node); + chan_dbg(chan, "LD %p free\n", desc); + dma_pool_free(chan->desc_pool, desc, desc->tx.phys); +} + +static struct xgene_dma_desc_sw *xgene_dma_alloc_descriptor( + struct xgene_dma_chan *chan) +{ + struct xgene_dma_desc_sw *desc; + dma_addr_t phys; + + desc = dma_pool_zalloc(chan->desc_pool, GFP_NOWAIT, &phys); + if (!desc) { + chan_err(chan, "Failed to allocate LDs\n"); + return NULL; + } + + INIT_LIST_HEAD(&desc->tx_list); + desc->tx.phys = phys; + desc->tx.tx_submit = xgene_dma_tx_submit; + dma_async_tx_descriptor_init(&desc->tx, &chan->dma_chan); + + chan_dbg(chan, "LD %p allocated\n", desc); + + return desc; +} + +/** + * xgene_dma_clean_completed_descriptor - free all descriptors which + * has been completed and acked + * @chan: X-Gene DMA channel + * + * This function is used on all completed and acked descriptors. + */ +static void xgene_dma_clean_completed_descriptor(struct xgene_dma_chan *chan) +{ + struct xgene_dma_desc_sw *desc, *_desc; + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) { + if (async_tx_test_ack(&desc->tx)) + xgene_dma_clean_descriptor(chan, desc); + } +} + +/** + * xgene_dma_run_tx_complete_actions - cleanup a single link descriptor + * @chan: X-Gene DMA channel + * @desc: descriptor to cleanup and free + * + * This function is used on a descriptor which has been executed by the DMA + * controller. It will run any callbacks, submit any dependencies. + */ +static void xgene_dma_run_tx_complete_actions(struct xgene_dma_chan *chan, + struct xgene_dma_desc_sw *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->tx; + + /* + * If this is not the last transaction in the group, + * then no need to complete cookie and run any callback as + * this is not the tx_descriptor which had been sent to caller + * of this DMA request + */ + + if (tx->cookie == 0) + return; + + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + + /* Run the link descriptor callback function */ + dmaengine_desc_get_callback_invoke(tx, NULL); + + /* Run any dependencies */ + dma_run_dependencies(tx); +} + +/** + * xgene_dma_clean_running_descriptor - move the completed descriptor from + * ld_running to ld_completed + * @chan: X-Gene DMA channel + * @desc: the descriptor which is completed + * + * Free the descriptor directly if acked by async_tx api, + * else move it to queue ld_completed. + */ +static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan, + struct xgene_dma_desc_sw *desc) +{ + /* Remove from the list of running transactions */ + list_del(&desc->node); + + /* + * the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->tx)) { + /* + * Move this descriptor to the list of descriptors which is + * completed, but still awaiting the 'ack' bit to be set. + */ + list_add_tail(&desc->node, &chan->ld_completed); + return; + } + + chan_dbg(chan, "LD %p free\n", desc); + dma_pool_free(chan->desc_pool, desc, desc->tx.phys); +} + +static void xgene_chan_xfer_request(struct xgene_dma_chan *chan, + struct xgene_dma_desc_sw *desc_sw) +{ + struct xgene_dma_ring *ring = &chan->tx_ring; + struct xgene_dma_desc_hw *desc_hw; + + /* Get hw descriptor from DMA tx ring */ + desc_hw = &ring->desc_hw[ring->head]; + + /* + * Increment the head count to point next + * descriptor for next time + */ + if (++ring->head == ring->slots) + ring->head = 0; + + /* Copy prepared sw descriptor data to hw descriptor */ + memcpy(desc_hw, &desc_sw->desc1, sizeof(*desc_hw)); + + /* + * Check if we have prepared 64B descriptor, + * in this case we need one more hw descriptor + */ + if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) { + desc_hw = &ring->desc_hw[ring->head]; + + if (++ring->head == ring->slots) + ring->head = 0; + + memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw)); + } + + /* Increment the pending transaction count */ + chan->pending += ((desc_sw->flags & + XGENE_DMA_FLAG_64B_DESC) ? 2 : 1); + + /* Notify the hw that we have descriptor ready for execution */ + iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ? + 2 : 1, ring->cmd); +} + +/** + * xgene_chan_xfer_ld_pending - push any pending transactions to hw + * @chan : X-Gene DMA channel + * + * LOCKING: must hold chan->lock + */ +static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) +{ + struct xgene_dma_desc_sw *desc_sw, *_desc_sw; + + /* + * If the list of pending descriptors is empty, then we + * don't need to do any work at all + */ + if (list_empty(&chan->ld_pending)) { + chan_dbg(chan, "No pending LDs\n"); + return; + } + + /* + * Move elements from the queue of pending transactions onto the list + * of running transactions and push it to hw for further executions + */ + list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_pending, node) { + /* + * Check if have pushed max number of transactions to hw + * as capable, so let's stop here and will push remaining + * elements from pening ld queue after completing some + * descriptors that we have already pushed + */ + if (chan->pending >= chan->max_outstanding) + return; + + xgene_chan_xfer_request(chan, desc_sw); + + /* + * Delete this element from ld pending queue and append it to + * ld running queue + */ + list_move_tail(&desc_sw->node, &chan->ld_running); + } +} + +/** + * xgene_dma_cleanup_descriptors - cleanup link descriptors which are completed + * and move them to ld_completed to free until flag 'ack' is set + * @chan: X-Gene DMA channel + * + * This function is used on descriptors which have been executed by the DMA + * controller. It will run any callbacks, submit any dependencies, then + * free these descriptors if flag 'ack' is set. + */ +static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) +{ + struct xgene_dma_ring *ring = &chan->rx_ring; + struct xgene_dma_desc_sw *desc_sw, *_desc_sw; + struct xgene_dma_desc_hw *desc_hw; + struct list_head ld_completed; + u8 status; + + INIT_LIST_HEAD(&ld_completed); + + spin_lock(&chan->lock); + + /* Clean already completed and acked descriptors */ + xgene_dma_clean_completed_descriptor(chan); + + /* Move all completed descriptors to ld completed queue, in order */ + list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) { + /* Get subsequent hw descriptor from DMA rx ring */ + desc_hw = &ring->desc_hw[ring->head]; + + /* Check if this descriptor has been completed */ + if (unlikely(le64_to_cpu(desc_hw->m0) == + XGENE_DMA_DESC_EMPTY_SIGNATURE)) + break; + + if (++ring->head == ring->slots) + ring->head = 0; + + /* Check if we have any error with DMA transactions */ + status = XGENE_DMA_DESC_STATUS( + XGENE_DMA_DESC_ELERR_RD(le64_to_cpu( + desc_hw->m0)), + XGENE_DMA_DESC_LERR_RD(le64_to_cpu( + desc_hw->m0))); + if (status) { + /* Print the DMA error type */ + chan_err(chan, "%s\n", xgene_dma_desc_err[status]); + + /* + * We have DMA transactions error here. Dump DMA Tx + * and Rx descriptors for this request */ + XGENE_DMA_DESC_DUMP(&desc_sw->desc1, + "X-Gene DMA TX DESC1: "); + + if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) + XGENE_DMA_DESC_DUMP(&desc_sw->desc2, + "X-Gene DMA TX DESC2: "); + + XGENE_DMA_DESC_DUMP(desc_hw, + "X-Gene DMA RX ERR DESC: "); + } + + /* Notify the hw about this completed descriptor */ + iowrite32(-1, ring->cmd); + + /* Mark this hw descriptor as processed */ + desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); + + /* + * Decrement the pending transaction count + * as we have processed one + */ + chan->pending -= ((desc_sw->flags & + XGENE_DMA_FLAG_64B_DESC) ? 2 : 1); + + /* + * Delete this node from ld running queue and append it to + * ld completed queue for further processing + */ + list_move_tail(&desc_sw->node, &ld_completed); + } + + /* + * Start any pending transactions automatically + * In the ideal case, we keep the DMA controller busy while we go + * ahead and free the descriptors below. + */ + xgene_chan_xfer_ld_pending(chan); + + spin_unlock(&chan->lock); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc_sw, _desc_sw, &ld_completed, node) { + xgene_dma_run_tx_complete_actions(chan, desc_sw); + xgene_dma_clean_running_descriptor(chan, desc_sw); + } +} + +static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct xgene_dma_chan *chan = to_dma_chan(dchan); + + /* Has this channel already been allocated? */ + if (chan->desc_pool) + return 1; + + chan->desc_pool = dma_pool_create(chan->name, chan->dev, + sizeof(struct xgene_dma_desc_sw), + 0, 0); + if (!chan->desc_pool) { + chan_err(chan, "Failed to allocate descriptor pool\n"); + return -ENOMEM; + } + + chan_dbg(chan, "Allocate descriptor pool\n"); + + return 1; +} + +/** + * xgene_dma_free_desc_list - Free all descriptors in a queue + * @chan: X-Gene DMA channel + * @list: the list to free + * + * LOCKING: must hold chan->lock + */ +static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, + struct list_head *list) +{ + struct xgene_dma_desc_sw *desc, *_desc; + + list_for_each_entry_safe(desc, _desc, list, node) + xgene_dma_clean_descriptor(chan, desc); +} + +static void xgene_dma_free_chan_resources(struct dma_chan *dchan) +{ + struct xgene_dma_chan *chan = to_dma_chan(dchan); + + chan_dbg(chan, "Free all resources\n"); + + if (!chan->desc_pool) + return; + + /* Process all running descriptor */ + xgene_dma_cleanup_descriptors(chan); + + spin_lock_bh(&chan->lock); + + /* Clean all link descriptor queues */ + xgene_dma_free_desc_list(chan, &chan->ld_pending); + xgene_dma_free_desc_list(chan, &chan->ld_running); + xgene_dma_free_desc_list(chan, &chan->ld_completed); + + spin_unlock_bh(&chan->lock); + + /* Delete this channel DMA pool */ + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; +} + +static struct dma_async_tx_descriptor *xgene_dma_prep_xor( + struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src, + u32 src_cnt, size_t len, unsigned long flags) +{ + struct xgene_dma_desc_sw *first = NULL, *new; + struct xgene_dma_chan *chan; + static u8 multi[XGENE_DMA_MAX_XOR_SRC] = { + 0x01, 0x01, 0x01, 0x01, 0x01}; + + if (unlikely(!dchan || !len)) + return NULL; + + chan = to_dma_chan(dchan); + + do { + /* Allocate the link descriptor from DMA pool */ + new = xgene_dma_alloc_descriptor(chan); + if (!new) + goto fail; + + /* Prepare xor DMA descriptor */ + xgene_dma_prep_xor_desc(chan, new, &dst, src, + src_cnt, &len, multi); + + if (!first) + first = new; + + new->tx.cookie = 0; + async_tx_ack(&new->tx); + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } while (len); + + new->tx.flags = flags; /* client is in control of this ack */ + new->tx.cookie = -EBUSY; + list_splice(&first->tx_list, &new->tx_list); + + return &new->tx; + +fail: + if (!first) + return NULL; + + xgene_dma_free_desc_list(chan, &first->tx_list); + return NULL; +} + +static struct dma_async_tx_descriptor *xgene_dma_prep_pq( + struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src, + u32 src_cnt, const u8 *scf, size_t len, unsigned long flags) +{ + struct xgene_dma_desc_sw *first = NULL, *new; + struct xgene_dma_chan *chan; + size_t _len = len; + dma_addr_t _src[XGENE_DMA_MAX_XOR_SRC]; + static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {0x01, 0x01, 0x01, 0x01, 0x01}; + + if (unlikely(!dchan || !len)) + return NULL; + + chan = to_dma_chan(dchan); + + /* + * Save source addresses on local variable, may be we have to + * prepare two descriptor to generate P and Q if both enabled + * in the flags by client + */ + memcpy(_src, src, sizeof(*src) * src_cnt); + + if (flags & DMA_PREP_PQ_DISABLE_P) + len = 0; + + if (flags & DMA_PREP_PQ_DISABLE_Q) + _len = 0; + + do { + /* Allocate the link descriptor from DMA pool */ + new = xgene_dma_alloc_descriptor(chan); + if (!new) + goto fail; + + if (!first) + first = new; + + new->tx.cookie = 0; + async_tx_ack(&new->tx); + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + + /* + * Prepare DMA descriptor to generate P, + * if DMA_PREP_PQ_DISABLE_P flag is not set + */ + if (len) { + xgene_dma_prep_xor_desc(chan, new, &dst[0], src, + src_cnt, &len, multi); + continue; + } + + /* + * Prepare DMA descriptor to generate Q, + * if DMA_PREP_PQ_DISABLE_Q flag is not set + */ + if (_len) { + xgene_dma_prep_xor_desc(chan, new, &dst[1], _src, + src_cnt, &_len, scf); + } + } while (len || _len); + + new->tx.flags = flags; /* client is in control of this ack */ + new->tx.cookie = -EBUSY; + list_splice(&first->tx_list, &new->tx_list); + + return &new->tx; + +fail: + if (!first) + return NULL; + + xgene_dma_free_desc_list(chan, &first->tx_list); + return NULL; +} + +static void xgene_dma_issue_pending(struct dma_chan *dchan) +{ + struct xgene_dma_chan *chan = to_dma_chan(dchan); + + spin_lock_bh(&chan->lock); + xgene_chan_xfer_ld_pending(chan); + spin_unlock_bh(&chan->lock); +} + +static enum dma_status xgene_dma_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(dchan, cookie, txstate); +} + +static void xgene_dma_tasklet_cb(struct tasklet_struct *t) +{ + struct xgene_dma_chan *chan = from_tasklet(chan, t, tasklet); + + /* Run all cleanup for descriptors which have been completed */ + xgene_dma_cleanup_descriptors(chan); + + /* Re-enable DMA channel IRQ */ + enable_irq(chan->rx_irq); +} + +static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id) +{ + struct xgene_dma_chan *chan = (struct xgene_dma_chan *)id; + + BUG_ON(!chan); + + /* + * Disable DMA channel IRQ until we process completed + * descriptors + */ + disable_irq_nosync(chan->rx_irq); + + /* + * Schedule the tasklet to handle all cleanup of the current + * transaction. It will start a new transaction if there is + * one pending. + */ + tasklet_schedule(&chan->tasklet); + + return IRQ_HANDLED; +} + +static irqreturn_t xgene_dma_err_isr(int irq, void *id) +{ + struct xgene_dma *pdma = (struct xgene_dma *)id; + unsigned long int_mask; + u32 val, i; + + val = ioread32(pdma->csr_dma + XGENE_DMA_INT); + + /* Clear DMA interrupts */ + iowrite32(val, pdma->csr_dma + XGENE_DMA_INT); + + /* Print DMA error info */ + int_mask = val >> XGENE_DMA_INT_MASK_SHIFT; + for_each_set_bit(i, &int_mask, ARRAY_SIZE(xgene_dma_err)) + dev_err(pdma->dev, + "Interrupt status 0x%08X %s\n", val, xgene_dma_err[i]); + + return IRQ_HANDLED; +} + +static void xgene_dma_wr_ring_state(struct xgene_dma_ring *ring) +{ + int i; + + iowrite32(ring->num, ring->pdma->csr_ring + XGENE_DMA_RING_STATE); + + for (i = 0; i < XGENE_DMA_RING_NUM_CONFIG; i++) + iowrite32(ring->state[i], ring->pdma->csr_ring + + XGENE_DMA_RING_STATE_WR_BASE + (i * 4)); +} + +static void xgene_dma_clr_ring_state(struct xgene_dma_ring *ring) +{ + memset(ring->state, 0, sizeof(u32) * XGENE_DMA_RING_NUM_CONFIG); + xgene_dma_wr_ring_state(ring); +} + +static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) +{ + void *ring_cfg = ring->state; + u64 addr = ring->desc_paddr; + u32 i, val; + + ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE; + + /* Clear DMA ring state */ + xgene_dma_clr_ring_state(ring); + + /* Set DMA ring type */ + XGENE_DMA_RING_TYPE_SET(ring_cfg, XGENE_DMA_RING_TYPE_REGULAR); + + if (ring->owner == XGENE_DMA_RING_OWNER_DMA) { + /* Set recombination buffer and timeout */ + XGENE_DMA_RING_RECOMBBUF_SET(ring_cfg); + XGENE_DMA_RING_RECOMTIMEOUTL_SET(ring_cfg); + XGENE_DMA_RING_RECOMTIMEOUTH_SET(ring_cfg); + } + + /* Initialize DMA ring state */ + XGENE_DMA_RING_SELTHRSH_SET(ring_cfg); + XGENE_DMA_RING_ACCEPTLERR_SET(ring_cfg); + XGENE_DMA_RING_COHERENT_SET(ring_cfg); + XGENE_DMA_RING_ADDRL_SET(ring_cfg, addr); + XGENE_DMA_RING_ADDRH_SET(ring_cfg, addr); + XGENE_DMA_RING_SIZE_SET(ring_cfg, ring->cfgsize); + + /* Write DMA ring configurations */ + xgene_dma_wr_ring_state(ring); + + /* Set DMA ring id */ + iowrite32(XGENE_DMA_RING_ID_SETUP(ring->id), + ring->pdma->csr_ring + XGENE_DMA_RING_ID); + + /* Set DMA ring buffer */ + iowrite32(XGENE_DMA_RING_ID_BUF_SETUP(ring->num), + ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF); + + if (ring->owner != XGENE_DMA_RING_OWNER_CPU) + return; + + /* Set empty signature to DMA Rx ring descriptors */ + for (i = 0; i < ring->slots; i++) { + struct xgene_dma_desc_hw *desc; + + desc = &ring->desc_hw[i]; + desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); + } + + /* Enable DMA Rx ring interrupt */ + val = ioread32(ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE); + XGENE_DMA_RING_NE_INT_MODE_SET(val, ring->buf_num); + iowrite32(val, ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE); +} + +static void xgene_dma_clear_ring(struct xgene_dma_ring *ring) +{ + u32 ring_id, val; + + if (ring->owner == XGENE_DMA_RING_OWNER_CPU) { + /* Disable DMA Rx ring interrupt */ + val = ioread32(ring->pdma->csr_ring + + XGENE_DMA_RING_NE_INT_MODE); + XGENE_DMA_RING_NE_INT_MODE_RESET(val, ring->buf_num); + iowrite32(val, ring->pdma->csr_ring + + XGENE_DMA_RING_NE_INT_MODE); + } + + /* Clear DMA ring state */ + ring_id = XGENE_DMA_RING_ID_SETUP(ring->id); + iowrite32(ring_id, ring->pdma->csr_ring + XGENE_DMA_RING_ID); + + iowrite32(0, ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF); + xgene_dma_clr_ring_state(ring); +} + +static void xgene_dma_set_ring_cmd(struct xgene_dma_ring *ring) +{ + ring->cmd_base = ring->pdma->csr_ring_cmd + + XGENE_DMA_RING_CMD_BASE_OFFSET((ring->num - + XGENE_DMA_RING_NUM)); + + ring->cmd = ring->cmd_base + XGENE_DMA_RING_CMD_OFFSET; +} + +static int xgene_dma_get_ring_size(struct xgene_dma_chan *chan, + enum xgene_dma_ring_cfgsize cfgsize) +{ + int size; + + switch (cfgsize) { + case XGENE_DMA_RING_CFG_SIZE_512B: + size = 0x200; + break; + case XGENE_DMA_RING_CFG_SIZE_2KB: + size = 0x800; + break; + case XGENE_DMA_RING_CFG_SIZE_16KB: + size = 0x4000; + break; + case XGENE_DMA_RING_CFG_SIZE_64KB: + size = 0x10000; + break; + case XGENE_DMA_RING_CFG_SIZE_512KB: + size = 0x80000; + break; + default: + chan_err(chan, "Unsupported cfg ring size %d\n", cfgsize); + return -EINVAL; + } + + return size; +} + +static void xgene_dma_delete_ring_one(struct xgene_dma_ring *ring) +{ + /* Clear DMA ring configurations */ + xgene_dma_clear_ring(ring); + + /* De-allocate DMA ring descriptor */ + if (ring->desc_vaddr) { + dma_free_coherent(ring->pdma->dev, ring->size, + ring->desc_vaddr, ring->desc_paddr); + ring->desc_vaddr = NULL; + } +} + +static void xgene_dma_delete_chan_rings(struct xgene_dma_chan *chan) +{ + xgene_dma_delete_ring_one(&chan->rx_ring); + xgene_dma_delete_ring_one(&chan->tx_ring); +} + +static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan, + struct xgene_dma_ring *ring, + enum xgene_dma_ring_cfgsize cfgsize) +{ + int ret; + + /* Setup DMA ring descriptor variables */ + ring->pdma = chan->pdma; + ring->cfgsize = cfgsize; + ring->num = chan->pdma->ring_num++; + ring->id = XGENE_DMA_RING_ID_GET(ring->owner, ring->buf_num); + + ret = xgene_dma_get_ring_size(chan, cfgsize); + if (ret <= 0) + return ret; + ring->size = ret; + + /* Allocate memory for DMA ring descriptor */ + ring->desc_vaddr = dma_alloc_coherent(chan->dev, ring->size, + &ring->desc_paddr, GFP_KERNEL); + if (!ring->desc_vaddr) { + chan_err(chan, "Failed to allocate ring desc\n"); + return -ENOMEM; + } + + /* Configure and enable DMA ring */ + xgene_dma_set_ring_cmd(ring); + xgene_dma_setup_ring(ring); + + return 0; +} + +static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan) +{ + struct xgene_dma_ring *rx_ring = &chan->rx_ring; + struct xgene_dma_ring *tx_ring = &chan->tx_ring; + int ret; + + /* Create DMA Rx ring descriptor */ + rx_ring->owner = XGENE_DMA_RING_OWNER_CPU; + rx_ring->buf_num = XGENE_DMA_CPU_BUFNUM + chan->id; + + ret = xgene_dma_create_ring_one(chan, rx_ring, + XGENE_DMA_RING_CFG_SIZE_64KB); + if (ret) + return ret; + + chan_dbg(chan, "Rx ring id 0x%X num %d desc 0x%p\n", + rx_ring->id, rx_ring->num, rx_ring->desc_vaddr); + + /* Create DMA Tx ring descriptor */ + tx_ring->owner = XGENE_DMA_RING_OWNER_DMA; + tx_ring->buf_num = XGENE_DMA_BUFNUM + chan->id; + + ret = xgene_dma_create_ring_one(chan, tx_ring, + XGENE_DMA_RING_CFG_SIZE_64KB); + if (ret) { + xgene_dma_delete_ring_one(rx_ring); + return ret; + } + + tx_ring->dst_ring_num = XGENE_DMA_RING_DST_ID(rx_ring->num); + + chan_dbg(chan, + "Tx ring id 0x%X num %d desc 0x%p\n", + tx_ring->id, tx_ring->num, tx_ring->desc_vaddr); + + /* Set the max outstanding request possible to this channel */ + chan->max_outstanding = tx_ring->slots; + + return ret; +} + +static int xgene_dma_init_rings(struct xgene_dma *pdma) +{ + int ret, i, j; + + for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { + ret = xgene_dma_create_chan_rings(&pdma->chan[i]); + if (ret) { + for (j = 0; j < i; j++) + xgene_dma_delete_chan_rings(&pdma->chan[j]); + return ret; + } + } + + return ret; +} + +static void xgene_dma_enable(struct xgene_dma *pdma) +{ + u32 val; + + /* Configure and enable DMA engine */ + val = ioread32(pdma->csr_dma + XGENE_DMA_GCR); + XGENE_DMA_CH_SETUP(val); + XGENE_DMA_ENABLE(val); + iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR); +} + +static void xgene_dma_disable(struct xgene_dma *pdma) +{ + u32 val; + + val = ioread32(pdma->csr_dma + XGENE_DMA_GCR); + XGENE_DMA_DISABLE(val); + iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR); +} + +static void xgene_dma_mask_interrupts(struct xgene_dma *pdma) +{ + /* + * Mask DMA ring overflow, underflow and + * AXI write/read error interrupts + */ + iowrite32(XGENE_DMA_INT_ALL_MASK, + pdma->csr_dma + XGENE_DMA_RING_INT0_MASK); + iowrite32(XGENE_DMA_INT_ALL_MASK, + pdma->csr_dma + XGENE_DMA_RING_INT1_MASK); + iowrite32(XGENE_DMA_INT_ALL_MASK, + pdma->csr_dma + XGENE_DMA_RING_INT2_MASK); + iowrite32(XGENE_DMA_INT_ALL_MASK, + pdma->csr_dma + XGENE_DMA_RING_INT3_MASK); + iowrite32(XGENE_DMA_INT_ALL_MASK, + pdma->csr_dma + XGENE_DMA_RING_INT4_MASK); + + /* Mask DMA error interrupts */ + iowrite32(XGENE_DMA_INT_ALL_MASK, pdma->csr_dma + XGENE_DMA_INT_MASK); +} + +static void xgene_dma_unmask_interrupts(struct xgene_dma *pdma) +{ + /* + * Unmask DMA ring overflow, underflow and + * AXI write/read error interrupts + */ + iowrite32(XGENE_DMA_INT_ALL_UNMASK, + pdma->csr_dma + XGENE_DMA_RING_INT0_MASK); + iowrite32(XGENE_DMA_INT_ALL_UNMASK, + pdma->csr_dma + XGENE_DMA_RING_INT1_MASK); + iowrite32(XGENE_DMA_INT_ALL_UNMASK, + pdma->csr_dma + XGENE_DMA_RING_INT2_MASK); + iowrite32(XGENE_DMA_INT_ALL_UNMASK, + pdma->csr_dma + XGENE_DMA_RING_INT3_MASK); + iowrite32(XGENE_DMA_INT_ALL_UNMASK, + pdma->csr_dma + XGENE_DMA_RING_INT4_MASK); + + /* Unmask DMA error interrupts */ + iowrite32(XGENE_DMA_INT_ALL_UNMASK, + pdma->csr_dma + XGENE_DMA_INT_MASK); +} + +static void xgene_dma_init_hw(struct xgene_dma *pdma) +{ + u32 val; + + /* Associate DMA ring to corresponding ring HW */ + iowrite32(XGENE_DMA_ASSOC_RING_MNGR1, + pdma->csr_dma + XGENE_DMA_CFG_RING_WQ_ASSOC); + + /* Configure RAID6 polynomial control setting */ + if (is_pq_enabled(pdma)) + iowrite32(XGENE_DMA_RAID6_MULTI_CTRL(0x1D), + pdma->csr_dma + XGENE_DMA_RAID6_CONT); + else + dev_info(pdma->dev, "PQ is disabled in HW\n"); + + xgene_dma_enable(pdma); + xgene_dma_unmask_interrupts(pdma); + + /* Get DMA id and version info */ + val = ioread32(pdma->csr_dma + XGENE_DMA_IPBRR); + + /* DMA device info */ + dev_info(pdma->dev, + "X-Gene DMA v%d.%02d.%02d driver registered %d channels", + XGENE_DMA_REV_NO_RD(val), XGENE_DMA_BUS_ID_RD(val), + XGENE_DMA_DEV_ID_RD(val), XGENE_DMA_MAX_CHANNEL); +} + +static int xgene_dma_init_ring_mngr(struct xgene_dma *pdma) +{ + if (ioread32(pdma->csr_ring + XGENE_DMA_RING_CLKEN) && + (!ioread32(pdma->csr_ring + XGENE_DMA_RING_SRST))) + return 0; + + iowrite32(0x3, pdma->csr_ring + XGENE_DMA_RING_CLKEN); + iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_SRST); + + /* Bring up memory */ + iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN); + + /* Force a barrier */ + ioread32(pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN); + + /* reset may take up to 1ms */ + usleep_range(1000, 1100); + + if (ioread32(pdma->csr_ring + XGENE_DMA_RING_BLK_MEM_RDY) + != XGENE_DMA_RING_BLK_MEM_RDY_VAL) { + dev_err(pdma->dev, + "Failed to release ring mngr memory from shutdown\n"); + return -ENODEV; + } + + /* program threshold set 1 and all hysteresis */ + iowrite32(XGENE_DMA_RING_THRESLD0_SET1_VAL, + pdma->csr_ring + XGENE_DMA_RING_THRESLD0_SET1); + iowrite32(XGENE_DMA_RING_THRESLD1_SET1_VAL, + pdma->csr_ring + XGENE_DMA_RING_THRESLD1_SET1); + iowrite32(XGENE_DMA_RING_HYSTERESIS_VAL, + pdma->csr_ring + XGENE_DMA_RING_HYSTERESIS); + + /* Enable QPcore and assign error queue */ + iowrite32(XGENE_DMA_RING_ENABLE, + pdma->csr_ring + XGENE_DMA_RING_CONFIG); + + return 0; +} + +static int xgene_dma_init_mem(struct xgene_dma *pdma) +{ + int ret; + + ret = xgene_dma_init_ring_mngr(pdma); + if (ret) + return ret; + + /* Bring up memory */ + iowrite32(0x0, pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN); + + /* Force a barrier */ + ioread32(pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN); + + /* reset may take up to 1ms */ + usleep_range(1000, 1100); + + if (ioread32(pdma->csr_dma + XGENE_DMA_BLK_MEM_RDY) + != XGENE_DMA_BLK_MEM_RDY_VAL) { + dev_err(pdma->dev, + "Failed to release DMA memory from shutdown\n"); + return -ENODEV; + } + + return 0; +} + +static int xgene_dma_request_irqs(struct xgene_dma *pdma) +{ + struct xgene_dma_chan *chan; + int ret, i, j; + + /* Register DMA error irq */ + ret = devm_request_irq(pdma->dev, pdma->err_irq, xgene_dma_err_isr, + 0, "dma_error", pdma); + if (ret) { + dev_err(pdma->dev, + "Failed to register error IRQ %d\n", pdma->err_irq); + return ret; + } + + /* Register DMA channel rx irq */ + for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { + chan = &pdma->chan[i]; + irq_set_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); + ret = devm_request_irq(chan->dev, chan->rx_irq, + xgene_dma_chan_ring_isr, + 0, chan->name, chan); + if (ret) { + chan_err(chan, "Failed to register Rx IRQ %d\n", + chan->rx_irq); + devm_free_irq(pdma->dev, pdma->err_irq, pdma); + + for (j = 0; j < i; j++) { + chan = &pdma->chan[i]; + irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); + devm_free_irq(chan->dev, chan->rx_irq, chan); + } + + return ret; + } + } + + return 0; +} + +static void xgene_dma_free_irqs(struct xgene_dma *pdma) +{ + struct xgene_dma_chan *chan; + int i; + + /* Free DMA device error irq */ + devm_free_irq(pdma->dev, pdma->err_irq, pdma); + + for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { + chan = &pdma->chan[i]; + irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); + devm_free_irq(chan->dev, chan->rx_irq, chan); + } +} + +static void xgene_dma_set_caps(struct xgene_dma_chan *chan, + struct dma_device *dma_dev) +{ + /* Initialize DMA device capability mask */ + dma_cap_zero(dma_dev->cap_mask); + + /* Set DMA device capability */ + + /* Basically here, the X-Gene SoC DMA engine channel 0 supports XOR + * and channel 1 supports XOR, PQ both. First thing here is we have + * mechanism in hw to enable/disable PQ/XOR supports on channel 1, + * we can make sure this by reading SoC Efuse register. + * Second thing, we have hw errata that if we run channel 0 and + * channel 1 simultaneously with executing XOR and PQ request, + * suddenly DMA engine hangs, So here we enable XOR on channel 0 only + * if XOR and PQ supports on channel 1 is disabled. + */ + if ((chan->id == XGENE_DMA_PQ_CHANNEL) && + is_pq_enabled(chan->pdma)) { + dma_cap_set(DMA_PQ, dma_dev->cap_mask); + dma_cap_set(DMA_XOR, dma_dev->cap_mask); + } else if ((chan->id == XGENE_DMA_XOR_CHANNEL) && + !is_pq_enabled(chan->pdma)) { + dma_cap_set(DMA_XOR, dma_dev->cap_mask); + } + + /* Set base and prep routines */ + dma_dev->dev = chan->dev; + dma_dev->device_alloc_chan_resources = xgene_dma_alloc_chan_resources; + dma_dev->device_free_chan_resources = xgene_dma_free_chan_resources; + dma_dev->device_issue_pending = xgene_dma_issue_pending; + dma_dev->device_tx_status = xgene_dma_tx_status; + + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->device_prep_dma_xor = xgene_dma_prep_xor; + dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC; + dma_dev->xor_align = DMAENGINE_ALIGN_64_BYTES; + } + + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + dma_dev->device_prep_dma_pq = xgene_dma_prep_pq; + dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC; + dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES; + } +} + +static int xgene_dma_async_register(struct xgene_dma *pdma, int id) +{ + struct xgene_dma_chan *chan = &pdma->chan[id]; + struct dma_device *dma_dev = &pdma->dma_dev[id]; + int ret; + + chan->dma_chan.device = dma_dev; + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->ld_pending); + INIT_LIST_HEAD(&chan->ld_running); + INIT_LIST_HEAD(&chan->ld_completed); + tasklet_setup(&chan->tasklet, xgene_dma_tasklet_cb); + + chan->pending = 0; + chan->desc_pool = NULL; + dma_cookie_init(&chan->dma_chan); + + /* Setup dma device capabilities and prep routines */ + xgene_dma_set_caps(chan, dma_dev); + + /* Initialize DMA device list head */ + INIT_LIST_HEAD(&dma_dev->channels); + list_add_tail(&chan->dma_chan.device_node, &dma_dev->channels); + + /* Register with Linux async DMA framework*/ + ret = dma_async_device_register(dma_dev); + if (ret) { + chan_err(chan, "Failed to register async device %d", ret); + tasklet_kill(&chan->tasklet); + + return ret; + } + + /* DMA capability info */ + dev_info(pdma->dev, + "%s: CAPABILITY ( %s%s)\n", dma_chan_name(&chan->dma_chan), + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : ""); + + return 0; +} + +static int xgene_dma_init_async(struct xgene_dma *pdma) +{ + int ret, i, j; + + for (i = 0; i < XGENE_DMA_MAX_CHANNEL ; i++) { + ret = xgene_dma_async_register(pdma, i); + if (ret) { + for (j = 0; j < i; j++) { + dma_async_device_unregister(&pdma->dma_dev[j]); + tasklet_kill(&pdma->chan[j].tasklet); + } + + return ret; + } + } + + return ret; +} + +static void xgene_dma_async_unregister(struct xgene_dma *pdma) +{ + int i; + + for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) + dma_async_device_unregister(&pdma->dma_dev[i]); +} + +static void xgene_dma_init_channels(struct xgene_dma *pdma) +{ + struct xgene_dma_chan *chan; + int i; + + pdma->ring_num = XGENE_DMA_RING_NUM; + + for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { + chan = &pdma->chan[i]; + chan->dev = pdma->dev; + chan->pdma = pdma; + chan->id = i; + snprintf(chan->name, sizeof(chan->name), "dmachan%d", chan->id); + } +} + +static int xgene_dma_get_resources(struct platform_device *pdev, + struct xgene_dma *pdma) +{ + struct resource *res; + int irq, i; + + /* Get DMA csr region */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get csr region\n"); + return -ENXIO; + } + + pdma->csr_dma = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!pdma->csr_dma) { + dev_err(&pdev->dev, "Failed to ioremap csr region"); + return -ENOMEM; + } + + /* Get DMA ring csr region */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) { + dev_err(&pdev->dev, "Failed to get ring csr region\n"); + return -ENXIO; + } + + pdma->csr_ring = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!pdma->csr_ring) { + dev_err(&pdev->dev, "Failed to ioremap ring csr region"); + return -ENOMEM; + } + + /* Get DMA ring cmd csr region */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 2); + if (!res) { + dev_err(&pdev->dev, "Failed to get ring cmd csr region\n"); + return -ENXIO; + } + + pdma->csr_ring_cmd = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!pdma->csr_ring_cmd) { + dev_err(&pdev->dev, "Failed to ioremap ring cmd csr region"); + return -ENOMEM; + } + + pdma->csr_ring_cmd += XGENE_DMA_RING_CMD_SM_OFFSET; + + /* Get efuse csr region */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 3); + if (!res) { + dev_err(&pdev->dev, "Failed to get efuse csr region\n"); + return -ENXIO; + } + + pdma->csr_efuse = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!pdma->csr_efuse) { + dev_err(&pdev->dev, "Failed to ioremap efuse csr region"); + return -ENOMEM; + } + + /* Get DMA error interrupt */ + irq = platform_get_irq(pdev, 0); + if (irq <= 0) + return -ENXIO; + + pdma->err_irq = irq; + + /* Get DMA Rx ring descriptor interrupts for all DMA channels */ + for (i = 1; i <= XGENE_DMA_MAX_CHANNEL; i++) { + irq = platform_get_irq(pdev, i); + if (irq <= 0) + return -ENXIO; + + pdma->chan[i - 1].rx_irq = irq; + } + + return 0; +} + +static int xgene_dma_probe(struct platform_device *pdev) +{ + struct xgene_dma *pdma; + int ret, i; + + pdma = devm_kzalloc(&pdev->dev, sizeof(*pdma), GFP_KERNEL); + if (!pdma) + return -ENOMEM; + + pdma->dev = &pdev->dev; + platform_set_drvdata(pdev, pdma); + + ret = xgene_dma_get_resources(pdev, pdma); + if (ret) + return ret; + + pdma->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pdma->clk) && !ACPI_COMPANION(&pdev->dev)) { + dev_err(&pdev->dev, "Failed to get clk\n"); + return PTR_ERR(pdma->clk); + } + + /* Enable clk before accessing registers */ + if (!IS_ERR(pdma->clk)) { + ret = clk_prepare_enable(pdma->clk); + if (ret) { + dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); + return ret; + } + } + + /* Remove DMA RAM out of shutdown */ + ret = xgene_dma_init_mem(pdma); + if (ret) + goto err_clk_enable; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(42)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration\n"); + goto err_dma_mask; + } + + /* Initialize DMA channels software state */ + xgene_dma_init_channels(pdma); + + /* Configue DMA rings */ + ret = xgene_dma_init_rings(pdma); + if (ret) + goto err_clk_enable; + + ret = xgene_dma_request_irqs(pdma); + if (ret) + goto err_request_irq; + + /* Configure and enable DMA engine */ + xgene_dma_init_hw(pdma); + + /* Register DMA device with linux async framework */ + ret = xgene_dma_init_async(pdma); + if (ret) + goto err_async_init; + + return 0; + +err_async_init: + xgene_dma_free_irqs(pdma); + +err_request_irq: + for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) + xgene_dma_delete_chan_rings(&pdma->chan[i]); + +err_dma_mask: +err_clk_enable: + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); + + return ret; +} + +static int xgene_dma_remove(struct platform_device *pdev) +{ + struct xgene_dma *pdma = platform_get_drvdata(pdev); + struct xgene_dma_chan *chan; + int i; + + xgene_dma_async_unregister(pdma); + + /* Mask interrupts and disable DMA engine */ + xgene_dma_mask_interrupts(pdma); + xgene_dma_disable(pdma); + xgene_dma_free_irqs(pdma); + + for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { + chan = &pdma->chan[i]; + tasklet_kill(&chan->tasklet); + xgene_dma_delete_chan_rings(chan); + } + + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); + + return 0; +} + +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgene_dma_acpi_match_ptr[] = { + {"APMC0D43", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, xgene_dma_acpi_match_ptr); +#endif + +static const struct of_device_id xgene_dma_of_match_ptr[] = { + {.compatible = "apm,xgene-storm-dma",}, + {}, +}; +MODULE_DEVICE_TABLE(of, xgene_dma_of_match_ptr); + +static struct platform_driver xgene_dma_driver = { + .probe = xgene_dma_probe, + .remove = xgene_dma_remove, + .driver = { + .name = "X-Gene-DMA", + .of_match_table = xgene_dma_of_match_ptr, + .acpi_match_table = ACPI_PTR(xgene_dma_acpi_match_ptr), + }, +}; + +module_platform_driver(xgene_dma_driver); + +MODULE_DESCRIPTION("APM X-Gene SoC DMA driver"); +MODULE_AUTHOR("Rameshwar Prasad Sahu "); +MODULE_AUTHOR("Loc Ho "); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0"); diff --git a/drivers/dma/xilinx/Makefile b/drivers/dma/xilinx/Makefile new file mode 100644 index 0000000000..ebaa93644c --- /dev/null +++ b/drivers/dma/xilinx/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_XILINX_DMA) += xilinx_dma.o +obj-$(CONFIG_XILINX_XDMA) += xdma.o +obj-$(CONFIG_XILINX_ZYNQMP_DMA) += zynqmp_dma.o +obj-$(CONFIG_XILINX_ZYNQMP_DPDMA) += xilinx_dpdma.o diff --git a/drivers/dma/xilinx/xdma-regs.h b/drivers/dma/xilinx/xdma-regs.h new file mode 100644 index 0000000000..dd98b4526b --- /dev/null +++ b/drivers/dma/xilinx/xdma-regs.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017-2020 Xilinx, Inc. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. + */ + +#ifndef __DMA_XDMA_REGS_H +#define __DMA_XDMA_REGS_H + +/* The length of register space exposed to host */ +#define XDMA_REG_SPACE_LEN 65536 + +/* + * maximum number of DMA channels for each direction: + * Host to Card (H2C) or Card to Host (C2H) + */ +#define XDMA_MAX_CHANNELS 4 + +/* + * macros to define the number of descriptor blocks can be used in one + * DMA transfer request. + * the DMA engine uses a linked list of descriptor blocks that specify the + * source, destination, and length of the DMA transfers. + */ +#define XDMA_DESC_BLOCK_NUM BIT(7) +#define XDMA_DESC_BLOCK_MASK (XDMA_DESC_BLOCK_NUM - 1) + +/* descriptor definitions */ +#define XDMA_DESC_ADJACENT 32 +#define XDMA_DESC_ADJACENT_MASK (XDMA_DESC_ADJACENT - 1) +#define XDMA_DESC_ADJACENT_BITS GENMASK(13, 8) +#define XDMA_DESC_MAGIC 0xad4bUL +#define XDMA_DESC_MAGIC_BITS GENMASK(31, 16) +#define XDMA_DESC_FLAGS_BITS GENMASK(7, 0) +#define XDMA_DESC_STOPPED BIT(0) +#define XDMA_DESC_COMPLETED BIT(1) +#define XDMA_DESC_BLEN_BITS 28 +#define XDMA_DESC_BLEN_MAX (BIT(XDMA_DESC_BLEN_BITS) - PAGE_SIZE) + +/* macros to construct the descriptor control word */ +#define XDMA_DESC_CONTROL(adjacent, flag) \ + (FIELD_PREP(XDMA_DESC_MAGIC_BITS, XDMA_DESC_MAGIC) | \ + FIELD_PREP(XDMA_DESC_ADJACENT_BITS, (adjacent) - 1) | \ + FIELD_PREP(XDMA_DESC_FLAGS_BITS, (flag))) +#define XDMA_DESC_CONTROL_LAST \ + XDMA_DESC_CONTROL(1, XDMA_DESC_STOPPED | XDMA_DESC_COMPLETED) + +/* + * Descriptor for a single contiguous memory block transfer. + * + * Multiple descriptors are linked by means of the next pointer. An additional + * extra adjacent number gives the amount of extra contiguous descriptors. + * + * The descriptors are in root complex memory, and the bytes in the 32-bit + * words must be in little-endian byte ordering. + */ +struct xdma_hw_desc { + __le32 control; + __le32 bytes; + __le64 src_addr; + __le64 dst_addr; + __le64 next_desc; +}; + +#define XDMA_DESC_SIZE sizeof(struct xdma_hw_desc) +#define XDMA_DESC_BLOCK_SIZE (XDMA_DESC_SIZE * XDMA_DESC_ADJACENT) +#define XDMA_DESC_BLOCK_ALIGN 4096 + +/* + * Channel registers + */ +#define XDMA_CHAN_IDENTIFIER 0x0 +#define XDMA_CHAN_CONTROL 0x4 +#define XDMA_CHAN_CONTROL_W1S 0x8 +#define XDMA_CHAN_CONTROL_W1C 0xc +#define XDMA_CHAN_STATUS 0x40 +#define XDMA_CHAN_COMPLETED_DESC 0x48 +#define XDMA_CHAN_ALIGNMENTS 0x4c +#define XDMA_CHAN_INTR_ENABLE 0x90 +#define XDMA_CHAN_INTR_ENABLE_W1S 0x94 +#define XDMA_CHAN_INTR_ENABLE_W1C 0x9c + +#define XDMA_CHAN_STRIDE 0x100 +#define XDMA_CHAN_H2C_OFFSET 0x0 +#define XDMA_CHAN_C2H_OFFSET 0x1000 +#define XDMA_CHAN_H2C_TARGET 0x0 +#define XDMA_CHAN_C2H_TARGET 0x1 + +/* macro to check if channel is available */ +#define XDMA_CHAN_MAGIC 0x1fc0 +#define XDMA_CHAN_CHECK_TARGET(id, target) \ + (((u32)(id) >> 16) == XDMA_CHAN_MAGIC + (target)) + +/* bits of the channel control register */ +#define CHAN_CTRL_RUN_STOP BIT(0) +#define CHAN_CTRL_IE_DESC_STOPPED BIT(1) +#define CHAN_CTRL_IE_DESC_COMPLETED BIT(2) +#define CHAN_CTRL_IE_DESC_ALIGN_MISMATCH BIT(3) +#define CHAN_CTRL_IE_MAGIC_STOPPED BIT(4) +#define CHAN_CTRL_IE_IDLE_STOPPED BIT(6) +#define CHAN_CTRL_IE_READ_ERROR GENMASK(13, 9) +#define CHAN_CTRL_IE_DESC_ERROR GENMASK(23, 19) +#define CHAN_CTRL_NON_INCR_ADDR BIT(25) +#define CHAN_CTRL_POLL_MODE_WB BIT(26) + +#define CHAN_CTRL_START (CHAN_CTRL_RUN_STOP | \ + CHAN_CTRL_IE_DESC_STOPPED | \ + CHAN_CTRL_IE_DESC_COMPLETED | \ + CHAN_CTRL_IE_DESC_ALIGN_MISMATCH | \ + CHAN_CTRL_IE_MAGIC_STOPPED | \ + CHAN_CTRL_IE_READ_ERROR | \ + CHAN_CTRL_IE_DESC_ERROR) + +/* bits of the channel interrupt enable mask */ +#define CHAN_IM_DESC_ERROR BIT(19) +#define CHAN_IM_READ_ERROR BIT(9) +#define CHAN_IM_IDLE_STOPPED BIT(6) +#define CHAN_IM_MAGIC_STOPPED BIT(4) +#define CHAN_IM_DESC_COMPLETED BIT(2) +#define CHAN_IM_DESC_STOPPED BIT(1) + +#define CHAN_IM_ALL (CHAN_IM_DESC_ERROR | CHAN_IM_READ_ERROR | \ + CHAN_IM_IDLE_STOPPED | CHAN_IM_MAGIC_STOPPED | \ + CHAN_IM_DESC_COMPLETED | CHAN_IM_DESC_STOPPED) + +/* + * Channel SGDMA registers + */ +#define XDMA_SGDMA_IDENTIFIER 0x4000 +#define XDMA_SGDMA_DESC_LO 0x4080 +#define XDMA_SGDMA_DESC_HI 0x4084 +#define XDMA_SGDMA_DESC_ADJ 0x4088 +#define XDMA_SGDMA_DESC_CREDIT 0x408c + +/* bits of the SG DMA control register */ +#define XDMA_CTRL_RUN_STOP BIT(0) +#define XDMA_CTRL_IE_DESC_STOPPED BIT(1) +#define XDMA_CTRL_IE_DESC_COMPLETED BIT(2) +#define XDMA_CTRL_IE_DESC_ALIGN_MISMATCH BIT(3) +#define XDMA_CTRL_IE_MAGIC_STOPPED BIT(4) +#define XDMA_CTRL_IE_IDLE_STOPPED BIT(6) +#define XDMA_CTRL_IE_READ_ERROR GENMASK(13, 9) +#define XDMA_CTRL_IE_DESC_ERROR GENMASK(23, 19) +#define XDMA_CTRL_NON_INCR_ADDR BIT(25) +#define XDMA_CTRL_POLL_MODE_WB BIT(26) + +/* + * interrupt registers + */ +#define XDMA_IRQ_IDENTIFIER 0x2000 +#define XDMA_IRQ_USER_INT_EN 0x2004 +#define XDMA_IRQ_USER_INT_EN_W1S 0x2008 +#define XDMA_IRQ_USER_INT_EN_W1C 0x200c +#define XDMA_IRQ_CHAN_INT_EN 0x2010 +#define XDMA_IRQ_CHAN_INT_EN_W1S 0x2014 +#define XDMA_IRQ_CHAN_INT_EN_W1C 0x2018 +#define XDMA_IRQ_USER_INT_REQ 0x2040 +#define XDMA_IRQ_CHAN_INT_REQ 0x2044 +#define XDMA_IRQ_USER_INT_PEND 0x2048 +#define XDMA_IRQ_CHAN_INT_PEND 0x204c +#define XDMA_IRQ_USER_VEC_NUM 0x2080 +#define XDMA_IRQ_CHAN_VEC_NUM 0x20a0 + +#define XDMA_IRQ_VEC_SHIFT 8 + +#endif /* __DMA_XDMA_REGS_H */ diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c new file mode 100644 index 0000000000..e0bfd129d5 --- /dev/null +++ b/drivers/dma/xilinx/xdma.c @@ -0,0 +1,976 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DMA driver for Xilinx DMA/Bridge Subsystem + * + * Copyright (C) 2017-2020 Xilinx, Inc. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. + */ + +/* + * The DMA/Bridge Subsystem for PCI Express allows for the movement of data + * between Host memory and the DMA subsystem. It does this by operating on + * 'descriptors' that contain information about the source, destination and + * amount of data to transfer. These direct memory transfers can be both in + * the Host to Card (H2C) and Card to Host (C2H) transfers. The DMA can be + * configured to have a single AXI4 Master interface shared by all channels + * or one AXI4-Stream interface for each channel enabled. Memory transfers are + * specified on a per-channel basis in descriptor linked lists, which the DMA + * fetches from host memory and processes. Events such as descriptor completion + * and errors are signaled using interrupts. The core also provides up to 16 + * user interrupt wires that generate interrupts to the host. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../virt-dma.h" +#include "xdma-regs.h" + +/* mmio regmap config for all XDMA registers */ +static const struct regmap_config xdma_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = XDMA_REG_SPACE_LEN, +}; + +/** + * struct xdma_desc_block - Descriptor block + * @virt_addr: Virtual address of block start + * @dma_addr: DMA address of block start + */ +struct xdma_desc_block { + void *virt_addr; + dma_addr_t dma_addr; +}; + +/** + * struct xdma_chan - Driver specific DMA channel structure + * @vchan: Virtual channel + * @xdev_hdl: Pointer to DMA device structure + * @base: Offset of channel registers + * @desc_pool: Descriptor pool + * @busy: Busy flag of the channel + * @dir: Transferring direction of the channel + * @cfg: Transferring config of the channel + * @irq: IRQ assigned to the channel + */ +struct xdma_chan { + struct virt_dma_chan vchan; + void *xdev_hdl; + u32 base; + struct dma_pool *desc_pool; + bool busy; + enum dma_transfer_direction dir; + struct dma_slave_config cfg; + u32 irq; +}; + +/** + * struct xdma_desc - DMA desc structure + * @vdesc: Virtual DMA descriptor + * @chan: DMA channel pointer + * @dir: Transferring direction of the request + * @dev_addr: Physical address on DMA device side + * @desc_blocks: Hardware descriptor blocks + * @dblk_num: Number of hardware descriptor blocks + * @desc_num: Number of hardware descriptors + * @completed_desc_num: Completed hardware descriptors + */ +struct xdma_desc { + struct virt_dma_desc vdesc; + struct xdma_chan *chan; + enum dma_transfer_direction dir; + u64 dev_addr; + struct xdma_desc_block *desc_blocks; + u32 dblk_num; + u32 desc_num; + u32 completed_desc_num; +}; + +#define XDMA_DEV_STATUS_REG_DMA BIT(0) +#define XDMA_DEV_STATUS_INIT_MSIX BIT(1) + +/** + * struct xdma_device - DMA device structure + * @pdev: Platform device pointer + * @dma_dev: DMA device structure + * @rmap: MMIO regmap for DMA registers + * @h2c_chans: Host to Card channels + * @c2h_chans: Card to Host channels + * @h2c_chan_num: Number of H2C channels + * @c2h_chan_num: Number of C2H channels + * @irq_start: Start IRQ assigned to device + * @irq_num: Number of IRQ assigned to device + * @status: Initialization status + */ +struct xdma_device { + struct platform_device *pdev; + struct dma_device dma_dev; + struct regmap *rmap; + struct xdma_chan *h2c_chans; + struct xdma_chan *c2h_chans; + u32 h2c_chan_num; + u32 c2h_chan_num; + u32 irq_start; + u32 irq_num; + u32 status; +}; + +#define xdma_err(xdev, fmt, args...) \ + dev_err(&(xdev)->pdev->dev, fmt, ##args) +#define XDMA_CHAN_NUM(_xd) ({ \ + typeof(_xd) (xd) = (_xd); \ + ((xd)->h2c_chan_num + (xd)->c2h_chan_num); }) + +/* Get the last desc in a desc block */ +static inline void *xdma_blk_last_desc(struct xdma_desc_block *block) +{ + return block->virt_addr + (XDMA_DESC_ADJACENT - 1) * XDMA_DESC_SIZE; +} + +/** + * xdma_link_desc_blocks - Link descriptor blocks for DMA transfer + * @sw_desc: Tx descriptor pointer + */ +static void xdma_link_desc_blocks(struct xdma_desc *sw_desc) +{ + struct xdma_desc_block *block; + u32 last_blk_desc, desc_control; + struct xdma_hw_desc *desc; + int i; + + desc_control = XDMA_DESC_CONTROL(XDMA_DESC_ADJACENT, 0); + for (i = 1; i < sw_desc->dblk_num; i++) { + block = &sw_desc->desc_blocks[i - 1]; + desc = xdma_blk_last_desc(block); + + if (!(i & XDMA_DESC_BLOCK_MASK)) { + desc->control = cpu_to_le32(XDMA_DESC_CONTROL_LAST); + continue; + } + desc->control = cpu_to_le32(desc_control); + desc->next_desc = cpu_to_le64(block[1].dma_addr); + } + + /* update the last block */ + last_blk_desc = (sw_desc->desc_num - 1) & XDMA_DESC_ADJACENT_MASK; + if (((sw_desc->dblk_num - 1) & XDMA_DESC_BLOCK_MASK) > 0) { + block = &sw_desc->desc_blocks[sw_desc->dblk_num - 2]; + desc = xdma_blk_last_desc(block); + desc_control = XDMA_DESC_CONTROL(last_blk_desc + 1, 0); + desc->control = cpu_to_le32(desc_control); + } + + block = &sw_desc->desc_blocks[sw_desc->dblk_num - 1]; + desc = block->virt_addr + last_blk_desc * XDMA_DESC_SIZE; + desc->control = cpu_to_le32(XDMA_DESC_CONTROL_LAST); +} + +static inline struct xdma_chan *to_xdma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct xdma_chan, vchan.chan); +} + +static inline struct xdma_desc *to_xdma_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct xdma_desc, vdesc); +} + +/** + * xdma_channel_init - Initialize DMA channel registers + * @chan: DMA channel pointer + */ +static int xdma_channel_init(struct xdma_chan *chan) +{ + struct xdma_device *xdev = chan->xdev_hdl; + int ret; + + ret = regmap_write(xdev->rmap, chan->base + XDMA_CHAN_CONTROL_W1C, + CHAN_CTRL_NON_INCR_ADDR); + if (ret) + return ret; + + ret = regmap_write(xdev->rmap, chan->base + XDMA_CHAN_INTR_ENABLE, + CHAN_IM_ALL); + if (ret) + return ret; + + return 0; +} + +/** + * xdma_free_desc - Free descriptor + * @vdesc: Virtual DMA descriptor + */ +static void xdma_free_desc(struct virt_dma_desc *vdesc) +{ + struct xdma_desc *sw_desc; + int i; + + sw_desc = to_xdma_desc(vdesc); + for (i = 0; i < sw_desc->dblk_num; i++) { + if (!sw_desc->desc_blocks[i].virt_addr) + break; + dma_pool_free(sw_desc->chan->desc_pool, + sw_desc->desc_blocks[i].virt_addr, + sw_desc->desc_blocks[i].dma_addr); + } + kfree(sw_desc->desc_blocks); + kfree(sw_desc); +} + +/** + * xdma_alloc_desc - Allocate descriptor + * @chan: DMA channel pointer + * @desc_num: Number of hardware descriptors + */ +static struct xdma_desc * +xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num) +{ + struct xdma_desc *sw_desc; + struct xdma_hw_desc *desc; + dma_addr_t dma_addr; + u32 dblk_num; + void *addr; + int i, j; + + sw_desc = kzalloc(sizeof(*sw_desc), GFP_NOWAIT); + if (!sw_desc) + return NULL; + + sw_desc->chan = chan; + sw_desc->desc_num = desc_num; + dblk_num = DIV_ROUND_UP(desc_num, XDMA_DESC_ADJACENT); + sw_desc->desc_blocks = kcalloc(dblk_num, sizeof(*sw_desc->desc_blocks), + GFP_NOWAIT); + if (!sw_desc->desc_blocks) + goto failed; + + sw_desc->dblk_num = dblk_num; + for (i = 0; i < sw_desc->dblk_num; i++) { + addr = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT, &dma_addr); + if (!addr) + goto failed; + + sw_desc->desc_blocks[i].virt_addr = addr; + sw_desc->desc_blocks[i].dma_addr = dma_addr; + for (j = 0, desc = addr; j < XDMA_DESC_ADJACENT; j++) + desc[j].control = cpu_to_le32(XDMA_DESC_CONTROL(1, 0)); + } + + xdma_link_desc_blocks(sw_desc); + + return sw_desc; + +failed: + xdma_free_desc(&sw_desc->vdesc); + return NULL; +} + +/** + * xdma_xfer_start - Start DMA transfer + * @xchan: DMA channel pointer + */ +static int xdma_xfer_start(struct xdma_chan *xchan) +{ + struct virt_dma_desc *vd = vchan_next_desc(&xchan->vchan); + struct xdma_device *xdev = xchan->xdev_hdl; + struct xdma_desc_block *block; + u32 val, completed_blocks; + struct xdma_desc *desc; + int ret; + + /* + * check if there is not any submitted descriptor or channel is busy. + * vchan lock should be held where this function is called. + */ + if (!vd || xchan->busy) + return -EINVAL; + + /* clear run stop bit to get ready for transfer */ + ret = regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_CONTROL_W1C, + CHAN_CTRL_RUN_STOP); + if (ret) + return ret; + + desc = to_xdma_desc(vd); + if (desc->dir != xchan->dir) { + xdma_err(xdev, "incorrect request direction"); + return -EINVAL; + } + + /* set DMA engine to the first descriptor block */ + completed_blocks = desc->completed_desc_num / XDMA_DESC_ADJACENT; + block = &desc->desc_blocks[completed_blocks]; + val = lower_32_bits(block->dma_addr); + ret = regmap_write(xdev->rmap, xchan->base + XDMA_SGDMA_DESC_LO, val); + if (ret) + return ret; + + val = upper_32_bits(block->dma_addr); + ret = regmap_write(xdev->rmap, xchan->base + XDMA_SGDMA_DESC_HI, val); + if (ret) + return ret; + + if (completed_blocks + 1 == desc->dblk_num) + val = (desc->desc_num - 1) & XDMA_DESC_ADJACENT_MASK; + else + val = XDMA_DESC_ADJACENT - 1; + ret = regmap_write(xdev->rmap, xchan->base + XDMA_SGDMA_DESC_ADJ, val); + if (ret) + return ret; + + /* kick off DMA transfer */ + ret = regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_CONTROL, + CHAN_CTRL_START); + if (ret) + return ret; + + xchan->busy = true; + return 0; +} + +/** + * xdma_alloc_channels - Detect and allocate DMA channels + * @xdev: DMA device pointer + * @dir: Channel direction + */ +static int xdma_alloc_channels(struct xdma_device *xdev, + enum dma_transfer_direction dir) +{ + struct xdma_platdata *pdata = dev_get_platdata(&xdev->pdev->dev); + struct xdma_chan **chans, *xchan; + u32 base, identifier, target; + u32 *chan_num; + int i, j, ret; + + if (dir == DMA_MEM_TO_DEV) { + base = XDMA_CHAN_H2C_OFFSET; + target = XDMA_CHAN_H2C_TARGET; + chans = &xdev->h2c_chans; + chan_num = &xdev->h2c_chan_num; + } else if (dir == DMA_DEV_TO_MEM) { + base = XDMA_CHAN_C2H_OFFSET; + target = XDMA_CHAN_C2H_TARGET; + chans = &xdev->c2h_chans; + chan_num = &xdev->c2h_chan_num; + } else { + xdma_err(xdev, "invalid direction specified"); + return -EINVAL; + } + + /* detect number of available DMA channels */ + for (i = 0, *chan_num = 0; i < pdata->max_dma_channels; i++) { + ret = regmap_read(xdev->rmap, base + i * XDMA_CHAN_STRIDE, + &identifier); + if (ret) + return ret; + + /* check if it is available DMA channel */ + if (XDMA_CHAN_CHECK_TARGET(identifier, target)) + (*chan_num)++; + } + + if (!*chan_num) { + xdma_err(xdev, "does not probe any channel"); + return -EINVAL; + } + + *chans = devm_kcalloc(&xdev->pdev->dev, *chan_num, sizeof(**chans), + GFP_KERNEL); + if (!*chans) + return -ENOMEM; + + for (i = 0, j = 0; i < pdata->max_dma_channels; i++) { + ret = regmap_read(xdev->rmap, base + i * XDMA_CHAN_STRIDE, + &identifier); + if (ret) + return ret; + + if (!XDMA_CHAN_CHECK_TARGET(identifier, target)) + continue; + + if (j == *chan_num) { + xdma_err(xdev, "invalid channel number"); + return -EIO; + } + + /* init channel structure and hardware */ + xchan = &(*chans)[j]; + xchan->xdev_hdl = xdev; + xchan->base = base + i * XDMA_CHAN_STRIDE; + xchan->dir = dir; + + ret = xdma_channel_init(xchan); + if (ret) + return ret; + xchan->vchan.desc_free = xdma_free_desc; + vchan_init(&xchan->vchan, &xdev->dma_dev); + + j++; + } + + dev_info(&xdev->pdev->dev, "configured %d %s channels", j, + (dir == DMA_MEM_TO_DEV) ? "H2C" : "C2H"); + + return 0; +} + +/** + * xdma_issue_pending - Issue pending transactions + * @chan: DMA channel pointer + */ +static void xdma_issue_pending(struct dma_chan *chan) +{ + struct xdma_chan *xdma_chan = to_xdma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&xdma_chan->vchan.lock, flags); + if (vchan_issue_pending(&xdma_chan->vchan)) + xdma_xfer_start(xdma_chan); + spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags); +} + +/** + * xdma_prep_device_sg - prepare a descriptor for a DMA transaction + * @chan: DMA channel pointer + * @sgl: Transfer scatter gather list + * @sg_len: Length of scatter gather list + * @dir: Transfer direction + * @flags: transfer ack flags + * @context: APP words of the descriptor + */ +static struct dma_async_tx_descriptor * +xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct xdma_chan *xdma_chan = to_xdma_chan(chan); + struct dma_async_tx_descriptor *tx_desc; + u32 desc_num = 0, i, len, rest; + struct xdma_desc_block *dblk; + struct xdma_hw_desc *desc; + struct xdma_desc *sw_desc; + u64 dev_addr, *src, *dst; + struct scatterlist *sg; + u64 addr; + + for_each_sg(sgl, sg, sg_len, i) + desc_num += DIV_ROUND_UP(sg_dma_len(sg), XDMA_DESC_BLEN_MAX); + + sw_desc = xdma_alloc_desc(xdma_chan, desc_num); + if (!sw_desc) + return NULL; + sw_desc->dir = dir; + + if (dir == DMA_MEM_TO_DEV) { + dev_addr = xdma_chan->cfg.dst_addr; + src = &addr; + dst = &dev_addr; + } else { + dev_addr = xdma_chan->cfg.src_addr; + src = &dev_addr; + dst = &addr; + } + + dblk = sw_desc->desc_blocks; + desc = dblk->virt_addr; + desc_num = 1; + for_each_sg(sgl, sg, sg_len, i) { + addr = sg_dma_address(sg); + rest = sg_dma_len(sg); + + do { + len = min_t(u32, rest, XDMA_DESC_BLEN_MAX); + /* set hardware descriptor */ + desc->bytes = cpu_to_le32(len); + desc->src_addr = cpu_to_le64(*src); + desc->dst_addr = cpu_to_le64(*dst); + + if (!(desc_num & XDMA_DESC_ADJACENT_MASK)) { + dblk++; + desc = dblk->virt_addr; + } else { + desc++; + } + + desc_num++; + dev_addr += len; + addr += len; + rest -= len; + } while (rest); + } + + tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags); + if (!tx_desc) + goto failed; + + return tx_desc; + +failed: + xdma_free_desc(&sw_desc->vdesc); + + return NULL; +} + +/** + * xdma_device_config - Configure the DMA channel + * @chan: DMA channel + * @cfg: channel configuration + */ +static int xdma_device_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct xdma_chan *xdma_chan = to_xdma_chan(chan); + + memcpy(&xdma_chan->cfg, cfg, sizeof(*cfg)); + + return 0; +} + +/** + * xdma_free_chan_resources - Free channel resources + * @chan: DMA channel + */ +static void xdma_free_chan_resources(struct dma_chan *chan) +{ + struct xdma_chan *xdma_chan = to_xdma_chan(chan); + + vchan_free_chan_resources(&xdma_chan->vchan); + dma_pool_destroy(xdma_chan->desc_pool); + xdma_chan->desc_pool = NULL; +} + +/** + * xdma_alloc_chan_resources - Allocate channel resources + * @chan: DMA channel + */ +static int xdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct xdma_chan *xdma_chan = to_xdma_chan(chan); + struct xdma_device *xdev = xdma_chan->xdev_hdl; + struct device *dev = xdev->dma_dev.dev; + + while (dev && !dev_is_pci(dev)) + dev = dev->parent; + if (!dev) { + xdma_err(xdev, "unable to find pci device"); + return -EINVAL; + } + + xdma_chan->desc_pool = dma_pool_create(dma_chan_name(chan), + dev, XDMA_DESC_BLOCK_SIZE, + XDMA_DESC_BLOCK_ALIGN, 0); + if (!xdma_chan->desc_pool) { + xdma_err(xdev, "unable to allocate descriptor pool"); + return -ENOMEM; + } + + return 0; +} + +/** + * xdma_channel_isr - XDMA channel interrupt handler + * @irq: IRQ number + * @dev_id: Pointer to the DMA channel structure + */ +static irqreturn_t xdma_channel_isr(int irq, void *dev_id) +{ + struct xdma_chan *xchan = dev_id; + u32 complete_desc_num = 0; + struct xdma_device *xdev; + struct virt_dma_desc *vd; + struct xdma_desc *desc; + int ret; + + spin_lock(&xchan->vchan.lock); + + /* get submitted request */ + vd = vchan_next_desc(&xchan->vchan); + if (!vd) + goto out; + + xchan->busy = false; + desc = to_xdma_desc(vd); + xdev = xchan->xdev_hdl; + + ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_COMPLETED_DESC, + &complete_desc_num); + if (ret) + goto out; + + desc->completed_desc_num += complete_desc_num; + /* + * if all data blocks are transferred, remove and complete the request + */ + if (desc->completed_desc_num == desc->desc_num) { + list_del(&vd->node); + vchan_cookie_complete(vd); + goto out; + } + + if (desc->completed_desc_num > desc->desc_num || + complete_desc_num != XDMA_DESC_BLOCK_NUM * XDMA_DESC_ADJACENT) + goto out; + + /* transfer the rest of data */ + xdma_xfer_start(xchan); + +out: + spin_unlock(&xchan->vchan.lock); + return IRQ_HANDLED; +} + +/** + * xdma_irq_fini - Uninitialize IRQ + * @xdev: DMA device pointer + */ +static void xdma_irq_fini(struct xdma_device *xdev) +{ + int i; + + /* disable interrupt */ + regmap_write(xdev->rmap, XDMA_IRQ_CHAN_INT_EN_W1C, ~0); + + /* free irq handler */ + for (i = 0; i < xdev->h2c_chan_num; i++) + free_irq(xdev->h2c_chans[i].irq, &xdev->h2c_chans[i]); + + for (i = 0; i < xdev->c2h_chan_num; i++) + free_irq(xdev->c2h_chans[i].irq, &xdev->c2h_chans[i]); +} + +/** + * xdma_set_vector_reg - configure hardware IRQ registers + * @xdev: DMA device pointer + * @vec_tbl_start: Start of IRQ registers + * @irq_start: Start of IRQ + * @irq_num: Number of IRQ + */ +static int xdma_set_vector_reg(struct xdma_device *xdev, u32 vec_tbl_start, + u32 irq_start, u32 irq_num) +{ + u32 shift, i, val = 0; + int ret; + + /* Each IRQ register is 32 bit and contains 4 IRQs */ + while (irq_num > 0) { + for (i = 0; i < 4; i++) { + shift = XDMA_IRQ_VEC_SHIFT * i; + val |= irq_start << shift; + irq_start++; + irq_num--; + if (!irq_num) + break; + } + + /* write IRQ register */ + ret = regmap_write(xdev->rmap, vec_tbl_start, val); + if (ret) + return ret; + vec_tbl_start += sizeof(u32); + val = 0; + } + + return 0; +} + +/** + * xdma_irq_init - initialize IRQs + * @xdev: DMA device pointer + */ +static int xdma_irq_init(struct xdma_device *xdev) +{ + u32 irq = xdev->irq_start; + u32 user_irq_start; + int i, j, ret; + + /* return failure if there are not enough IRQs */ + if (xdev->irq_num < XDMA_CHAN_NUM(xdev)) { + xdma_err(xdev, "not enough irq"); + return -EINVAL; + } + + /* setup H2C interrupt handler */ + for (i = 0; i < xdev->h2c_chan_num; i++) { + ret = request_irq(irq, xdma_channel_isr, 0, + "xdma-h2c-channel", &xdev->h2c_chans[i]); + if (ret) { + xdma_err(xdev, "H2C channel%d request irq%d failed: %d", + i, irq, ret); + goto failed_init_h2c; + } + xdev->h2c_chans[i].irq = irq; + irq++; + } + + /* setup C2H interrupt handler */ + for (j = 0; j < xdev->c2h_chan_num; j++) { + ret = request_irq(irq, xdma_channel_isr, 0, + "xdma-c2h-channel", &xdev->c2h_chans[j]); + if (ret) { + xdma_err(xdev, "C2H channel%d request irq%d failed: %d", + j, irq, ret); + goto failed_init_c2h; + } + xdev->c2h_chans[j].irq = irq; + irq++; + } + + /* config hardware IRQ registers */ + ret = xdma_set_vector_reg(xdev, XDMA_IRQ_CHAN_VEC_NUM, 0, + XDMA_CHAN_NUM(xdev)); + if (ret) { + xdma_err(xdev, "failed to set channel vectors: %d", ret); + goto failed_init_c2h; + } + + /* config user IRQ registers if needed */ + user_irq_start = XDMA_CHAN_NUM(xdev); + if (xdev->irq_num > user_irq_start) { + ret = xdma_set_vector_reg(xdev, XDMA_IRQ_USER_VEC_NUM, + user_irq_start, + xdev->irq_num - user_irq_start); + if (ret) { + xdma_err(xdev, "failed to set user vectors: %d", ret); + goto failed_init_c2h; + } + } + + /* enable interrupt */ + ret = regmap_write(xdev->rmap, XDMA_IRQ_CHAN_INT_EN_W1S, ~0); + if (ret) + goto failed_init_c2h; + + return 0; + +failed_init_c2h: + while (j--) + free_irq(xdev->c2h_chans[j].irq, &xdev->c2h_chans[j]); +failed_init_h2c: + while (i--) + free_irq(xdev->h2c_chans[i].irq, &xdev->h2c_chans[i]); + + return ret; +} + +static bool xdma_filter_fn(struct dma_chan *chan, void *param) +{ + struct xdma_chan *xdma_chan = to_xdma_chan(chan); + struct xdma_chan_info *chan_info = param; + + return chan_info->dir == xdma_chan->dir; +} + +/** + * xdma_disable_user_irq - Disable user interrupt + * @pdev: Pointer to the platform_device structure + * @irq_num: System IRQ number + */ +void xdma_disable_user_irq(struct platform_device *pdev, u32 irq_num) +{ + struct xdma_device *xdev = platform_get_drvdata(pdev); + u32 index; + + index = irq_num - xdev->irq_start; + if (index < XDMA_CHAN_NUM(xdev) || index >= xdev->irq_num) { + xdma_err(xdev, "invalid user irq number"); + return; + } + index -= XDMA_CHAN_NUM(xdev); + + regmap_write(xdev->rmap, XDMA_IRQ_USER_INT_EN_W1C, 1 << index); +} +EXPORT_SYMBOL(xdma_disable_user_irq); + +/** + * xdma_enable_user_irq - Enable user logic interrupt + * @pdev: Pointer to the platform_device structure + * @irq_num: System IRQ number + */ +int xdma_enable_user_irq(struct platform_device *pdev, u32 irq_num) +{ + struct xdma_device *xdev = platform_get_drvdata(pdev); + u32 index; + int ret; + + index = irq_num - xdev->irq_start; + if (index < XDMA_CHAN_NUM(xdev) || index >= xdev->irq_num) { + xdma_err(xdev, "invalid user irq number"); + return -EINVAL; + } + index -= XDMA_CHAN_NUM(xdev); + + ret = regmap_write(xdev->rmap, XDMA_IRQ_USER_INT_EN_W1S, 1 << index); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(xdma_enable_user_irq); + +/** + * xdma_get_user_irq - Get system IRQ number + * @pdev: Pointer to the platform_device structure + * @user_irq_index: User logic IRQ wire index + * + * Return: The system IRQ number allocated for the given wire index. + */ +int xdma_get_user_irq(struct platform_device *pdev, u32 user_irq_index) +{ + struct xdma_device *xdev = platform_get_drvdata(pdev); + + if (XDMA_CHAN_NUM(xdev) + user_irq_index >= xdev->irq_num) { + xdma_err(xdev, "invalid user irq index"); + return -EINVAL; + } + + return xdev->irq_start + XDMA_CHAN_NUM(xdev) + user_irq_index; +} +EXPORT_SYMBOL(xdma_get_user_irq); + +/** + * xdma_remove - Driver remove function + * @pdev: Pointer to the platform_device structure + */ +static int xdma_remove(struct platform_device *pdev) +{ + struct xdma_device *xdev = platform_get_drvdata(pdev); + + if (xdev->status & XDMA_DEV_STATUS_INIT_MSIX) + xdma_irq_fini(xdev); + + if (xdev->status & XDMA_DEV_STATUS_REG_DMA) + dma_async_device_unregister(&xdev->dma_dev); + + return 0; +} + +/** + * xdma_probe - Driver probe function + * @pdev: Pointer to the platform_device structure + */ +static int xdma_probe(struct platform_device *pdev) +{ + struct xdma_platdata *pdata = dev_get_platdata(&pdev->dev); + struct xdma_device *xdev; + void __iomem *reg_base; + struct resource *res; + int ret = -ENODEV; + + if (pdata->max_dma_channels > XDMA_MAX_CHANNELS) { + dev_err(&pdev->dev, "invalid max dma channels %d", + pdata->max_dma_channels); + return -EINVAL; + } + + xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL); + if (!xdev) + return -ENOMEM; + + platform_set_drvdata(pdev, xdev); + xdev->pdev = pdev; + + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res) { + xdma_err(xdev, "failed to get irq resource"); + goto failed; + } + xdev->irq_start = res->start; + xdev->irq_num = res->end - res->start + 1; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + xdma_err(xdev, "failed to get io resource"); + goto failed; + } + + reg_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(reg_base)) { + xdma_err(xdev, "ioremap failed"); + goto failed; + } + + xdev->rmap = devm_regmap_init_mmio(&pdev->dev, reg_base, + &xdma_regmap_config); + if (!xdev->rmap) { + xdma_err(xdev, "config regmap failed: %d", ret); + goto failed; + } + INIT_LIST_HEAD(&xdev->dma_dev.channels); + + ret = xdma_alloc_channels(xdev, DMA_MEM_TO_DEV); + if (ret) { + xdma_err(xdev, "config H2C channels failed: %d", ret); + goto failed; + } + + ret = xdma_alloc_channels(xdev, DMA_DEV_TO_MEM); + if (ret) { + xdma_err(xdev, "config C2H channels failed: %d", ret); + goto failed; + } + + dma_cap_set(DMA_SLAVE, xdev->dma_dev.cap_mask); + dma_cap_set(DMA_PRIVATE, xdev->dma_dev.cap_mask); + + xdev->dma_dev.dev = &pdev->dev; + xdev->dma_dev.device_free_chan_resources = xdma_free_chan_resources; + xdev->dma_dev.device_alloc_chan_resources = xdma_alloc_chan_resources; + xdev->dma_dev.device_tx_status = dma_cookie_status; + xdev->dma_dev.device_prep_slave_sg = xdma_prep_device_sg; + xdev->dma_dev.device_config = xdma_device_config; + xdev->dma_dev.device_issue_pending = xdma_issue_pending; + xdev->dma_dev.filter.map = pdata->device_map; + xdev->dma_dev.filter.mapcnt = pdata->device_map_cnt; + xdev->dma_dev.filter.fn = xdma_filter_fn; + + ret = dma_async_device_register(&xdev->dma_dev); + if (ret) { + xdma_err(xdev, "failed to register Xilinx XDMA: %d", ret); + goto failed; + } + xdev->status |= XDMA_DEV_STATUS_REG_DMA; + + ret = xdma_irq_init(xdev); + if (ret) { + xdma_err(xdev, "failed to init msix: %d", ret); + goto failed; + } + xdev->status |= XDMA_DEV_STATUS_INIT_MSIX; + + return 0; + +failed: + xdma_remove(pdev); + + return ret; +} + +static const struct platform_device_id xdma_id_table[] = { + { "xdma", 0}, + { }, +}; + +static struct platform_driver xdma_driver = { + .driver = { + .name = "xdma", + }, + .id_table = xdma_id_table, + .probe = xdma_probe, + .remove = xdma_remove, +}; + +module_platform_driver(xdma_driver); + +MODULE_DESCRIPTION("AMD XDMA driver"); +MODULE_AUTHOR("XRT Team "); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c new file mode 100644 index 0000000000..0a3b2e22f2 --- /dev/null +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -0,0 +1,3279 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DMA driver for Xilinx Video DMA Engine + * + * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved. + * + * Based on the Freescale DMA driver. + * + * Description: + * The AXI Video Direct Memory Access (AXI VDMA) core is a soft Xilinx IP + * core that provides high-bandwidth direct memory access between memory + * and AXI4-Stream type video target peripherals. The core provides efficient + * two dimensional DMA operations with independent asynchronous read (S2MM) + * and write (MM2S) channel operation. It can be configured to have either + * one channel or two channels. If configured as two channels, one is to + * transmit to the video device (MM2S) and another is to receive from the + * video device (S2MM). Initialization, status, interrupt and management + * registers are accessed through an AXI4-Lite slave interface. + * + * The AXI Direct Memory Access (AXI DMA) core is a soft Xilinx IP core that + * provides high-bandwidth one dimensional direct memory access between memory + * and AXI4-Stream target peripherals. It supports one receive and one + * transmit channel, both of them optional at synthesis time. + * + * The AXI CDMA, is a soft IP, which provides high-bandwidth Direct Memory + * Access (DMA) between a memory-mapped source address and a memory-mapped + * destination address. + * + * The AXI Multichannel Direct Memory Access (AXI MCDMA) core is a soft + * Xilinx IP that provides high-bandwidth direct memory access between + * memory and AXI4-Stream target peripherals. It provides scatter gather + * (SG) interface with multiple channels independent configuration support. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" + +/* Register/Descriptor Offsets */ +#define XILINX_DMA_MM2S_CTRL_OFFSET 0x0000 +#define XILINX_DMA_S2MM_CTRL_OFFSET 0x0030 +#define XILINX_VDMA_MM2S_DESC_OFFSET 0x0050 +#define XILINX_VDMA_S2MM_DESC_OFFSET 0x00a0 + +/* Control Registers */ +#define XILINX_DMA_REG_DMACR 0x0000 +#define XILINX_DMA_DMACR_DELAY_MAX 0xff +#define XILINX_DMA_DMACR_DELAY_SHIFT 24 +#define XILINX_DMA_DMACR_FRAME_COUNT_MAX 0xff +#define XILINX_DMA_DMACR_FRAME_COUNT_SHIFT 16 +#define XILINX_DMA_DMACR_ERR_IRQ BIT(14) +#define XILINX_DMA_DMACR_DLY_CNT_IRQ BIT(13) +#define XILINX_DMA_DMACR_FRM_CNT_IRQ BIT(12) +#define XILINX_DMA_DMACR_MASTER_SHIFT 8 +#define XILINX_DMA_DMACR_FSYNCSRC_SHIFT 5 +#define XILINX_DMA_DMACR_FRAMECNT_EN BIT(4) +#define XILINX_DMA_DMACR_GENLOCK_EN BIT(3) +#define XILINX_DMA_DMACR_RESET BIT(2) +#define XILINX_DMA_DMACR_CIRC_EN BIT(1) +#define XILINX_DMA_DMACR_RUNSTOP BIT(0) +#define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) +#define XILINX_DMA_DMACR_DELAY_MASK GENMASK(31, 24) +#define XILINX_DMA_DMACR_FRAME_COUNT_MASK GENMASK(23, 16) +#define XILINX_DMA_DMACR_MASTER_MASK GENMASK(11, 8) + +#define XILINX_DMA_REG_DMASR 0x0004 +#define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15) +#define XILINX_DMA_DMASR_ERR_IRQ BIT(14) +#define XILINX_DMA_DMASR_DLY_CNT_IRQ BIT(13) +#define XILINX_DMA_DMASR_FRM_CNT_IRQ BIT(12) +#define XILINX_DMA_DMASR_SOF_LATE_ERR BIT(11) +#define XILINX_DMA_DMASR_SG_DEC_ERR BIT(10) +#define XILINX_DMA_DMASR_SG_SLV_ERR BIT(9) +#define XILINX_DMA_DMASR_EOF_EARLY_ERR BIT(8) +#define XILINX_DMA_DMASR_SOF_EARLY_ERR BIT(7) +#define XILINX_DMA_DMASR_DMA_DEC_ERR BIT(6) +#define XILINX_DMA_DMASR_DMA_SLAVE_ERR BIT(5) +#define XILINX_DMA_DMASR_DMA_INT_ERR BIT(4) +#define XILINX_DMA_DMASR_SG_MASK BIT(3) +#define XILINX_DMA_DMASR_IDLE BIT(1) +#define XILINX_DMA_DMASR_HALTED BIT(0) +#define XILINX_DMA_DMASR_DELAY_MASK GENMASK(31, 24) +#define XILINX_DMA_DMASR_FRAME_COUNT_MASK GENMASK(23, 16) + +#define XILINX_DMA_REG_CURDESC 0x0008 +#define XILINX_DMA_REG_TAILDESC 0x0010 +#define XILINX_DMA_REG_REG_INDEX 0x0014 +#define XILINX_DMA_REG_FRMSTORE 0x0018 +#define XILINX_DMA_REG_THRESHOLD 0x001c +#define XILINX_DMA_REG_FRMPTR_STS 0x0024 +#define XILINX_DMA_REG_PARK_PTR 0x0028 +#define XILINX_DMA_PARK_PTR_WR_REF_SHIFT 8 +#define XILINX_DMA_PARK_PTR_WR_REF_MASK GENMASK(12, 8) +#define XILINX_DMA_PARK_PTR_RD_REF_SHIFT 0 +#define XILINX_DMA_PARK_PTR_RD_REF_MASK GENMASK(4, 0) +#define XILINX_DMA_REG_VDMA_VERSION 0x002c + +/* Register Direct Mode Registers */ +#define XILINX_DMA_REG_VSIZE 0x0000 +#define XILINX_DMA_REG_HSIZE 0x0004 + +#define XILINX_DMA_REG_FRMDLY_STRIDE 0x0008 +#define XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT 24 +#define XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT 0 + +#define XILINX_VDMA_REG_START_ADDRESS(n) (0x000c + 4 * (n)) +#define XILINX_VDMA_REG_START_ADDRESS_64(n) (0x000c + 8 * (n)) + +#define XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP 0x00ec +#define XILINX_VDMA_ENABLE_VERTICAL_FLIP BIT(0) + +/* HW specific definitions */ +#define XILINX_MCDMA_MAX_CHANS_PER_DEVICE 0x20 +#define XILINX_DMA_MAX_CHANS_PER_DEVICE 0x2 +#define XILINX_CDMA_MAX_CHANS_PER_DEVICE 0x1 + +#define XILINX_DMA_DMAXR_ALL_IRQ_MASK \ + (XILINX_DMA_DMASR_FRM_CNT_IRQ | \ + XILINX_DMA_DMASR_DLY_CNT_IRQ | \ + XILINX_DMA_DMASR_ERR_IRQ) + +#define XILINX_DMA_DMASR_ALL_ERR_MASK \ + (XILINX_DMA_DMASR_EOL_LATE_ERR | \ + XILINX_DMA_DMASR_SOF_LATE_ERR | \ + XILINX_DMA_DMASR_SG_DEC_ERR | \ + XILINX_DMA_DMASR_SG_SLV_ERR | \ + XILINX_DMA_DMASR_EOF_EARLY_ERR | \ + XILINX_DMA_DMASR_SOF_EARLY_ERR | \ + XILINX_DMA_DMASR_DMA_DEC_ERR | \ + XILINX_DMA_DMASR_DMA_SLAVE_ERR | \ + XILINX_DMA_DMASR_DMA_INT_ERR) + +/* + * Recoverable errors are DMA Internal error, SOF Early, EOF Early + * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC + * is enabled in the h/w system. + */ +#define XILINX_DMA_DMASR_ERR_RECOVER_MASK \ + (XILINX_DMA_DMASR_SOF_LATE_ERR | \ + XILINX_DMA_DMASR_EOF_EARLY_ERR | \ + XILINX_DMA_DMASR_SOF_EARLY_ERR | \ + XILINX_DMA_DMASR_DMA_INT_ERR) + +/* Axi VDMA Flush on Fsync bits */ +#define XILINX_DMA_FLUSH_S2MM 3 +#define XILINX_DMA_FLUSH_MM2S 2 +#define XILINX_DMA_FLUSH_BOTH 1 + +/* Delay loop counter to prevent hardware failure */ +#define XILINX_DMA_LOOP_COUNT 1000000 + +/* AXI DMA Specific Registers/Offsets */ +#define XILINX_DMA_REG_SRCDSTADDR 0x18 +#define XILINX_DMA_REG_BTT 0x28 + +/* AXI DMA Specific Masks/Bit fields */ +#define XILINX_DMA_MAX_TRANS_LEN_MIN 8 +#define XILINX_DMA_MAX_TRANS_LEN_MAX 23 +#define XILINX_DMA_V2_MAX_TRANS_LEN_MAX 26 +#define XILINX_DMA_CR_COALESCE_MAX GENMASK(23, 16) +#define XILINX_DMA_CR_DELAY_MAX GENMASK(31, 24) +#define XILINX_DMA_CR_CYCLIC_BD_EN_MASK BIT(4) +#define XILINX_DMA_CR_COALESCE_SHIFT 16 +#define XILINX_DMA_CR_DELAY_SHIFT 24 +#define XILINX_DMA_BD_SOP BIT(27) +#define XILINX_DMA_BD_EOP BIT(26) +#define XILINX_DMA_BD_COMP_MASK BIT(31) +#define XILINX_DMA_COALESCE_MAX 255 +#define XILINX_DMA_NUM_DESCS 512 +#define XILINX_DMA_NUM_APP_WORDS 5 + +/* AXI CDMA Specific Registers/Offsets */ +#define XILINX_CDMA_REG_SRCADDR 0x18 +#define XILINX_CDMA_REG_DSTADDR 0x20 + +/* AXI CDMA Specific Masks */ +#define XILINX_CDMA_CR_SGMODE BIT(3) + +#define xilinx_prep_dma_addr_t(addr) \ + ((dma_addr_t)((u64)addr##_##msb << 32 | (addr))) + +/* AXI MCDMA Specific Registers/Offsets */ +#define XILINX_MCDMA_MM2S_CTRL_OFFSET 0x0000 +#define XILINX_MCDMA_S2MM_CTRL_OFFSET 0x0500 +#define XILINX_MCDMA_CHEN_OFFSET 0x0008 +#define XILINX_MCDMA_CH_ERR_OFFSET 0x0010 +#define XILINX_MCDMA_RXINT_SER_OFFSET 0x0020 +#define XILINX_MCDMA_TXINT_SER_OFFSET 0x0028 +#define XILINX_MCDMA_CHAN_CR_OFFSET(x) (0x40 + (x) * 0x40) +#define XILINX_MCDMA_CHAN_SR_OFFSET(x) (0x44 + (x) * 0x40) +#define XILINX_MCDMA_CHAN_CDESC_OFFSET(x) (0x48 + (x) * 0x40) +#define XILINX_MCDMA_CHAN_TDESC_OFFSET(x) (0x50 + (x) * 0x40) + +/* AXI MCDMA Specific Masks/Shifts */ +#define XILINX_MCDMA_COALESCE_SHIFT 16 +#define XILINX_MCDMA_COALESCE_MAX 24 +#define XILINX_MCDMA_IRQ_ALL_MASK GENMASK(7, 5) +#define XILINX_MCDMA_COALESCE_MASK GENMASK(23, 16) +#define XILINX_MCDMA_CR_RUNSTOP_MASK BIT(0) +#define XILINX_MCDMA_IRQ_IOC_MASK BIT(5) +#define XILINX_MCDMA_IRQ_DELAY_MASK BIT(6) +#define XILINX_MCDMA_IRQ_ERR_MASK BIT(7) +#define XILINX_MCDMA_BD_EOP BIT(30) +#define XILINX_MCDMA_BD_SOP BIT(31) + +/** + * struct xilinx_vdma_desc_hw - Hardware Descriptor + * @next_desc: Next Descriptor Pointer @0x00 + * @pad1: Reserved @0x04 + * @buf_addr: Buffer address @0x08 + * @buf_addr_msb: MSB of Buffer address @0x0C + * @vsize: Vertical Size @0x10 + * @hsize: Horizontal Size @0x14 + * @stride: Number of bytes between the first + * pixels of each horizontal line @0x18 + */ +struct xilinx_vdma_desc_hw { + u32 next_desc; + u32 pad1; + u32 buf_addr; + u32 buf_addr_msb; + u32 vsize; + u32 hsize; + u32 stride; +} __aligned(64); + +/** + * struct xilinx_axidma_desc_hw - Hardware Descriptor for AXI DMA + * @next_desc: Next Descriptor Pointer @0x00 + * @next_desc_msb: MSB of Next Descriptor Pointer @0x04 + * @buf_addr: Buffer address @0x08 + * @buf_addr_msb: MSB of Buffer address @0x0C + * @reserved1: Reserved @0x10 + * @reserved2: Reserved @0x14 + * @control: Control field @0x18 + * @status: Status field @0x1C + * @app: APP Fields @0x20 - 0x30 + */ +struct xilinx_axidma_desc_hw { + u32 next_desc; + u32 next_desc_msb; + u32 buf_addr; + u32 buf_addr_msb; + u32 reserved1; + u32 reserved2; + u32 control; + u32 status; + u32 app[XILINX_DMA_NUM_APP_WORDS]; +} __aligned(64); + +/** + * struct xilinx_aximcdma_desc_hw - Hardware Descriptor for AXI MCDMA + * @next_desc: Next Descriptor Pointer @0x00 + * @next_desc_msb: MSB of Next Descriptor Pointer @0x04 + * @buf_addr: Buffer address @0x08 + * @buf_addr_msb: MSB of Buffer address @0x0C + * @rsvd: Reserved field @0x10 + * @control: Control Information field @0x14 + * @status: Status field @0x18 + * @sideband_status: Status of sideband signals @0x1C + * @app: APP Fields @0x20 - 0x30 + */ +struct xilinx_aximcdma_desc_hw { + u32 next_desc; + u32 next_desc_msb; + u32 buf_addr; + u32 buf_addr_msb; + u32 rsvd; + u32 control; + u32 status; + u32 sideband_status; + u32 app[XILINX_DMA_NUM_APP_WORDS]; +} __aligned(64); + +/** + * struct xilinx_cdma_desc_hw - Hardware Descriptor + * @next_desc: Next Descriptor Pointer @0x00 + * @next_desc_msb: Next Descriptor Pointer MSB @0x04 + * @src_addr: Source address @0x08 + * @src_addr_msb: Source address MSB @0x0C + * @dest_addr: Destination address @0x10 + * @dest_addr_msb: Destination address MSB @0x14 + * @control: Control field @0x18 + * @status: Status field @0x1C + */ +struct xilinx_cdma_desc_hw { + u32 next_desc; + u32 next_desc_msb; + u32 src_addr; + u32 src_addr_msb; + u32 dest_addr; + u32 dest_addr_msb; + u32 control; + u32 status; +} __aligned(64); + +/** + * struct xilinx_vdma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_vdma_tx_segment { + struct xilinx_vdma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_axidma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_axidma_tx_segment { + struct xilinx_axidma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_aximcdma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_aximcdma_tx_segment { + struct xilinx_aximcdma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_cdma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_cdma_tx_segment { + struct xilinx_cdma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_dma_tx_descriptor - Per Transaction structure + * @async_tx: Async transaction descriptor + * @segments: TX segments list + * @node: Node in the channel descriptors list + * @cyclic: Check for cyclic transfers. + * @err: Whether the descriptor has an error. + * @residue: Residue of the completed descriptor + */ +struct xilinx_dma_tx_descriptor { + struct dma_async_tx_descriptor async_tx; + struct list_head segments; + struct list_head node; + bool cyclic; + bool err; + u32 residue; +}; + +/** + * struct xilinx_dma_chan - Driver specific DMA channel structure + * @xdev: Driver specific device structure + * @ctrl_offset: Control registers offset + * @desc_offset: TX descriptor registers offset + * @lock: Descriptor operation lock + * @pending_list: Descriptors waiting + * @active_list: Descriptors ready to submit + * @done_list: Complete descriptors + * @free_seg_list: Free descriptors + * @common: DMA common channel + * @desc_pool: Descriptors pool + * @dev: The dma device + * @irq: Channel IRQ + * @id: Channel ID + * @direction: Transfer direction + * @num_frms: Number of frames + * @has_sg: Support scatter transfers + * @cyclic: Check for cyclic transfers. + * @genlock: Support genlock mode + * @err: Channel has errors + * @idle: Check for channel idle + * @terminating: Check for channel being synchronized by user + * @tasklet: Cleanup work after irq + * @config: Device configuration info + * @flush_on_fsync: Flush on Frame sync + * @desc_pendingcount: Descriptor pending count + * @ext_addr: Indicates 64 bit addressing is supported by dma channel + * @desc_submitcount: Descriptor h/w submitted count + * @seg_v: Statically allocated segments base + * @seg_mv: Statically allocated segments base for MCDMA + * @seg_p: Physical allocated segments base + * @cyclic_seg_v: Statically allocated segment base for cyclic transfers + * @cyclic_seg_p: Physical allocated segments base for cyclic dma + * @start_transfer: Differentiate b/w DMA IP's transfer + * @stop_transfer: Differentiate b/w DMA IP's quiesce + * @tdest: TDEST value for mcdma + * @has_vflip: S2MM vertical flip + * @irq_delay: Interrupt delay timeout + */ +struct xilinx_dma_chan { + struct xilinx_dma_device *xdev; + u32 ctrl_offset; + u32 desc_offset; + spinlock_t lock; + struct list_head pending_list; + struct list_head active_list; + struct list_head done_list; + struct list_head free_seg_list; + struct dma_chan common; + struct dma_pool *desc_pool; + struct device *dev; + int irq; + int id; + enum dma_transfer_direction direction; + int num_frms; + bool has_sg; + bool cyclic; + bool genlock; + bool err; + bool idle; + bool terminating; + struct tasklet_struct tasklet; + struct xilinx_vdma_config config; + bool flush_on_fsync; + u32 desc_pendingcount; + bool ext_addr; + u32 desc_submitcount; + struct xilinx_axidma_tx_segment *seg_v; + struct xilinx_aximcdma_tx_segment *seg_mv; + dma_addr_t seg_p; + struct xilinx_axidma_tx_segment *cyclic_seg_v; + dma_addr_t cyclic_seg_p; + void (*start_transfer)(struct xilinx_dma_chan *chan); + int (*stop_transfer)(struct xilinx_dma_chan *chan); + u16 tdest; + bool has_vflip; + u8 irq_delay; +}; + +/** + * enum xdma_ip_type - DMA IP type. + * + * @XDMA_TYPE_AXIDMA: Axi dma ip. + * @XDMA_TYPE_CDMA: Axi cdma ip. + * @XDMA_TYPE_VDMA: Axi vdma ip. + * @XDMA_TYPE_AXIMCDMA: Axi MCDMA ip. + * + */ +enum xdma_ip_type { + XDMA_TYPE_AXIDMA = 0, + XDMA_TYPE_CDMA, + XDMA_TYPE_VDMA, + XDMA_TYPE_AXIMCDMA +}; + +struct xilinx_dma_config { + enum xdma_ip_type dmatype; + int (*clk_init)(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **txs_clk, + struct clk **rx_clk, struct clk **rxs_clk); + irqreturn_t (*irq_handler)(int irq, void *data); + const int max_channels; +}; + +/** + * struct xilinx_dma_device - DMA device structure + * @regs: I/O mapped base address + * @dev: Device Structure + * @common: DMA device structure + * @chan: Driver specific DMA channel + * @flush_on_fsync: Flush on frame sync + * @ext_addr: Indicates 64 bit addressing is supported by dma device + * @pdev: Platform device structure pointer + * @dma_config: DMA config structure + * @axi_clk: DMA Axi4-lite interace clock + * @tx_clk: DMA mm2s clock + * @txs_clk: DMA mm2s stream clock + * @rx_clk: DMA s2mm clock + * @rxs_clk: DMA s2mm stream clock + * @s2mm_chan_id: DMA s2mm channel identifier + * @mm2s_chan_id: DMA mm2s channel identifier + * @max_buffer_len: Max buffer length + * @has_axistream_connected: AXI DMA connected to AXI Stream IP + */ +struct xilinx_dma_device { + void __iomem *regs; + struct device *dev; + struct dma_device common; + struct xilinx_dma_chan *chan[XILINX_MCDMA_MAX_CHANS_PER_DEVICE]; + u32 flush_on_fsync; + bool ext_addr; + struct platform_device *pdev; + const struct xilinx_dma_config *dma_config; + struct clk *axi_clk; + struct clk *tx_clk; + struct clk *txs_clk; + struct clk *rx_clk; + struct clk *rxs_clk; + u32 s2mm_chan_id; + u32 mm2s_chan_id; + u32 max_buffer_len; + bool has_axistream_connected; +}; + +/* Macros */ +#define to_xilinx_chan(chan) \ + container_of(chan, struct xilinx_dma_chan, common) +#define to_dma_tx_descriptor(tx) \ + container_of(tx, struct xilinx_dma_tx_descriptor, async_tx) +#define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ + readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \ + val, cond, delay_us, timeout_us) + +/* IO accessors */ +static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg) +{ + return ioread32(chan->xdev->regs + reg); +} + +static inline void dma_write(struct xilinx_dma_chan *chan, u32 reg, u32 value) +{ + iowrite32(value, chan->xdev->regs + reg); +} + +static inline void vdma_desc_write(struct xilinx_dma_chan *chan, u32 reg, + u32 value) +{ + dma_write(chan, chan->desc_offset + reg, value); +} + +static inline u32 dma_ctrl_read(struct xilinx_dma_chan *chan, u32 reg) +{ + return dma_read(chan, chan->ctrl_offset + reg); +} + +static inline void dma_ctrl_write(struct xilinx_dma_chan *chan, u32 reg, + u32 value) +{ + dma_write(chan, chan->ctrl_offset + reg, value); +} + +static inline void dma_ctrl_clr(struct xilinx_dma_chan *chan, u32 reg, + u32 clr) +{ + dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) & ~clr); +} + +static inline void dma_ctrl_set(struct xilinx_dma_chan *chan, u32 reg, + u32 set) +{ + dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) | set); +} + +/** + * vdma_desc_write_64 - 64-bit descriptor write + * @chan: Driver specific VDMA channel + * @reg: Register to write + * @value_lsb: lower address of the descriptor. + * @value_msb: upper address of the descriptor. + * + * Since vdma driver is trying to write to a register offset which is not a + * multiple of 64 bits(ex : 0x5c), we are writing as two separate 32 bits + * instead of a single 64 bit register write. + */ +static inline void vdma_desc_write_64(struct xilinx_dma_chan *chan, u32 reg, + u32 value_lsb, u32 value_msb) +{ + /* Write the lsb 32 bits*/ + writel(value_lsb, chan->xdev->regs + chan->desc_offset + reg); + + /* Write the msb 32 bits */ + writel(value_msb, chan->xdev->regs + chan->desc_offset + reg + 4); +} + +static inline void dma_writeq(struct xilinx_dma_chan *chan, u32 reg, u64 value) +{ + lo_hi_writeq(value, chan->xdev->regs + chan->ctrl_offset + reg); +} + +static inline void xilinx_write(struct xilinx_dma_chan *chan, u32 reg, + dma_addr_t addr) +{ + if (chan->ext_addr) + dma_writeq(chan, reg, addr); + else + dma_ctrl_write(chan, reg, addr); +} + +static inline void xilinx_axidma_buf(struct xilinx_dma_chan *chan, + struct xilinx_axidma_desc_hw *hw, + dma_addr_t buf_addr, size_t sg_used, + size_t period_len) +{ + if (chan->ext_addr) { + hw->buf_addr = lower_32_bits(buf_addr + sg_used + period_len); + hw->buf_addr_msb = upper_32_bits(buf_addr + sg_used + + period_len); + } else { + hw->buf_addr = buf_addr + sg_used + period_len; + } +} + +static inline void xilinx_aximcdma_buf(struct xilinx_dma_chan *chan, + struct xilinx_aximcdma_desc_hw *hw, + dma_addr_t buf_addr, size_t sg_used) +{ + if (chan->ext_addr) { + hw->buf_addr = lower_32_bits(buf_addr + sg_used); + hw->buf_addr_msb = upper_32_bits(buf_addr + sg_used); + } else { + hw->buf_addr = buf_addr + sg_used; + } +} + +/** + * xilinx_dma_get_metadata_ptr- Populate metadata pointer and payload length + * @tx: async transaction descriptor + * @payload_len: metadata payload length + * @max_len: metadata max length + * Return: The app field pointer. + */ +static void *xilinx_dma_get_metadata_ptr(struct dma_async_tx_descriptor *tx, + size_t *payload_len, size_t *max_len) +{ + struct xilinx_dma_tx_descriptor *desc = to_dma_tx_descriptor(tx); + struct xilinx_axidma_tx_segment *seg; + + *max_len = *payload_len = sizeof(u32) * XILINX_DMA_NUM_APP_WORDS; + seg = list_first_entry(&desc->segments, + struct xilinx_axidma_tx_segment, node); + return seg->hw.app; +} + +static struct dma_descriptor_metadata_ops xilinx_dma_metadata_ops = { + .get_ptr = xilinx_dma_get_metadata_ptr, +}; + +/* ----------------------------------------------------------------------------- + * Descriptors and segments alloc and free + */ + +/** + * xilinx_vdma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific DMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_vdma_tx_segment * +xilinx_vdma_alloc_tx_segment(struct xilinx_dma_chan *chan) +{ + struct xilinx_vdma_tx_segment *segment; + dma_addr_t phys; + + segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys); + if (!segment) + return NULL; + + segment->phys = phys; + + return segment; +} + +/** + * xilinx_cdma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific DMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_cdma_tx_segment * +xilinx_cdma_alloc_tx_segment(struct xilinx_dma_chan *chan) +{ + struct xilinx_cdma_tx_segment *segment; + dma_addr_t phys; + + segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys); + if (!segment) + return NULL; + + segment->phys = phys; + + return segment; +} + +/** + * xilinx_axidma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific DMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_axidma_tx_segment * +xilinx_axidma_alloc_tx_segment(struct xilinx_dma_chan *chan) +{ + struct xilinx_axidma_tx_segment *segment = NULL; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + if (!list_empty(&chan->free_seg_list)) { + segment = list_first_entry(&chan->free_seg_list, + struct xilinx_axidma_tx_segment, + node); + list_del(&segment->node); + } + spin_unlock_irqrestore(&chan->lock, flags); + + if (!segment) + dev_dbg(chan->dev, "Could not find free tx segment\n"); + + return segment; +} + +/** + * xilinx_aximcdma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific DMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_aximcdma_tx_segment * +xilinx_aximcdma_alloc_tx_segment(struct xilinx_dma_chan *chan) +{ + struct xilinx_aximcdma_tx_segment *segment = NULL; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + if (!list_empty(&chan->free_seg_list)) { + segment = list_first_entry(&chan->free_seg_list, + struct xilinx_aximcdma_tx_segment, + node); + list_del(&segment->node); + } + spin_unlock_irqrestore(&chan->lock, flags); + + return segment; +} + +static void xilinx_dma_clean_hw_desc(struct xilinx_axidma_desc_hw *hw) +{ + u32 next_desc = hw->next_desc; + u32 next_desc_msb = hw->next_desc_msb; + + memset(hw, 0, sizeof(struct xilinx_axidma_desc_hw)); + + hw->next_desc = next_desc; + hw->next_desc_msb = next_desc_msb; +} + +static void xilinx_mcdma_clean_hw_desc(struct xilinx_aximcdma_desc_hw *hw) +{ + u32 next_desc = hw->next_desc; + u32 next_desc_msb = hw->next_desc_msb; + + memset(hw, 0, sizeof(struct xilinx_aximcdma_desc_hw)); + + hw->next_desc = next_desc; + hw->next_desc_msb = next_desc_msb; +} + +/** + * xilinx_dma_free_tx_segment - Free transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment + */ +static void xilinx_dma_free_tx_segment(struct xilinx_dma_chan *chan, + struct xilinx_axidma_tx_segment *segment) +{ + xilinx_dma_clean_hw_desc(&segment->hw); + + list_add_tail(&segment->node, &chan->free_seg_list); +} + +/** + * xilinx_mcdma_free_tx_segment - Free transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment + */ +static void xilinx_mcdma_free_tx_segment(struct xilinx_dma_chan *chan, + struct xilinx_aximcdma_tx_segment * + segment) +{ + xilinx_mcdma_clean_hw_desc(&segment->hw); + + list_add_tail(&segment->node, &chan->free_seg_list); +} + +/** + * xilinx_cdma_free_tx_segment - Free transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment + */ +static void xilinx_cdma_free_tx_segment(struct xilinx_dma_chan *chan, + struct xilinx_cdma_tx_segment *segment) +{ + dma_pool_free(chan->desc_pool, segment, segment->phys); +} + +/** + * xilinx_vdma_free_tx_segment - Free transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment + */ +static void xilinx_vdma_free_tx_segment(struct xilinx_dma_chan *chan, + struct xilinx_vdma_tx_segment *segment) +{ + dma_pool_free(chan->desc_pool, segment, segment->phys); +} + +/** + * xilinx_dma_alloc_tx_descriptor - Allocate transaction descriptor + * @chan: Driver specific DMA channel + * + * Return: The allocated descriptor on success and NULL on failure. + */ +static struct xilinx_dma_tx_descriptor * +xilinx_dma_alloc_tx_descriptor(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + INIT_LIST_HEAD(&desc->segments); + + return desc; +} + +/** + * xilinx_dma_free_tx_descriptor - Free transaction descriptor + * @chan: Driver specific DMA channel + * @desc: DMA transaction descriptor + */ +static void +xilinx_dma_free_tx_descriptor(struct xilinx_dma_chan *chan, + struct xilinx_dma_tx_descriptor *desc) +{ + struct xilinx_vdma_tx_segment *segment, *next; + struct xilinx_cdma_tx_segment *cdma_segment, *cdma_next; + struct xilinx_axidma_tx_segment *axidma_segment, *axidma_next; + struct xilinx_aximcdma_tx_segment *aximcdma_segment, *aximcdma_next; + + if (!desc) + return; + + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + list_for_each_entry_safe(segment, next, &desc->segments, node) { + list_del(&segment->node); + xilinx_vdma_free_tx_segment(chan, segment); + } + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + list_for_each_entry_safe(cdma_segment, cdma_next, + &desc->segments, node) { + list_del(&cdma_segment->node); + xilinx_cdma_free_tx_segment(chan, cdma_segment); + } + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + list_for_each_entry_safe(axidma_segment, axidma_next, + &desc->segments, node) { + list_del(&axidma_segment->node); + xilinx_dma_free_tx_segment(chan, axidma_segment); + } + } else { + list_for_each_entry_safe(aximcdma_segment, aximcdma_next, + &desc->segments, node) { + list_del(&aximcdma_segment->node); + xilinx_mcdma_free_tx_segment(chan, aximcdma_segment); + } + } + + kfree(desc); +} + +/* Required functions */ + +/** + * xilinx_dma_free_desc_list - Free descriptors list + * @chan: Driver specific DMA channel + * @list: List to parse and delete the descriptor + */ +static void xilinx_dma_free_desc_list(struct xilinx_dma_chan *chan, + struct list_head *list) +{ + struct xilinx_dma_tx_descriptor *desc, *next; + + list_for_each_entry_safe(desc, next, list, node) { + list_del(&desc->node); + xilinx_dma_free_tx_descriptor(chan, desc); + } +} + +/** + * xilinx_dma_free_descriptors - Free channel descriptors + * @chan: Driver specific DMA channel + */ +static void xilinx_dma_free_descriptors(struct xilinx_dma_chan *chan) +{ + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + xilinx_dma_free_desc_list(chan, &chan->pending_list); + xilinx_dma_free_desc_list(chan, &chan->done_list); + xilinx_dma_free_desc_list(chan, &chan->active_list); + + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_dma_free_chan_resources - Free channel resources + * @dchan: DMA channel + */ +static void xilinx_dma_free_chan_resources(struct dma_chan *dchan) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + unsigned long flags; + + dev_dbg(chan->dev, "Free all channel resources.\n"); + + xilinx_dma_free_descriptors(chan); + + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + spin_lock_irqsave(&chan->lock, flags); + INIT_LIST_HEAD(&chan->free_seg_list); + spin_unlock_irqrestore(&chan->lock, flags); + + /* Free memory that is allocated for BD */ + dma_free_coherent(chan->dev, sizeof(*chan->seg_v) * + XILINX_DMA_NUM_DESCS, chan->seg_v, + chan->seg_p); + + /* Free Memory that is allocated for cyclic DMA Mode */ + dma_free_coherent(chan->dev, sizeof(*chan->cyclic_seg_v), + chan->cyclic_seg_v, chan->cyclic_seg_p); + } + + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) { + spin_lock_irqsave(&chan->lock, flags); + INIT_LIST_HEAD(&chan->free_seg_list); + spin_unlock_irqrestore(&chan->lock, flags); + + /* Free memory that is allocated for BD */ + dma_free_coherent(chan->dev, sizeof(*chan->seg_mv) * + XILINX_DMA_NUM_DESCS, chan->seg_mv, + chan->seg_p); + } + + if (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA && + chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIMCDMA) { + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + } + +} + +/** + * xilinx_dma_get_residue - Compute residue for a given descriptor + * @chan: Driver specific dma channel + * @desc: dma transaction descriptor + * + * Return: The number of residue bytes for the descriptor. + */ +static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, + struct xilinx_dma_tx_descriptor *desc) +{ + struct xilinx_cdma_tx_segment *cdma_seg; + struct xilinx_axidma_tx_segment *axidma_seg; + struct xilinx_aximcdma_tx_segment *aximcdma_seg; + struct xilinx_cdma_desc_hw *cdma_hw; + struct xilinx_axidma_desc_hw *axidma_hw; + struct xilinx_aximcdma_desc_hw *aximcdma_hw; + struct list_head *entry; + u32 residue = 0; + + list_for_each(entry, &desc->segments) { + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + cdma_seg = list_entry(entry, + struct xilinx_cdma_tx_segment, + node); + cdma_hw = &cdma_seg->hw; + residue += (cdma_hw->control - cdma_hw->status) & + chan->xdev->max_buffer_len; + } else if (chan->xdev->dma_config->dmatype == + XDMA_TYPE_AXIDMA) { + axidma_seg = list_entry(entry, + struct xilinx_axidma_tx_segment, + node); + axidma_hw = &axidma_seg->hw; + residue += (axidma_hw->control - axidma_hw->status) & + chan->xdev->max_buffer_len; + } else { + aximcdma_seg = + list_entry(entry, + struct xilinx_aximcdma_tx_segment, + node); + aximcdma_hw = &aximcdma_seg->hw; + residue += + (aximcdma_hw->control - aximcdma_hw->status) & + chan->xdev->max_buffer_len; + } + } + + return residue; +} + +/** + * xilinx_dma_chan_handle_cyclic - Cyclic dma callback + * @chan: Driver specific dma channel + * @desc: dma transaction descriptor + * @flags: flags for spin lock + */ +static void xilinx_dma_chan_handle_cyclic(struct xilinx_dma_chan *chan, + struct xilinx_dma_tx_descriptor *desc, + unsigned long *flags) +{ + struct dmaengine_desc_callback cb; + + dmaengine_desc_get_callback(&desc->async_tx, &cb); + if (dmaengine_desc_callback_valid(&cb)) { + spin_unlock_irqrestore(&chan->lock, *flags); + dmaengine_desc_callback_invoke(&cb, NULL); + spin_lock_irqsave(&chan->lock, *flags); + } +} + +/** + * xilinx_dma_chan_desc_cleanup - Clean channel descriptors + * @chan: Driver specific DMA channel + */ +static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *desc, *next; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + list_for_each_entry_safe(desc, next, &chan->done_list, node) { + struct dmaengine_result result; + + if (desc->cyclic) { + xilinx_dma_chan_handle_cyclic(chan, desc, &flags); + break; + } + + /* Remove from the list of running transactions */ + list_del(&desc->node); + + if (unlikely(desc->err)) { + if (chan->direction == DMA_DEV_TO_MEM) + result.result = DMA_TRANS_READ_FAILED; + else + result.result = DMA_TRANS_WRITE_FAILED; + } else { + result.result = DMA_TRANS_NOERROR; + } + + result.residue = desc->residue; + + /* Run the link descriptor callback function */ + spin_unlock_irqrestore(&chan->lock, flags); + dmaengine_desc_get_callback_invoke(&desc->async_tx, &result); + spin_lock_irqsave(&chan->lock, flags); + + /* Run any dependencies, then free the descriptor */ + dma_run_dependencies(&desc->async_tx); + xilinx_dma_free_tx_descriptor(chan, desc); + + /* + * While we ran a callback the user called a terminate function, + * which takes care of cleaning up any remaining descriptors + */ + if (chan->terminating) + break; + } + + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_dma_do_tasklet - Schedule completion tasklet + * @t: Pointer to the Xilinx DMA channel structure + */ +static void xilinx_dma_do_tasklet(struct tasklet_struct *t) +{ + struct xilinx_dma_chan *chan = from_tasklet(chan, t, tasklet); + + xilinx_dma_chan_desc_cleanup(chan); +} + +/** + * xilinx_dma_alloc_chan_resources - Allocate channel resources + * @dchan: DMA channel + * + * Return: '0' on success and failure value on error + */ +static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + int i; + + /* Has this channel already been allocated? */ + if (chan->desc_pool) + return 0; + + /* + * We need the descriptor to be aligned to 64bytes + * for meeting Xilinx VDMA specification requirement. + */ + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + /* Allocate the buffer descriptors. */ + chan->seg_v = dma_alloc_coherent(chan->dev, + sizeof(*chan->seg_v) * XILINX_DMA_NUM_DESCS, + &chan->seg_p, GFP_KERNEL); + if (!chan->seg_v) { + dev_err(chan->dev, + "unable to allocate channel %d descriptors\n", + chan->id); + return -ENOMEM; + } + /* + * For cyclic DMA mode we need to program the tail Descriptor + * register with a value which is not a part of the BD chain + * so allocating a desc segment during channel allocation for + * programming tail descriptor. + */ + chan->cyclic_seg_v = dma_alloc_coherent(chan->dev, + sizeof(*chan->cyclic_seg_v), + &chan->cyclic_seg_p, + GFP_KERNEL); + if (!chan->cyclic_seg_v) { + dev_err(chan->dev, + "unable to allocate desc segment for cyclic DMA\n"); + dma_free_coherent(chan->dev, sizeof(*chan->seg_v) * + XILINX_DMA_NUM_DESCS, chan->seg_v, + chan->seg_p); + return -ENOMEM; + } + chan->cyclic_seg_v->phys = chan->cyclic_seg_p; + + for (i = 0; i < XILINX_DMA_NUM_DESCS; i++) { + chan->seg_v[i].hw.next_desc = + lower_32_bits(chan->seg_p + sizeof(*chan->seg_v) * + ((i + 1) % XILINX_DMA_NUM_DESCS)); + chan->seg_v[i].hw.next_desc_msb = + upper_32_bits(chan->seg_p + sizeof(*chan->seg_v) * + ((i + 1) % XILINX_DMA_NUM_DESCS)); + chan->seg_v[i].phys = chan->seg_p + + sizeof(*chan->seg_v) * i; + list_add_tail(&chan->seg_v[i].node, + &chan->free_seg_list); + } + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) { + /* Allocate the buffer descriptors. */ + chan->seg_mv = dma_alloc_coherent(chan->dev, + sizeof(*chan->seg_mv) * + XILINX_DMA_NUM_DESCS, + &chan->seg_p, GFP_KERNEL); + if (!chan->seg_mv) { + dev_err(chan->dev, + "unable to allocate channel %d descriptors\n", + chan->id); + return -ENOMEM; + } + for (i = 0; i < XILINX_DMA_NUM_DESCS; i++) { + chan->seg_mv[i].hw.next_desc = + lower_32_bits(chan->seg_p + sizeof(*chan->seg_mv) * + ((i + 1) % XILINX_DMA_NUM_DESCS)); + chan->seg_mv[i].hw.next_desc_msb = + upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) * + ((i + 1) % XILINX_DMA_NUM_DESCS)); + chan->seg_mv[i].phys = chan->seg_p + + sizeof(*chan->seg_mv) * i; + list_add_tail(&chan->seg_mv[i].node, + &chan->free_seg_list); + } + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + chan->desc_pool = dma_pool_create("xilinx_cdma_desc_pool", + chan->dev, + sizeof(struct xilinx_cdma_tx_segment), + __alignof__(struct xilinx_cdma_tx_segment), + 0); + } else { + chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool", + chan->dev, + sizeof(struct xilinx_vdma_tx_segment), + __alignof__(struct xilinx_vdma_tx_segment), + 0); + } + + if (!chan->desc_pool && + ((chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA) && + chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIMCDMA)) { + dev_err(chan->dev, + "unable to allocate channel %d descriptor pool\n", + chan->id); + return -ENOMEM; + } + + dma_cookie_init(dchan); + + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + /* For AXI DMA resetting once channel will reset the + * other channel as well so enable the interrupts here. + */ + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); + } + + if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg) + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_CDMA_CR_SGMODE); + + return 0; +} + +/** + * xilinx_dma_calc_copysize - Calculate the amount of data to copy + * @chan: Driver specific DMA channel + * @size: Total data that needs to be copied + * @done: Amount of data that has been already copied + * + * Return: Amount of data that has to be copied + */ +static int xilinx_dma_calc_copysize(struct xilinx_dma_chan *chan, + int size, int done) +{ + size_t copy; + + copy = min_t(size_t, size - done, + chan->xdev->max_buffer_len); + + if ((copy + done < size) && + chan->xdev->common.copy_align) { + /* + * If this is not the last descriptor, make sure + * the next one will be properly aligned + */ + copy = rounddown(copy, + (1 << chan->xdev->common.copy_align)); + } + return copy; +} + +/** + * xilinx_dma_tx_status - Get DMA transaction status + * @dchan: DMA channel + * @cookie: Transaction identifier + * @txstate: Transaction state + * + * Return: DMA transaction status + */ +static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + enum dma_status ret; + unsigned long flags; + u32 residue = 0; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&chan->lock, flags); + if (!list_empty(&chan->active_list)) { + desc = list_last_entry(&chan->active_list, + struct xilinx_dma_tx_descriptor, node); + /* + * VDMA and simple mode do not support residue reporting, so the + * residue field will always be 0. + */ + if (chan->has_sg && chan->xdev->dma_config->dmatype != XDMA_TYPE_VDMA) + residue = xilinx_dma_get_residue(chan, desc); + } + spin_unlock_irqrestore(&chan->lock, flags); + + dma_set_residue(txstate, residue); + + return ret; +} + +/** + * xilinx_dma_stop_transfer - Halt DMA channel + * @chan: Driver specific DMA channel + * + * Return: '0' on success and failure value on error + */ +static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan) +{ + u32 val; + + dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP); + + /* Wait for the hardware to halt */ + return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val, + val & XILINX_DMA_DMASR_HALTED, 0, + XILINX_DMA_LOOP_COUNT); +} + +/** + * xilinx_cdma_stop_transfer - Wait for the current transfer to complete + * @chan: Driver specific DMA channel + * + * Return: '0' on success and failure value on error + */ +static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan) +{ + u32 val; + + return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val, + val & XILINX_DMA_DMASR_IDLE, 0, + XILINX_DMA_LOOP_COUNT); +} + +/** + * xilinx_dma_start - Start DMA channel + * @chan: Driver specific DMA channel + */ +static void xilinx_dma_start(struct xilinx_dma_chan *chan) +{ + int err; + u32 val; + + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP); + + /* Wait for the hardware to start */ + err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val, + !(val & XILINX_DMA_DMASR_HALTED), 0, + XILINX_DMA_LOOP_COUNT); + + if (err) { + dev_err(chan->dev, "Cannot start channel %p: %x\n", + chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR)); + + chan->err = true; + } +} + +/** + * xilinx_vdma_start_transfer - Starts VDMA transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan) +{ + struct xilinx_vdma_config *config = &chan->config; + struct xilinx_dma_tx_descriptor *desc; + u32 reg, j; + struct xilinx_vdma_tx_segment *segment, *last = NULL; + int i = 0; + + /* This function was invoked with lock held */ + if (chan->err) + return; + + if (!chan->idle) + return; + + if (list_empty(&chan->pending_list)) + return; + + desc = list_first_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + + /* Configure the hardware using info in the config structure */ + if (chan->has_vflip) { + reg = dma_read(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP); + reg &= ~XILINX_VDMA_ENABLE_VERTICAL_FLIP; + reg |= config->vflip_en; + dma_write(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP, + reg); + } + + reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); + + if (config->frm_cnt_en) + reg |= XILINX_DMA_DMACR_FRAMECNT_EN; + else + reg &= ~XILINX_DMA_DMACR_FRAMECNT_EN; + + /* If not parking, enable circular mode */ + if (config->park) + reg &= ~XILINX_DMA_DMACR_CIRC_EN; + else + reg |= XILINX_DMA_DMACR_CIRC_EN; + + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); + + j = chan->desc_submitcount; + reg = dma_read(chan, XILINX_DMA_REG_PARK_PTR); + if (chan->direction == DMA_MEM_TO_DEV) { + reg &= ~XILINX_DMA_PARK_PTR_RD_REF_MASK; + reg |= j << XILINX_DMA_PARK_PTR_RD_REF_SHIFT; + } else { + reg &= ~XILINX_DMA_PARK_PTR_WR_REF_MASK; + reg |= j << XILINX_DMA_PARK_PTR_WR_REF_SHIFT; + } + dma_write(chan, XILINX_DMA_REG_PARK_PTR, reg); + + /* Start the hardware */ + xilinx_dma_start(chan); + + if (chan->err) + return; + + /* Start the transfer */ + if (chan->desc_submitcount < chan->num_frms) + i = chan->desc_submitcount; + + list_for_each_entry(segment, &desc->segments, node) { + if (chan->ext_addr) + vdma_desc_write_64(chan, + XILINX_VDMA_REG_START_ADDRESS_64(i++), + segment->hw.buf_addr, + segment->hw.buf_addr_msb); + else + vdma_desc_write(chan, + XILINX_VDMA_REG_START_ADDRESS(i++), + segment->hw.buf_addr); + + last = segment; + } + + if (!last) + return; + + /* HW expects these parameters to be same for one transaction */ + vdma_desc_write(chan, XILINX_DMA_REG_HSIZE, last->hw.hsize); + vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE, + last->hw.stride); + vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize); + + chan->desc_submitcount++; + chan->desc_pendingcount--; + list_move_tail(&desc->node, &chan->active_list); + if (chan->desc_submitcount == chan->num_frms) + chan->desc_submitcount = 0; + + chan->idle = false; +} + +/** + * xilinx_cdma_start_transfer - Starts cdma transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_cdma_start_transfer(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; + struct xilinx_cdma_tx_segment *tail_segment; + u32 ctrl_reg = dma_read(chan, XILINX_DMA_REG_DMACR); + + if (chan->err) + return; + + if (!chan->idle) + return; + + if (list_empty(&chan->pending_list)) + return; + + head_desc = list_first_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_desc = list_last_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_cdma_tx_segment, node); + + if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) { + ctrl_reg &= ~XILINX_DMA_CR_COALESCE_MAX; + ctrl_reg |= chan->desc_pendingcount << + XILINX_DMA_CR_COALESCE_SHIFT; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, ctrl_reg); + } + + if (chan->has_sg) { + dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, + XILINX_CDMA_CR_SGMODE); + + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_CDMA_CR_SGMODE); + + xilinx_write(chan, XILINX_DMA_REG_CURDESC, + head_desc->async_tx.phys); + + /* Update tail ptr register which will start the transfer */ + xilinx_write(chan, XILINX_DMA_REG_TAILDESC, + tail_segment->phys); + } else { + /* In simple mode */ + struct xilinx_cdma_tx_segment *segment; + struct xilinx_cdma_desc_hw *hw; + + segment = list_first_entry(&head_desc->segments, + struct xilinx_cdma_tx_segment, + node); + + hw = &segment->hw; + + xilinx_write(chan, XILINX_CDMA_REG_SRCADDR, + xilinx_prep_dma_addr_t(hw->src_addr)); + xilinx_write(chan, XILINX_CDMA_REG_DSTADDR, + xilinx_prep_dma_addr_t(hw->dest_addr)); + + /* Start the transfer */ + dma_ctrl_write(chan, XILINX_DMA_REG_BTT, + hw->control & chan->xdev->max_buffer_len); + } + + list_splice_tail_init(&chan->pending_list, &chan->active_list); + chan->desc_pendingcount = 0; + chan->idle = false; +} + +/** + * xilinx_dma_start_transfer - Starts DMA transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; + struct xilinx_axidma_tx_segment *tail_segment; + u32 reg; + + if (chan->err) + return; + + if (list_empty(&chan->pending_list)) + return; + + if (!chan->idle) + return; + + head_desc = list_first_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_desc = list_last_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_axidma_tx_segment, node); + + reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); + + if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) { + reg &= ~XILINX_DMA_CR_COALESCE_MAX; + reg |= chan->desc_pendingcount << + XILINX_DMA_CR_COALESCE_SHIFT; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); + } + + if (chan->has_sg) + xilinx_write(chan, XILINX_DMA_REG_CURDESC, + head_desc->async_tx.phys); + reg &= ~XILINX_DMA_CR_DELAY_MAX; + reg |= chan->irq_delay << XILINX_DMA_CR_DELAY_SHIFT; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); + + xilinx_dma_start(chan); + + if (chan->err) + return; + + /* Start the transfer */ + if (chan->has_sg) { + if (chan->cyclic) + xilinx_write(chan, XILINX_DMA_REG_TAILDESC, + chan->cyclic_seg_v->phys); + else + xilinx_write(chan, XILINX_DMA_REG_TAILDESC, + tail_segment->phys); + } else { + struct xilinx_axidma_tx_segment *segment; + struct xilinx_axidma_desc_hw *hw; + + segment = list_first_entry(&head_desc->segments, + struct xilinx_axidma_tx_segment, + node); + hw = &segment->hw; + + xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR, + xilinx_prep_dma_addr_t(hw->buf_addr)); + + /* Start the transfer */ + dma_ctrl_write(chan, XILINX_DMA_REG_BTT, + hw->control & chan->xdev->max_buffer_len); + } + + list_splice_tail_init(&chan->pending_list, &chan->active_list); + chan->desc_pendingcount = 0; + chan->idle = false; +} + +/** + * xilinx_mcdma_start_transfer - Starts MCDMA transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; + struct xilinx_aximcdma_tx_segment *tail_segment; + u32 reg; + + /* + * lock has been held by calling functions, so we don't need it + * to take it here again. + */ + + if (chan->err) + return; + + if (!chan->idle) + return; + + if (list_empty(&chan->pending_list)) + return; + + head_desc = list_first_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_desc = list_last_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_aximcdma_tx_segment, node); + + reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest)); + + if (chan->desc_pendingcount <= XILINX_MCDMA_COALESCE_MAX) { + reg &= ~XILINX_MCDMA_COALESCE_MASK; + reg |= chan->desc_pendingcount << + XILINX_MCDMA_COALESCE_SHIFT; + } + + reg |= XILINX_MCDMA_IRQ_ALL_MASK; + dma_ctrl_write(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest), reg); + + /* Program current descriptor */ + xilinx_write(chan, XILINX_MCDMA_CHAN_CDESC_OFFSET(chan->tdest), + head_desc->async_tx.phys); + + /* Program channel enable register */ + reg = dma_ctrl_read(chan, XILINX_MCDMA_CHEN_OFFSET); + reg |= BIT(chan->tdest); + dma_ctrl_write(chan, XILINX_MCDMA_CHEN_OFFSET, reg); + + /* Start the fetch of BDs for the channel */ + reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest)); + reg |= XILINX_MCDMA_CR_RUNSTOP_MASK; + dma_ctrl_write(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest), reg); + + xilinx_dma_start(chan); + + if (chan->err) + return; + + /* Start the transfer */ + xilinx_write(chan, XILINX_MCDMA_CHAN_TDESC_OFFSET(chan->tdest), + tail_segment->phys); + + list_splice_tail_init(&chan->pending_list, &chan->active_list); + chan->desc_pendingcount = 0; + chan->idle = false; +} + +/** + * xilinx_dma_issue_pending - Issue pending transactions + * @dchan: DMA channel + */ +static void xilinx_dma_issue_pending(struct dma_chan *dchan) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + chan->start_transfer(chan); + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_dma_device_config - Configure the DMA channel + * @dchan: DMA channel + * @config: channel configuration + * + * Return: 0 always. + */ +static int xilinx_dma_device_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + return 0; +} + +/** + * xilinx_dma_complete_descriptor - Mark the active descriptor as complete + * @chan : xilinx DMA channel + * + * CONTEXT: hardirq + */ +static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *desc, *next; + + /* This function was invoked with lock held */ + if (list_empty(&chan->active_list)) + return; + + list_for_each_entry_safe(desc, next, &chan->active_list, node) { + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + struct xilinx_axidma_tx_segment *seg; + + seg = list_last_entry(&desc->segments, + struct xilinx_axidma_tx_segment, node); + if (!(seg->hw.status & XILINX_DMA_BD_COMP_MASK) && chan->has_sg) + break; + } + if (chan->has_sg && chan->xdev->dma_config->dmatype != + XDMA_TYPE_VDMA) + desc->residue = xilinx_dma_get_residue(chan, desc); + else + desc->residue = 0; + desc->err = chan->err; + + list_del(&desc->node); + if (!desc->cyclic) + dma_cookie_complete(&desc->async_tx); + list_add_tail(&desc->node, &chan->done_list); + } +} + +/** + * xilinx_dma_reset - Reset DMA channel + * @chan: Driver specific DMA channel + * + * Return: '0' on success and failure value on error + */ +static int xilinx_dma_reset(struct xilinx_dma_chan *chan) +{ + int err; + u32 tmp; + + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RESET); + + /* Wait for the hardware to finish reset */ + err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp, + !(tmp & XILINX_DMA_DMACR_RESET), 0, + XILINX_DMA_LOOP_COUNT); + + if (err) { + dev_err(chan->dev, "reset timeout, cr %x, sr %x\n", + dma_ctrl_read(chan, XILINX_DMA_REG_DMACR), + dma_ctrl_read(chan, XILINX_DMA_REG_DMASR)); + return -ETIMEDOUT; + } + + chan->err = false; + chan->idle = true; + chan->desc_pendingcount = 0; + chan->desc_submitcount = 0; + + return err; +} + +/** + * xilinx_dma_chan_reset - Reset DMA channel and enable interrupts + * @chan: Driver specific DMA channel + * + * Return: '0' on success and failure value on error + */ +static int xilinx_dma_chan_reset(struct xilinx_dma_chan *chan) +{ + int err; + + /* Reset VDMA */ + err = xilinx_dma_reset(chan); + if (err) + return err; + + /* Enable interrupts */ + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); + + return 0; +} + +/** + * xilinx_mcdma_irq_handler - MCDMA Interrupt handler + * @irq: IRQ number + * @data: Pointer to the Xilinx MCDMA channel structure + * + * Return: IRQ_HANDLED/IRQ_NONE + */ +static irqreturn_t xilinx_mcdma_irq_handler(int irq, void *data) +{ + struct xilinx_dma_chan *chan = data; + u32 status, ser_offset, chan_sermask, chan_offset = 0, chan_id; + + if (chan->direction == DMA_DEV_TO_MEM) + ser_offset = XILINX_MCDMA_RXINT_SER_OFFSET; + else + ser_offset = XILINX_MCDMA_TXINT_SER_OFFSET; + + /* Read the channel id raising the interrupt*/ + chan_sermask = dma_ctrl_read(chan, ser_offset); + chan_id = ffs(chan_sermask); + + if (!chan_id) + return IRQ_NONE; + + if (chan->direction == DMA_DEV_TO_MEM) + chan_offset = chan->xdev->dma_config->max_channels / 2; + + chan_offset = chan_offset + (chan_id - 1); + chan = chan->xdev->chan[chan_offset]; + /* Read the status and ack the interrupts. */ + status = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_SR_OFFSET(chan->tdest)); + if (!(status & XILINX_MCDMA_IRQ_ALL_MASK)) + return IRQ_NONE; + + dma_ctrl_write(chan, XILINX_MCDMA_CHAN_SR_OFFSET(chan->tdest), + status & XILINX_MCDMA_IRQ_ALL_MASK); + + if (status & XILINX_MCDMA_IRQ_ERR_MASK) { + dev_err(chan->dev, "Channel %p has errors %x cdr %x tdr %x\n", + chan, + dma_ctrl_read(chan, XILINX_MCDMA_CH_ERR_OFFSET), + dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CDESC_OFFSET + (chan->tdest)), + dma_ctrl_read(chan, XILINX_MCDMA_CHAN_TDESC_OFFSET + (chan->tdest))); + chan->err = true; + } + + if (status & XILINX_MCDMA_IRQ_DELAY_MASK) { + /* + * Device takes too long to do the transfer when user requires + * responsiveness. + */ + dev_dbg(chan->dev, "Inter-packet latency too long\n"); + } + + if (status & XILINX_MCDMA_IRQ_IOC_MASK) { + spin_lock(&chan->lock); + xilinx_dma_complete_descriptor(chan); + chan->idle = true; + chan->start_transfer(chan); + spin_unlock(&chan->lock); + } + + tasklet_hi_schedule(&chan->tasklet); + return IRQ_HANDLED; +} + +/** + * xilinx_dma_irq_handler - DMA Interrupt handler + * @irq: IRQ number + * @data: Pointer to the Xilinx DMA channel structure + * + * Return: IRQ_HANDLED/IRQ_NONE + */ +static irqreturn_t xilinx_dma_irq_handler(int irq, void *data) +{ + struct xilinx_dma_chan *chan = data; + u32 status; + + /* Read the status and ack the interrupts. */ + status = dma_ctrl_read(chan, XILINX_DMA_REG_DMASR); + if (!(status & XILINX_DMA_DMAXR_ALL_IRQ_MASK)) + return IRQ_NONE; + + dma_ctrl_write(chan, XILINX_DMA_REG_DMASR, + status & XILINX_DMA_DMAXR_ALL_IRQ_MASK); + + if (status & XILINX_DMA_DMASR_ERR_IRQ) { + /* + * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the + * error is recoverable, ignore it. Otherwise flag the error. + * + * Only recoverable errors can be cleared in the DMASR register, + * make sure not to write to other error bits to 1. + */ + u32 errors = status & XILINX_DMA_DMASR_ALL_ERR_MASK; + + dma_ctrl_write(chan, XILINX_DMA_REG_DMASR, + errors & XILINX_DMA_DMASR_ERR_RECOVER_MASK); + + if (!chan->flush_on_fsync || + (errors & ~XILINX_DMA_DMASR_ERR_RECOVER_MASK)) { + dev_err(chan->dev, + "Channel %p has errors %x, cdr %x tdr %x\n", + chan, errors, + dma_ctrl_read(chan, XILINX_DMA_REG_CURDESC), + dma_ctrl_read(chan, XILINX_DMA_REG_TAILDESC)); + chan->err = true; + } + } + + if (status & (XILINX_DMA_DMASR_FRM_CNT_IRQ | + XILINX_DMA_DMASR_DLY_CNT_IRQ)) { + spin_lock(&chan->lock); + xilinx_dma_complete_descriptor(chan); + chan->idle = true; + chan->start_transfer(chan); + spin_unlock(&chan->lock); + } + + tasklet_schedule(&chan->tasklet); + return IRQ_HANDLED; +} + +/** + * append_desc_queue - Queuing descriptor + * @chan: Driver specific dma channel + * @desc: dma transaction descriptor + */ +static void append_desc_queue(struct xilinx_dma_chan *chan, + struct xilinx_dma_tx_descriptor *desc) +{ + struct xilinx_vdma_tx_segment *tail_segment; + struct xilinx_dma_tx_descriptor *tail_desc; + struct xilinx_axidma_tx_segment *axidma_tail_segment; + struct xilinx_aximcdma_tx_segment *aximcdma_tail_segment; + struct xilinx_cdma_tx_segment *cdma_tail_segment; + + if (list_empty(&chan->pending_list)) + goto append; + + /* + * Add the hardware descriptor to the chain of hardware descriptors + * that already exists in memory. + */ + tail_desc = list_last_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_vdma_tx_segment, + node); + tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + cdma_tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_cdma_tx_segment, + node); + cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + axidma_tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_axidma_tx_segment, + node); + axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else { + aximcdma_tail_segment = + list_last_entry(&tail_desc->segments, + struct xilinx_aximcdma_tx_segment, + node); + aximcdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } + + /* + * Add the software descriptor and all children to the list + * of pending transactions + */ +append: + list_add_tail(&desc->node, &chan->pending_list); + chan->desc_pendingcount++; + + if (chan->has_sg && (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) + && unlikely(chan->desc_pendingcount > chan->num_frms)) { + dev_dbg(chan->dev, "desc pendingcount is too high\n"); + chan->desc_pendingcount = chan->num_frms; + } +} + +/** + * xilinx_dma_tx_submit - Submit DMA transaction + * @tx: Async transaction descriptor + * + * Return: cookie value on success and failure value on error + */ +static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct xilinx_dma_tx_descriptor *desc = to_dma_tx_descriptor(tx); + struct xilinx_dma_chan *chan = to_xilinx_chan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; + int err; + + if (chan->cyclic) { + xilinx_dma_free_tx_descriptor(chan, desc); + return -EBUSY; + } + + if (chan->err) { + /* + * If reset fails, need to hard reset the system. + * Channel is no longer functional + */ + err = xilinx_dma_chan_reset(chan); + if (err < 0) + return err; + } + + spin_lock_irqsave(&chan->lock, flags); + + cookie = dma_cookie_assign(tx); + + /* Put this transaction onto the tail of the pending queue */ + append_desc_queue(chan, desc); + + if (desc->cyclic) + chan->cyclic = true; + + chan->terminating = false; + + spin_unlock_irqrestore(&chan->lock, flags); + + return cookie; +} + +/** + * xilinx_vdma_dma_prep_interleaved - prepare a descriptor for a + * DMA_SLAVE transaction + * @dchan: DMA channel + * @xt: Interleaved template pointer + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_vdma_tx_segment *segment; + struct xilinx_vdma_desc_hw *hw; + + if (!is_slave_direction(xt->dir)) + return NULL; + + if (!xt->numf || !xt->sgl[0].size) + return NULL; + + if (xt->frame_size != 1) + return NULL; + + /* Allocate a transaction descriptor. */ + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + async_tx_ack(&desc->async_tx); + + /* Allocate the link descriptor from DMA pool */ + segment = xilinx_vdma_alloc_tx_segment(chan); + if (!segment) + goto error; + + /* Fill in the hardware descriptor */ + hw = &segment->hw; + hw->vsize = xt->numf; + hw->hsize = xt->sgl[0].size; + hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) << + XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT; + hw->stride |= chan->config.frm_dly << + XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT; + + if (xt->dir != DMA_MEM_TO_DEV) { + if (chan->ext_addr) { + hw->buf_addr = lower_32_bits(xt->dst_start); + hw->buf_addr_msb = upper_32_bits(xt->dst_start); + } else { + hw->buf_addr = xt->dst_start; + } + } else { + if (chan->ext_addr) { + hw->buf_addr = lower_32_bits(xt->src_start); + hw->buf_addr_msb = upper_32_bits(xt->src_start); + } else { + hw->buf_addr = xt->src_start; + } + } + + /* Insert the segment into the descriptor segments list. */ + list_add_tail(&segment->node, &desc->segments); + + /* Link the last hardware descriptor with the first. */ + segment = list_first_entry(&desc->segments, + struct xilinx_vdma_tx_segment, node); + desc->async_tx.phys = segment->phys; + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_cdma_prep_memcpy - prepare descriptors for a memcpy transaction + * @dchan: DMA channel + * @dma_dst: destination address + * @dma_src: source address + * @len: transfer length + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_cdma_tx_segment *segment; + struct xilinx_cdma_desc_hw *hw; + + if (!len || len > chan->xdev->max_buffer_len) + return NULL; + + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + /* Allocate the link descriptor from DMA pool */ + segment = xilinx_cdma_alloc_tx_segment(chan); + if (!segment) + goto error; + + hw = &segment->hw; + hw->control = len; + hw->src_addr = dma_src; + hw->dest_addr = dma_dst; + if (chan->ext_addr) { + hw->src_addr_msb = upper_32_bits(dma_src); + hw->dest_addr_msb = upper_32_bits(dma_dst); + } + + /* Insert the segment into the descriptor segments list. */ + list_add_tail(&segment->node, &desc->segments); + + desc->async_tx.phys = segment->phys; + hw->next_desc = segment->phys; + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @dchan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: transfer ack flags + * @context: APP words of the descriptor + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *xilinx_dma_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_axidma_tx_segment *segment = NULL; + u32 *app_w = (u32 *)context; + struct scatterlist *sg; + size_t copy; + size_t sg_used; + unsigned int i; + + if (!is_slave_direction(direction)) + return NULL; + + /* Allocate a transaction descriptor. */ + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + /* Build transactions using information in the scatter gather list */ + for_each_sg(sgl, sg, sg_len, i) { + sg_used = 0; + + /* Loop until the entire scatterlist entry is used */ + while (sg_used < sg_dma_len(sg)) { + struct xilinx_axidma_desc_hw *hw; + + /* Get a free segment */ + segment = xilinx_axidma_alloc_tx_segment(chan); + if (!segment) + goto error; + + /* + * Calculate the maximum number of bytes to transfer, + * making sure it is less than the hw limit + */ + copy = xilinx_dma_calc_copysize(chan, sg_dma_len(sg), + sg_used); + hw = &segment->hw; + + /* Fill in the descriptor */ + xilinx_axidma_buf(chan, hw, sg_dma_address(sg), + sg_used, 0); + + hw->control = copy; + + if (chan->direction == DMA_MEM_TO_DEV) { + if (app_w) + memcpy(hw->app, app_w, sizeof(u32) * + XILINX_DMA_NUM_APP_WORDS); + } + + sg_used += copy; + + /* + * Insert the segment into the descriptor segments + * list. + */ + list_add_tail(&segment->node, &desc->segments); + } + } + + segment = list_first_entry(&desc->segments, + struct xilinx_axidma_tx_segment, node); + desc->async_tx.phys = segment->phys; + + /* For the last DMA_MEM_TO_DEV transfer, set EOP */ + if (chan->direction == DMA_MEM_TO_DEV) { + segment->hw.control |= XILINX_DMA_BD_SOP; + segment = list_last_entry(&desc->segments, + struct xilinx_axidma_tx_segment, + node); + segment->hw.control |= XILINX_DMA_BD_EOP; + } + + if (chan->xdev->has_axistream_connected) + desc->async_tx.metadata_ops = &xilinx_dma_metadata_ops; + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_dma_prep_dma_cyclic - prepare descriptors for a DMA_SLAVE transaction + * @dchan: DMA channel + * @buf_addr: Physical address of the buffer + * @buf_len: Total length of the cyclic buffers + * @period_len: length of individual cyclic buffer + * @direction: DMA direction + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *xilinx_dma_prep_dma_cyclic( + struct dma_chan *dchan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_axidma_tx_segment *segment, *head_segment, *prev = NULL; + size_t copy, sg_used; + unsigned int num_periods; + int i; + u32 reg; + + if (!period_len) + return NULL; + + num_periods = buf_len / period_len; + + if (!num_periods) + return NULL; + + if (!is_slave_direction(direction)) + return NULL; + + /* Allocate a transaction descriptor. */ + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + chan->direction = direction; + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + for (i = 0; i < num_periods; ++i) { + sg_used = 0; + + while (sg_used < period_len) { + struct xilinx_axidma_desc_hw *hw; + + /* Get a free segment */ + segment = xilinx_axidma_alloc_tx_segment(chan); + if (!segment) + goto error; + + /* + * Calculate the maximum number of bytes to transfer, + * making sure it is less than the hw limit + */ + copy = xilinx_dma_calc_copysize(chan, period_len, + sg_used); + hw = &segment->hw; + xilinx_axidma_buf(chan, hw, buf_addr, sg_used, + period_len * i); + hw->control = copy; + + if (prev) + prev->hw.next_desc = segment->phys; + + prev = segment; + sg_used += copy; + + /* + * Insert the segment into the descriptor segments + * list. + */ + list_add_tail(&segment->node, &desc->segments); + } + } + + head_segment = list_first_entry(&desc->segments, + struct xilinx_axidma_tx_segment, node); + desc->async_tx.phys = head_segment->phys; + + desc->cyclic = true; + reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); + reg |= XILINX_DMA_CR_CYCLIC_BD_EN_MASK; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); + + segment = list_last_entry(&desc->segments, + struct xilinx_axidma_tx_segment, + node); + segment->hw.next_desc = (u32) head_segment->phys; + + /* For the last DMA_MEM_TO_DEV transfer, set EOP */ + if (direction == DMA_MEM_TO_DEV) { + head_segment->hw.control |= XILINX_DMA_BD_SOP; + segment->hw.control |= XILINX_DMA_BD_EOP; + } + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_mcdma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @dchan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: transfer ack flags + * @context: APP words of the descriptor + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +xilinx_mcdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_aximcdma_tx_segment *segment = NULL; + u32 *app_w = (u32 *)context; + struct scatterlist *sg; + size_t copy; + size_t sg_used; + unsigned int i; + + if (!is_slave_direction(direction)) + return NULL; + + /* Allocate a transaction descriptor. */ + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + /* Build transactions using information in the scatter gather list */ + for_each_sg(sgl, sg, sg_len, i) { + sg_used = 0; + + /* Loop until the entire scatterlist entry is used */ + while (sg_used < sg_dma_len(sg)) { + struct xilinx_aximcdma_desc_hw *hw; + + /* Get a free segment */ + segment = xilinx_aximcdma_alloc_tx_segment(chan); + if (!segment) + goto error; + + /* + * Calculate the maximum number of bytes to transfer, + * making sure it is less than the hw limit + */ + copy = min_t(size_t, sg_dma_len(sg) - sg_used, + chan->xdev->max_buffer_len); + hw = &segment->hw; + + /* Fill in the descriptor */ + xilinx_aximcdma_buf(chan, hw, sg_dma_address(sg), + sg_used); + hw->control = copy; + + if (chan->direction == DMA_MEM_TO_DEV && app_w) { + memcpy(hw->app, app_w, sizeof(u32) * + XILINX_DMA_NUM_APP_WORDS); + } + + sg_used += copy; + /* + * Insert the segment into the descriptor segments + * list. + */ + list_add_tail(&segment->node, &desc->segments); + } + } + + segment = list_first_entry(&desc->segments, + struct xilinx_aximcdma_tx_segment, node); + desc->async_tx.phys = segment->phys; + + /* For the last DMA_MEM_TO_DEV transfer, set EOP */ + if (chan->direction == DMA_MEM_TO_DEV) { + segment->hw.control |= XILINX_MCDMA_BD_SOP; + segment = list_last_entry(&desc->segments, + struct xilinx_aximcdma_tx_segment, + node); + segment->hw.control |= XILINX_MCDMA_BD_EOP; + } + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + + return NULL; +} + +/** + * xilinx_dma_terminate_all - Halt the channel and free descriptors + * @dchan: Driver specific DMA Channel pointer + * + * Return: '0' always. + */ +static int xilinx_dma_terminate_all(struct dma_chan *dchan) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + u32 reg; + int err; + + if (!chan->cyclic) { + err = chan->stop_transfer(chan); + if (err) { + dev_err(chan->dev, "Cannot stop channel %p: %x\n", + chan, dma_ctrl_read(chan, + XILINX_DMA_REG_DMASR)); + chan->err = true; + } + } + + xilinx_dma_chan_reset(chan); + /* Remove and free all of the descriptors in the lists */ + chan->terminating = true; + xilinx_dma_free_descriptors(chan); + chan->idle = true; + + if (chan->cyclic) { + reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); + reg &= ~XILINX_DMA_CR_CYCLIC_BD_EN_MASK; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); + chan->cyclic = false; + } + + if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg) + dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, + XILINX_CDMA_CR_SGMODE); + + return 0; +} + +static void xilinx_dma_synchronize(struct dma_chan *dchan) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + + tasklet_kill(&chan->tasklet); +} + +/** + * xilinx_vdma_channel_set_config - Configure VDMA channel + * Run-time configuration for Axi VDMA, supports: + * . halt the channel + * . configure interrupt coalescing and inter-packet delay threshold + * . start/stop parking + * . enable genlock + * + * @dchan: DMA channel + * @cfg: VDMA device configuration pointer + * + * Return: '0' on success and failure value on error + */ +int xilinx_vdma_channel_set_config(struct dma_chan *dchan, + struct xilinx_vdma_config *cfg) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + u32 dmacr; + + if (cfg->reset) + return xilinx_dma_chan_reset(chan); + + dmacr = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); + + chan->config.frm_dly = cfg->frm_dly; + chan->config.park = cfg->park; + + /* genlock settings */ + chan->config.gen_lock = cfg->gen_lock; + chan->config.master = cfg->master; + + dmacr &= ~XILINX_DMA_DMACR_GENLOCK_EN; + if (cfg->gen_lock && chan->genlock) { + dmacr |= XILINX_DMA_DMACR_GENLOCK_EN; + dmacr &= ~XILINX_DMA_DMACR_MASTER_MASK; + dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT; + } + + chan->config.frm_cnt_en = cfg->frm_cnt_en; + chan->config.vflip_en = cfg->vflip_en; + + if (cfg->park) + chan->config.park_frm = cfg->park_frm; + else + chan->config.park_frm = -1; + + chan->config.coalesc = cfg->coalesc; + chan->config.delay = cfg->delay; + + if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) { + dmacr &= ~XILINX_DMA_DMACR_FRAME_COUNT_MASK; + dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT; + chan->config.coalesc = cfg->coalesc; + } + + if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) { + dmacr &= ~XILINX_DMA_DMACR_DELAY_MASK; + dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT; + chan->config.delay = cfg->delay; + } + + /* FSync Source selection */ + dmacr &= ~XILINX_DMA_DMACR_FSYNCSRC_MASK; + dmacr |= cfg->ext_fsync << XILINX_DMA_DMACR_FSYNCSRC_SHIFT; + + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, dmacr); + + return 0; +} +EXPORT_SYMBOL(xilinx_vdma_channel_set_config); + +/* ----------------------------------------------------------------------------- + * Probe and remove + */ + +/** + * xilinx_dma_chan_remove - Per Channel remove function + * @chan: Driver specific DMA channel + */ +static void xilinx_dma_chan_remove(struct xilinx_dma_chan *chan) +{ + /* Disable all interrupts */ + dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); + + if (chan->irq > 0) + free_irq(chan->irq, chan); + + tasklet_kill(&chan->tasklet); + + list_del(&chan->common.device_node); +} + +static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **rx_clk, + struct clk **sg_clk, struct clk **tmp_clk) +{ + int err; + + *tmp_clk = NULL; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n"); + + *tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk"); + if (IS_ERR(*tx_clk)) + *tx_clk = NULL; + + *rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + + *sg_clk = devm_clk_get(&pdev->dev, "m_axi_sg_aclk"); + if (IS_ERR(*sg_clk)) + *sg_clk = NULL; + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err); + return err; + } + + err = clk_prepare_enable(*tx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err); + goto err_disable_axiclk; + } + + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err); + goto err_disable_txclk; + } + + err = clk_prepare_enable(*sg_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable sg_clk (%d)\n", err); + goto err_disable_rxclk; + } + + return 0; + +err_disable_rxclk: + clk_disable_unprepare(*rx_clk); +err_disable_txclk: + clk_disable_unprepare(*tx_clk); +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **dev_clk, struct clk **tmp_clk, + struct clk **tmp1_clk, struct clk **tmp2_clk) +{ + int err; + + *tmp_clk = NULL; + *tmp1_clk = NULL; + *tmp2_clk = NULL; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n"); + + *dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk"); + if (IS_ERR(*dev_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(*dev_clk), "failed to get dev_clk\n"); + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err); + return err; + } + + err = clk_prepare_enable(*dev_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable dev_clk (%d)\n", err); + goto err_disable_axiclk; + } + + return 0; + +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **txs_clk, + struct clk **rx_clk, struct clk **rxs_clk) +{ + int err; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n"); + + *tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk"); + if (IS_ERR(*tx_clk)) + *tx_clk = NULL; + + *txs_clk = devm_clk_get(&pdev->dev, "m_axis_mm2s_aclk"); + if (IS_ERR(*txs_clk)) + *txs_clk = NULL; + + *rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + + *rxs_clk = devm_clk_get(&pdev->dev, "s_axis_s2mm_aclk"); + if (IS_ERR(*rxs_clk)) + *rxs_clk = NULL; + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", + err); + return err; + } + + err = clk_prepare_enable(*tx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err); + goto err_disable_axiclk; + } + + err = clk_prepare_enable(*txs_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable txs_clk (%d)\n", err); + goto err_disable_txclk; + } + + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err); + goto err_disable_txsclk; + } + + err = clk_prepare_enable(*rxs_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rxs_clk (%d)\n", err); + goto err_disable_rxclk; + } + + return 0; + +err_disable_rxclk: + clk_disable_unprepare(*rx_clk); +err_disable_txsclk: + clk_disable_unprepare(*txs_clk); +err_disable_txclk: + clk_disable_unprepare(*tx_clk); +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static void xdma_disable_allclks(struct xilinx_dma_device *xdev) +{ + clk_disable_unprepare(xdev->rxs_clk); + clk_disable_unprepare(xdev->rx_clk); + clk_disable_unprepare(xdev->txs_clk); + clk_disable_unprepare(xdev->tx_clk); + clk_disable_unprepare(xdev->axi_clk); +} + +/** + * xilinx_dma_chan_probe - Per Channel Probing + * It get channel features from the device tree entry and + * initialize special channel handling routines + * + * @xdev: Driver specific device structure + * @node: Device node + * + * Return: '0' on success and failure value on error + */ +static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, + struct device_node *node) +{ + struct xilinx_dma_chan *chan; + bool has_dre = false; + u32 value, width; + int err; + + /* Allocate and initialize the channel structure */ + chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL); + if (!chan) + return -ENOMEM; + + chan->dev = xdev->dev; + chan->xdev = xdev; + chan->desc_pendingcount = 0x0; + chan->ext_addr = xdev->ext_addr; + /* This variable ensures that descriptors are not + * Submitted when dma engine is in progress. This variable is + * Added to avoid polling for a bit in the status register to + * Know dma state in the driver hot path. + */ + chan->idle = true; + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->pending_list); + INIT_LIST_HEAD(&chan->done_list); + INIT_LIST_HEAD(&chan->active_list); + INIT_LIST_HEAD(&chan->free_seg_list); + + /* Retrieve the channel properties from the device tree */ + has_dre = of_property_read_bool(node, "xlnx,include-dre"); + + of_property_read_u8(node, "xlnx,irq-delay", &chan->irq_delay); + + chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode"); + + err = of_property_read_u32(node, "xlnx,datawidth", &value); + if (err) { + dev_err(xdev->dev, "missing xlnx,datawidth property\n"); + return err; + } + width = value >> 3; /* Convert bits to bytes */ + + /* If data width is greater than 8 bytes, DRE is not in hw */ + if (width > 8) + has_dre = false; + + if (!has_dre) + xdev->common.copy_align = (enum dmaengine_alignment)fls(width - 1); + + if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel") || + of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") || + of_device_is_compatible(node, "xlnx,axi-cdma-channel")) { + chan->direction = DMA_MEM_TO_DEV; + chan->id = xdev->mm2s_chan_id++; + chan->tdest = chan->id; + + chan->ctrl_offset = XILINX_DMA_MM2S_CTRL_OFFSET; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET; + chan->config.park = 1; + + if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH || + xdev->flush_on_fsync == XILINX_DMA_FLUSH_MM2S) + chan->flush_on_fsync = true; + } + } else if (of_device_is_compatible(node, + "xlnx,axi-vdma-s2mm-channel") || + of_device_is_compatible(node, + "xlnx,axi-dma-s2mm-channel")) { + chan->direction = DMA_DEV_TO_MEM; + chan->id = xdev->s2mm_chan_id++; + chan->tdest = chan->id - xdev->dma_config->max_channels / 2; + chan->has_vflip = of_property_read_bool(node, + "xlnx,enable-vert-flip"); + if (chan->has_vflip) { + chan->config.vflip_en = dma_read(chan, + XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP) & + XILINX_VDMA_ENABLE_VERTICAL_FLIP; + } + + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) + chan->ctrl_offset = XILINX_MCDMA_S2MM_CTRL_OFFSET; + else + chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET; + + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET; + chan->config.park = 1; + + if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH || + xdev->flush_on_fsync == XILINX_DMA_FLUSH_S2MM) + chan->flush_on_fsync = true; + } + } else { + dev_err(xdev->dev, "Invalid channel compatible node\n"); + return -EINVAL; + } + + /* Request the interrupt */ + chan->irq = of_irq_get(node, chan->tdest); + if (chan->irq < 0) + return dev_err_probe(xdev->dev, chan->irq, "failed to get irq\n"); + err = request_irq(chan->irq, xdev->dma_config->irq_handler, + IRQF_SHARED, "xilinx-dma-controller", chan); + if (err) { + dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq); + return err; + } + + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + chan->start_transfer = xilinx_dma_start_transfer; + chan->stop_transfer = xilinx_dma_stop_transfer; + } else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) { + chan->start_transfer = xilinx_mcdma_start_transfer; + chan->stop_transfer = xilinx_dma_stop_transfer; + } else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + chan->start_transfer = xilinx_cdma_start_transfer; + chan->stop_transfer = xilinx_cdma_stop_transfer; + } else { + chan->start_transfer = xilinx_vdma_start_transfer; + chan->stop_transfer = xilinx_dma_stop_transfer; + } + + /* check if SG is enabled (only for AXIDMA, AXIMCDMA, and CDMA) */ + if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) { + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA || + dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & + XILINX_DMA_DMASR_SG_MASK) + chan->has_sg = true; + dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id, + chan->has_sg ? "enabled" : "disabled"); + } + + /* Initialize the tasklet */ + tasklet_setup(&chan->tasklet, xilinx_dma_do_tasklet); + + /* + * Initialize the DMA channel and add it to the DMA engine channels + * list. + */ + chan->common.device = &xdev->common; + + list_add_tail(&chan->common.device_node, &xdev->common.channels); + xdev->chan[chan->id] = chan; + + /* Reset the channel */ + err = xilinx_dma_chan_reset(chan); + if (err < 0) { + dev_err(xdev->dev, "Reset channel failed\n"); + return err; + } + + return 0; +} + +/** + * xilinx_dma_child_probe - Per child node probe + * It get number of dma-channels per child node from + * device-tree and initializes all the channels. + * + * @xdev: Driver specific device structure + * @node: Device node + * + * Return: '0' on success and failure value on error. + */ +static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, + struct device_node *node) +{ + int ret, i; + u32 nr_channels = 1; + + ret = of_property_read_u32(node, "dma-channels", &nr_channels); + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0) + dev_warn(xdev->dev, "missing dma-channels property\n"); + + for (i = 0; i < nr_channels; i++) { + ret = xilinx_dma_chan_probe(xdev, node); + if (ret) + return ret; + } + + return 0; +} + +/** + * of_dma_xilinx_xlate - Translation function + * @dma_spec: Pointer to DMA specifier as found in the device tree + * @ofdma: Pointer to DMA controller data + * + * Return: DMA channel pointer on success and NULL on error + */ +static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct xilinx_dma_device *xdev = ofdma->of_dma_data; + int chan_id = dma_spec->args[0]; + + if (chan_id >= xdev->dma_config->max_channels || !xdev->chan[chan_id]) + return NULL; + + return dma_get_slave_channel(&xdev->chan[chan_id]->common); +} + +static const struct xilinx_dma_config axidma_config = { + .dmatype = XDMA_TYPE_AXIDMA, + .clk_init = axidma_clk_init, + .irq_handler = xilinx_dma_irq_handler, + .max_channels = XILINX_DMA_MAX_CHANS_PER_DEVICE, +}; + +static const struct xilinx_dma_config aximcdma_config = { + .dmatype = XDMA_TYPE_AXIMCDMA, + .clk_init = axidma_clk_init, + .irq_handler = xilinx_mcdma_irq_handler, + .max_channels = XILINX_MCDMA_MAX_CHANS_PER_DEVICE, +}; +static const struct xilinx_dma_config axicdma_config = { + .dmatype = XDMA_TYPE_CDMA, + .clk_init = axicdma_clk_init, + .irq_handler = xilinx_dma_irq_handler, + .max_channels = XILINX_CDMA_MAX_CHANS_PER_DEVICE, +}; + +static const struct xilinx_dma_config axivdma_config = { + .dmatype = XDMA_TYPE_VDMA, + .clk_init = axivdma_clk_init, + .irq_handler = xilinx_dma_irq_handler, + .max_channels = XILINX_DMA_MAX_CHANS_PER_DEVICE, +}; + +static const struct of_device_id xilinx_dma_of_ids[] = { + { .compatible = "xlnx,axi-dma-1.00.a", .data = &axidma_config }, + { .compatible = "xlnx,axi-cdma-1.00.a", .data = &axicdma_config }, + { .compatible = "xlnx,axi-vdma-1.00.a", .data = &axivdma_config }, + { .compatible = "xlnx,axi-mcdma-1.00.a", .data = &aximcdma_config }, + {} +}; +MODULE_DEVICE_TABLE(of, xilinx_dma_of_ids); + +/** + * xilinx_dma_probe - Driver probe function + * @pdev: Pointer to the platform_device structure + * + * Return: '0' on success and failure value on error + */ +static int xilinx_dma_probe(struct platform_device *pdev) +{ + int (*clk_init)(struct platform_device *, struct clk **, struct clk **, + struct clk **, struct clk **, struct clk **) + = axivdma_clk_init; + struct device_node *node = pdev->dev.of_node; + struct xilinx_dma_device *xdev; + struct device_node *child, *np = pdev->dev.of_node; + u32 num_frames, addr_width, len_width; + int i, err; + + /* Allocate and initialize the DMA engine structure */ + xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL); + if (!xdev) + return -ENOMEM; + + xdev->dev = &pdev->dev; + if (np) { + const struct of_device_id *match; + + match = of_match_node(xilinx_dma_of_ids, np); + if (match && match->data) { + xdev->dma_config = match->data; + clk_init = xdev->dma_config->clk_init; + } + } + + err = clk_init(pdev, &xdev->axi_clk, &xdev->tx_clk, &xdev->txs_clk, + &xdev->rx_clk, &xdev->rxs_clk); + if (err) + return err; + + /* Request and map I/O memory */ + xdev->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(xdev->regs)) { + err = PTR_ERR(xdev->regs); + goto disable_clks; + } + /* Retrieve the DMA engine properties from the device tree */ + xdev->max_buffer_len = GENMASK(XILINX_DMA_MAX_TRANS_LEN_MAX - 1, 0); + xdev->s2mm_chan_id = xdev->dma_config->max_channels / 2; + + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA || + xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) { + if (!of_property_read_u32(node, "xlnx,sg-length-width", + &len_width)) { + if (len_width < XILINX_DMA_MAX_TRANS_LEN_MIN || + len_width > XILINX_DMA_V2_MAX_TRANS_LEN_MAX) { + dev_warn(xdev->dev, + "invalid xlnx,sg-length-width property value. Using default width\n"); + } else { + if (len_width > XILINX_DMA_MAX_TRANS_LEN_MAX) + dev_warn(xdev->dev, "Please ensure that IP supports buffer length > 23 bits\n"); + xdev->max_buffer_len = + GENMASK(len_width - 1, 0); + } + } + } + + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + xdev->has_axistream_connected = + of_property_read_bool(node, "xlnx,axistream-connected"); + } + + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + err = of_property_read_u32(node, "xlnx,num-fstores", + &num_frames); + if (err < 0) { + dev_err(xdev->dev, + "missing xlnx,num-fstores property\n"); + goto disable_clks; + } + + err = of_property_read_u32(node, "xlnx,flush-fsync", + &xdev->flush_on_fsync); + if (err < 0) + dev_warn(xdev->dev, + "missing xlnx,flush-fsync property\n"); + } + + err = of_property_read_u32(node, "xlnx,addrwidth", &addr_width); + if (err < 0) + dev_warn(xdev->dev, "missing xlnx,addrwidth property\n"); + + if (addr_width > 32) + xdev->ext_addr = true; + else + xdev->ext_addr = false; + + /* Set metadata mode */ + if (xdev->has_axistream_connected) + xdev->common.desc_metadata_modes = DESC_METADATA_ENGINE; + + /* Set the dma mask bits */ + err = dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width)); + if (err < 0) { + dev_err(xdev->dev, "DMA mask error %d\n", err); + goto disable_clks; + } + + /* Initialize the DMA engine */ + xdev->common.dev = &pdev->dev; + + INIT_LIST_HEAD(&xdev->common.channels); + if (!(xdev->dma_config->dmatype == XDMA_TYPE_CDMA)) { + dma_cap_set(DMA_SLAVE, xdev->common.cap_mask); + dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask); + } + + xdev->common.device_alloc_chan_resources = + xilinx_dma_alloc_chan_resources; + xdev->common.device_free_chan_resources = + xilinx_dma_free_chan_resources; + xdev->common.device_terminate_all = xilinx_dma_terminate_all; + xdev->common.device_synchronize = xilinx_dma_synchronize; + xdev->common.device_tx_status = xilinx_dma_tx_status; + xdev->common.device_issue_pending = xilinx_dma_issue_pending; + xdev->common.device_config = xilinx_dma_device_config; + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + dma_cap_set(DMA_CYCLIC, xdev->common.cap_mask); + xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg; + xdev->common.device_prep_dma_cyclic = + xilinx_dma_prep_dma_cyclic; + /* Residue calculation is supported by only AXI DMA and CDMA */ + xdev->common.residue_granularity = + DMA_RESIDUE_GRANULARITY_SEGMENT; + } else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask); + xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy; + /* Residue calculation is supported by only AXI DMA and CDMA */ + xdev->common.residue_granularity = + DMA_RESIDUE_GRANULARITY_SEGMENT; + } else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) { + xdev->common.device_prep_slave_sg = xilinx_mcdma_prep_slave_sg; + } else { + xdev->common.device_prep_interleaved_dma = + xilinx_vdma_dma_prep_interleaved; + } + + platform_set_drvdata(pdev, xdev); + + /* Initialize the channels */ + for_each_child_of_node(node, child) { + err = xilinx_dma_child_probe(xdev, child); + if (err < 0) { + of_node_put(child); + goto error; + } + } + + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + for (i = 0; i < xdev->dma_config->max_channels; i++) + if (xdev->chan[i]) + xdev->chan[i]->num_frms = num_frames; + } + + /* Register the DMA engine with the core */ + err = dma_async_device_register(&xdev->common); + if (err) { + dev_err(xdev->dev, "failed to register the dma device\n"); + goto error; + } + + err = of_dma_controller_register(node, of_dma_xilinx_xlate, + xdev); + if (err < 0) { + dev_err(&pdev->dev, "Unable to register DMA to DT\n"); + dma_async_device_unregister(&xdev->common); + goto error; + } + + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) + dev_info(&pdev->dev, "Xilinx AXI DMA Engine Driver Probed!!\n"); + else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) + dev_info(&pdev->dev, "Xilinx AXI CDMA Engine Driver Probed!!\n"); + else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) + dev_info(&pdev->dev, "Xilinx AXI MCDMA Engine Driver Probed!!\n"); + else + dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n"); + + return 0; + +error: + for (i = 0; i < xdev->dma_config->max_channels; i++) + if (xdev->chan[i]) + xilinx_dma_chan_remove(xdev->chan[i]); +disable_clks: + xdma_disable_allclks(xdev); + + return err; +} + +/** + * xilinx_dma_remove - Driver remove function + * @pdev: Pointer to the platform_device structure + * + * Return: Always '0' + */ +static int xilinx_dma_remove(struct platform_device *pdev) +{ + struct xilinx_dma_device *xdev = platform_get_drvdata(pdev); + int i; + + of_dma_controller_free(pdev->dev.of_node); + + dma_async_device_unregister(&xdev->common); + + for (i = 0; i < xdev->dma_config->max_channels; i++) + if (xdev->chan[i]) + xilinx_dma_chan_remove(xdev->chan[i]); + + xdma_disable_allclks(xdev); + + return 0; +} + +static struct platform_driver xilinx_vdma_driver = { + .driver = { + .name = "xilinx-vdma", + .of_match_table = xilinx_dma_of_ids, + }, + .probe = xilinx_dma_probe, + .remove = xilinx_dma_remove, +}; + +module_platform_driver(xilinx_vdma_driver); + +MODULE_AUTHOR("Xilinx, Inc."); +MODULE_DESCRIPTION("Xilinx VDMA driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c new file mode 100644 index 0000000000..84dc5240a8 --- /dev/null +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -0,0 +1,1777 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx ZynqMP DPDMA Engine driver + * + * Copyright (C) 2015 - 2020 Xilinx, Inc. + * + * Author: Hyun Woo Kwon + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../dmaengine.h" +#include "../virt-dma.h" + +/* DPDMA registers */ +#define XILINX_DPDMA_ERR_CTRL 0x000 +#define XILINX_DPDMA_ISR 0x004 +#define XILINX_DPDMA_IMR 0x008 +#define XILINX_DPDMA_IEN 0x00c +#define XILINX_DPDMA_IDS 0x010 +#define XILINX_DPDMA_INTR_DESC_DONE(n) BIT((n) + 0) +#define XILINX_DPDMA_INTR_DESC_DONE_MASK GENMASK(5, 0) +#define XILINX_DPDMA_INTR_NO_OSTAND(n) BIT((n) + 6) +#define XILINX_DPDMA_INTR_NO_OSTAND_MASK GENMASK(11, 6) +#define XILINX_DPDMA_INTR_AXI_ERR(n) BIT((n) + 12) +#define XILINX_DPDMA_INTR_AXI_ERR_MASK GENMASK(17, 12) +#define XILINX_DPDMA_INTR_DESC_ERR(n) BIT((n) + 16) +#define XILINX_DPDMA_INTR_DESC_ERR_MASK GENMASK(23, 18) +#define XILINX_DPDMA_INTR_WR_CMD_FIFO_FULL BIT(24) +#define XILINX_DPDMA_INTR_WR_DATA_FIFO_FULL BIT(25) +#define XILINX_DPDMA_INTR_AXI_4K_CROSS BIT(26) +#define XILINX_DPDMA_INTR_VSYNC BIT(27) +#define XILINX_DPDMA_INTR_CHAN_ERR_MASK 0x00041000 +#define XILINX_DPDMA_INTR_CHAN_ERR 0x00fff000 +#define XILINX_DPDMA_INTR_GLOBAL_ERR 0x07000000 +#define XILINX_DPDMA_INTR_ERR_ALL 0x07fff000 +#define XILINX_DPDMA_INTR_CHAN_MASK 0x00041041 +#define XILINX_DPDMA_INTR_GLOBAL_MASK 0x0f000000 +#define XILINX_DPDMA_INTR_ALL 0x0fffffff +#define XILINX_DPDMA_EISR 0x014 +#define XILINX_DPDMA_EIMR 0x018 +#define XILINX_DPDMA_EIEN 0x01c +#define XILINX_DPDMA_EIDS 0x020 +#define XILINX_DPDMA_EINTR_INV_APB BIT(0) +#define XILINX_DPDMA_EINTR_RD_AXI_ERR(n) BIT((n) + 1) +#define XILINX_DPDMA_EINTR_RD_AXI_ERR_MASK GENMASK(6, 1) +#define XILINX_DPDMA_EINTR_PRE_ERR(n) BIT((n) + 7) +#define XILINX_DPDMA_EINTR_PRE_ERR_MASK GENMASK(12, 7) +#define XILINX_DPDMA_EINTR_CRC_ERR(n) BIT((n) + 13) +#define XILINX_DPDMA_EINTR_CRC_ERR_MASK GENMASK(18, 13) +#define XILINX_DPDMA_EINTR_WR_AXI_ERR(n) BIT((n) + 19) +#define XILINX_DPDMA_EINTR_WR_AXI_ERR_MASK GENMASK(24, 19) +#define XILINX_DPDMA_EINTR_DESC_DONE_ERR(n) BIT((n) + 25) +#define XILINX_DPDMA_EINTR_DESC_DONE_ERR_MASK GENMASK(30, 25) +#define XILINX_DPDMA_EINTR_RD_CMD_FIFO_FULL BIT(32) +#define XILINX_DPDMA_EINTR_CHAN_ERR_MASK 0x02082082 +#define XILINX_DPDMA_EINTR_CHAN_ERR 0x7ffffffe +#define XILINX_DPDMA_EINTR_GLOBAL_ERR 0x80000001 +#define XILINX_DPDMA_EINTR_ALL 0xffffffff +#define XILINX_DPDMA_CNTL 0x100 +#define XILINX_DPDMA_GBL 0x104 +#define XILINX_DPDMA_GBL_TRIG_MASK(n) ((n) << 0) +#define XILINX_DPDMA_GBL_RETRIG_MASK(n) ((n) << 6) +#define XILINX_DPDMA_ALC0_CNTL 0x108 +#define XILINX_DPDMA_ALC0_STATUS 0x10c +#define XILINX_DPDMA_ALC0_MAX 0x110 +#define XILINX_DPDMA_ALC0_MIN 0x114 +#define XILINX_DPDMA_ALC0_ACC 0x118 +#define XILINX_DPDMA_ALC0_ACC_TRAN 0x11c +#define XILINX_DPDMA_ALC1_CNTL 0x120 +#define XILINX_DPDMA_ALC1_STATUS 0x124 +#define XILINX_DPDMA_ALC1_MAX 0x128 +#define XILINX_DPDMA_ALC1_MIN 0x12c +#define XILINX_DPDMA_ALC1_ACC 0x130 +#define XILINX_DPDMA_ALC1_ACC_TRAN 0x134 + +/* Channel register */ +#define XILINX_DPDMA_CH_BASE 0x200 +#define XILINX_DPDMA_CH_OFFSET 0x100 +#define XILINX_DPDMA_CH_DESC_START_ADDRE 0x000 +#define XILINX_DPDMA_CH_DESC_START_ADDRE_MASK GENMASK(15, 0) +#define XILINX_DPDMA_CH_DESC_START_ADDR 0x004 +#define XILINX_DPDMA_CH_DESC_NEXT_ADDRE 0x008 +#define XILINX_DPDMA_CH_DESC_NEXT_ADDR 0x00c +#define XILINX_DPDMA_CH_PYLD_CUR_ADDRE 0x010 +#define XILINX_DPDMA_CH_PYLD_CUR_ADDR 0x014 +#define XILINX_DPDMA_CH_CNTL 0x018 +#define XILINX_DPDMA_CH_CNTL_ENABLE BIT(0) +#define XILINX_DPDMA_CH_CNTL_PAUSE BIT(1) +#define XILINX_DPDMA_CH_CNTL_QOS_DSCR_WR_MASK GENMASK(5, 2) +#define XILINX_DPDMA_CH_CNTL_QOS_DSCR_RD_MASK GENMASK(9, 6) +#define XILINX_DPDMA_CH_CNTL_QOS_DATA_RD_MASK GENMASK(13, 10) +#define XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS 11 +#define XILINX_DPDMA_CH_STATUS 0x01c +#define XILINX_DPDMA_CH_STATUS_OTRAN_CNT_MASK GENMASK(24, 21) +#define XILINX_DPDMA_CH_VDO 0x020 +#define XILINX_DPDMA_CH_PYLD_SZ 0x024 +#define XILINX_DPDMA_CH_DESC_ID 0x028 +#define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0) + +/* DPDMA descriptor fields */ +#define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5 +#define XILINX_DPDMA_DESC_CONTROL_COMPLETE_INTR BIT(8) +#define XILINX_DPDMA_DESC_CONTROL_DESC_UPDATE BIT(9) +#define XILINX_DPDMA_DESC_CONTROL_IGNORE_DONE BIT(10) +#define XILINX_DPDMA_DESC_CONTROL_FRAG_MODE BIT(18) +#define XILINX_DPDMA_DESC_CONTROL_LAST BIT(19) +#define XILINX_DPDMA_DESC_CONTROL_ENABLE_CRC BIT(20) +#define XILINX_DPDMA_DESC_CONTROL_LAST_OF_FRAME BIT(21) +#define XILINX_DPDMA_DESC_ID_MASK GENMASK(15, 0) +#define XILINX_DPDMA_DESC_HSIZE_STRIDE_HSIZE_MASK GENMASK(17, 0) +#define XILINX_DPDMA_DESC_HSIZE_STRIDE_STRIDE_MASK GENMASK(31, 18) +#define XILINX_DPDMA_DESC_ADDR_EXT_NEXT_ADDR_MASK GENMASK(15, 0) +#define XILINX_DPDMA_DESC_ADDR_EXT_SRC_ADDR_MASK GENMASK(31, 16) + +#define XILINX_DPDMA_ALIGN_BYTES 256 +#define XILINX_DPDMA_LINESIZE_ALIGN_BITS 128 + +#define XILINX_DPDMA_NUM_CHAN 6 + +struct xilinx_dpdma_chan; + +/** + * struct xilinx_dpdma_hw_desc - DPDMA hardware descriptor + * @control: control configuration field + * @desc_id: descriptor ID + * @xfer_size: transfer size + * @hsize_stride: horizontal size and stride + * @timestamp_lsb: LSB of time stamp + * @timestamp_msb: MSB of time stamp + * @addr_ext: upper 16 bit of 48 bit address (next_desc and src_addr) + * @next_desc: next descriptor 32 bit address + * @src_addr: payload source address (1st page, 32 LSB) + * @addr_ext_23: payload source address (3nd and 3rd pages, 16 LSBs) + * @addr_ext_45: payload source address (4th and 5th pages, 16 LSBs) + * @src_addr2: payload source address (2nd page, 32 LSB) + * @src_addr3: payload source address (3rd page, 32 LSB) + * @src_addr4: payload source address (4th page, 32 LSB) + * @src_addr5: payload source address (5th page, 32 LSB) + * @crc: descriptor CRC + */ +struct xilinx_dpdma_hw_desc { + u32 control; + u32 desc_id; + u32 xfer_size; + u32 hsize_stride; + u32 timestamp_lsb; + u32 timestamp_msb; + u32 addr_ext; + u32 next_desc; + u32 src_addr; + u32 addr_ext_23; + u32 addr_ext_45; + u32 src_addr2; + u32 src_addr3; + u32 src_addr4; + u32 src_addr5; + u32 crc; +} __aligned(XILINX_DPDMA_ALIGN_BYTES); + +/** + * struct xilinx_dpdma_sw_desc - DPDMA software descriptor + * @hw: DPDMA hardware descriptor + * @node: list node for software descriptors + * @dma_addr: DMA address of the software descriptor + */ +struct xilinx_dpdma_sw_desc { + struct xilinx_dpdma_hw_desc hw; + struct list_head node; + dma_addr_t dma_addr; +}; + +/** + * struct xilinx_dpdma_tx_desc - DPDMA transaction descriptor + * @vdesc: virtual DMA descriptor + * @chan: DMA channel + * @descriptors: list of software descriptors + * @error: an error has been detected with this descriptor + */ +struct xilinx_dpdma_tx_desc { + struct virt_dma_desc vdesc; + struct xilinx_dpdma_chan *chan; + struct list_head descriptors; + bool error; +}; + +#define to_dpdma_tx_desc(_desc) \ + container_of(_desc, struct xilinx_dpdma_tx_desc, vdesc) + +/** + * struct xilinx_dpdma_chan - DPDMA channel + * @vchan: virtual DMA channel + * @reg: register base address + * @id: channel ID + * @wait_to_stop: queue to wait for outstanding transacitons before stopping + * @running: true if the channel is running + * @first_frame: flag for the first frame of stream + * @video_group: flag if multi-channel operation is needed for video channels + * @lock: lock to access struct xilinx_dpdma_chan + * @desc_pool: descriptor allocation pool + * @err_task: error IRQ bottom half handler + * @desc: References to descriptors being processed + * @desc.pending: Descriptor schedule to the hardware, pending execution + * @desc.active: Descriptor being executed by the hardware + * @xdev: DPDMA device + */ +struct xilinx_dpdma_chan { + struct virt_dma_chan vchan; + void __iomem *reg; + unsigned int id; + + wait_queue_head_t wait_to_stop; + bool running; + bool first_frame; + bool video_group; + + spinlock_t lock; /* lock to access struct xilinx_dpdma_chan */ + struct dma_pool *desc_pool; + struct tasklet_struct err_task; + + struct { + struct xilinx_dpdma_tx_desc *pending; + struct xilinx_dpdma_tx_desc *active; + } desc; + + struct xilinx_dpdma_device *xdev; +}; + +#define to_xilinx_chan(_chan) \ + container_of(_chan, struct xilinx_dpdma_chan, vchan.chan) + +/** + * struct xilinx_dpdma_device - DPDMA device + * @common: generic dma device structure + * @reg: register base address + * @dev: generic device structure + * @irq: the interrupt number + * @axi_clk: axi clock + * @chan: DPDMA channels + * @ext_addr: flag for 64 bit system (48 bit addressing) + */ +struct xilinx_dpdma_device { + struct dma_device common; + void __iomem *reg; + struct device *dev; + int irq; + + struct clk *axi_clk; + struct xilinx_dpdma_chan *chan[XILINX_DPDMA_NUM_CHAN]; + + bool ext_addr; +}; + +/* ----------------------------------------------------------------------------- + * DebugFS + */ +#define XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE 32 +#define XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR "65535" + +/* Match xilinx_dpdma_testcases vs dpdma_debugfs_reqs[] entry */ +enum xilinx_dpdma_testcases { + DPDMA_TC_INTR_DONE, + DPDMA_TC_NONE +}; + +struct xilinx_dpdma_debugfs { + enum xilinx_dpdma_testcases testcase; + u16 xilinx_dpdma_irq_done_count; + unsigned int chan_id; +}; + +static struct xilinx_dpdma_debugfs dpdma_debugfs; +struct xilinx_dpdma_debugfs_request { + const char *name; + enum xilinx_dpdma_testcases tc; + ssize_t (*read)(char *buf); + int (*write)(char *args); +}; + +static void xilinx_dpdma_debugfs_desc_done_irq(struct xilinx_dpdma_chan *chan) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS) && chan->id == dpdma_debugfs.chan_id) + dpdma_debugfs.xilinx_dpdma_irq_done_count++; +} + +static ssize_t xilinx_dpdma_debugfs_desc_done_irq_read(char *buf) +{ + size_t out_str_len; + + dpdma_debugfs.testcase = DPDMA_TC_NONE; + + out_str_len = strlen(XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR); + out_str_len = min_t(size_t, XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE, + out_str_len); + snprintf(buf, out_str_len, "%d", + dpdma_debugfs.xilinx_dpdma_irq_done_count); + + return 0; +} + +static int xilinx_dpdma_debugfs_desc_done_irq_write(char *args) +{ + char *arg; + int ret; + u32 id; + + arg = strsep(&args, " "); + if (!arg || strncasecmp(arg, "start", 5)) + return -EINVAL; + + arg = strsep(&args, " "); + if (!arg) + return -EINVAL; + + ret = kstrtou32(arg, 0, &id); + if (ret < 0) + return ret; + + if (id < ZYNQMP_DPDMA_VIDEO0 || id > ZYNQMP_DPDMA_AUDIO1) + return -EINVAL; + + dpdma_debugfs.testcase = DPDMA_TC_INTR_DONE; + dpdma_debugfs.xilinx_dpdma_irq_done_count = 0; + dpdma_debugfs.chan_id = id; + + return 0; +} + +/* Match xilinx_dpdma_testcases vs dpdma_debugfs_reqs[] entry */ +static struct xilinx_dpdma_debugfs_request dpdma_debugfs_reqs[] = { + { + .name = "DESCRIPTOR_DONE_INTR", + .tc = DPDMA_TC_INTR_DONE, + .read = xilinx_dpdma_debugfs_desc_done_irq_read, + .write = xilinx_dpdma_debugfs_desc_done_irq_write, + }, +}; + +static ssize_t xilinx_dpdma_debugfs_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + enum xilinx_dpdma_testcases testcase; + char *kern_buff; + int ret = 0; + + if (*pos != 0 || size <= 0) + return -EINVAL; + + kern_buff = kzalloc(XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE, GFP_KERNEL); + if (!kern_buff) { + dpdma_debugfs.testcase = DPDMA_TC_NONE; + return -ENOMEM; + } + + testcase = READ_ONCE(dpdma_debugfs.testcase); + if (testcase != DPDMA_TC_NONE) { + ret = dpdma_debugfs_reqs[testcase].read(kern_buff); + if (ret < 0) + goto done; + } else { + strscpy(kern_buff, "No testcase executed", + XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE); + } + + size = min(size, strlen(kern_buff)); + if (copy_to_user(buf, kern_buff, size)) + ret = -EFAULT; + +done: + kfree(kern_buff); + if (ret) + return ret; + + *pos = size + 1; + return size; +} + +static ssize_t xilinx_dpdma_debugfs_write(struct file *f, + const char __user *buf, size_t size, + loff_t *pos) +{ + char *kern_buff, *kern_buff_start; + char *testcase; + unsigned int i; + int ret; + + if (*pos != 0 || size <= 0) + return -EINVAL; + + /* Supporting single instance of test as of now. */ + if (dpdma_debugfs.testcase != DPDMA_TC_NONE) + return -EBUSY; + + kern_buff = kzalloc(size, GFP_KERNEL); + if (!kern_buff) + return -ENOMEM; + kern_buff_start = kern_buff; + + ret = strncpy_from_user(kern_buff, buf, size); + if (ret < 0) + goto done; + + /* Read the testcase name from a user request. */ + testcase = strsep(&kern_buff, " "); + + for (i = 0; i < ARRAY_SIZE(dpdma_debugfs_reqs); i++) { + if (!strcasecmp(testcase, dpdma_debugfs_reqs[i].name)) + break; + } + + if (i == ARRAY_SIZE(dpdma_debugfs_reqs)) { + ret = -EINVAL; + goto done; + } + + ret = dpdma_debugfs_reqs[i].write(kern_buff); + if (ret < 0) + goto done; + + ret = size; + +done: + kfree(kern_buff_start); + return ret; +} + +static const struct file_operations fops_xilinx_dpdma_dbgfs = { + .owner = THIS_MODULE, + .read = xilinx_dpdma_debugfs_read, + .write = xilinx_dpdma_debugfs_write, +}; + +static void xilinx_dpdma_debugfs_init(struct xilinx_dpdma_device *xdev) +{ + struct dentry *dent; + + dpdma_debugfs.testcase = DPDMA_TC_NONE; + + dent = debugfs_create_file("testcase", 0444, xdev->common.dbg_dev_root, + NULL, &fops_xilinx_dpdma_dbgfs); + if (IS_ERR(dent)) + dev_err(xdev->dev, "Failed to create debugfs testcase file\n"); +} + +/* ----------------------------------------------------------------------------- + * I/O Accessors + */ + +static inline u32 dpdma_read(void __iomem *base, u32 offset) +{ + return ioread32(base + offset); +} + +static inline void dpdma_write(void __iomem *base, u32 offset, u32 val) +{ + iowrite32(val, base + offset); +} + +static inline void dpdma_clr(void __iomem *base, u32 offset, u32 clr) +{ + dpdma_write(base, offset, dpdma_read(base, offset) & ~clr); +} + +static inline void dpdma_set(void __iomem *base, u32 offset, u32 set) +{ + dpdma_write(base, offset, dpdma_read(base, offset) | set); +} + +/* ----------------------------------------------------------------------------- + * Descriptor Operations + */ + +/** + * xilinx_dpdma_sw_desc_set_dma_addrs - Set DMA addresses in the descriptor + * @xdev: DPDMA device + * @sw_desc: The software descriptor in which to set DMA addresses + * @prev: The previous descriptor + * @dma_addr: array of dma addresses + * @num_src_addr: number of addresses in @dma_addr + * + * Set all the DMA addresses in the hardware descriptor corresponding to @dev + * from @dma_addr. If a previous descriptor is specified in @prev, its next + * descriptor DMA address is set to the DMA address of @sw_desc. @prev may be + * identical to @sw_desc for cyclic transfers. + */ +static void xilinx_dpdma_sw_desc_set_dma_addrs(struct xilinx_dpdma_device *xdev, + struct xilinx_dpdma_sw_desc *sw_desc, + struct xilinx_dpdma_sw_desc *prev, + dma_addr_t dma_addr[], + unsigned int num_src_addr) +{ + struct xilinx_dpdma_hw_desc *hw_desc = &sw_desc->hw; + unsigned int i; + + hw_desc->src_addr = lower_32_bits(dma_addr[0]); + if (xdev->ext_addr) + hw_desc->addr_ext |= + FIELD_PREP(XILINX_DPDMA_DESC_ADDR_EXT_SRC_ADDR_MASK, + upper_32_bits(dma_addr[0])); + + for (i = 1; i < num_src_addr; i++) { + u32 *addr = &hw_desc->src_addr2; + + addr[i - 1] = lower_32_bits(dma_addr[i]); + + if (xdev->ext_addr) { + u32 *addr_ext = &hw_desc->addr_ext_23; + u32 addr_msb; + + addr_msb = upper_32_bits(dma_addr[i]) & GENMASK(15, 0); + addr_msb <<= 16 * ((i - 1) % 2); + addr_ext[(i - 1) / 2] |= addr_msb; + } + } + + if (!prev) + return; + + prev->hw.next_desc = lower_32_bits(sw_desc->dma_addr); + if (xdev->ext_addr) + prev->hw.addr_ext |= + FIELD_PREP(XILINX_DPDMA_DESC_ADDR_EXT_NEXT_ADDR_MASK, + upper_32_bits(sw_desc->dma_addr)); +} + +/** + * xilinx_dpdma_chan_alloc_sw_desc - Allocate a software descriptor + * @chan: DPDMA channel + * + * Allocate a software descriptor from the channel's descriptor pool. + * + * Return: a software descriptor or NULL. + */ +static struct xilinx_dpdma_sw_desc * +xilinx_dpdma_chan_alloc_sw_desc(struct xilinx_dpdma_chan *chan) +{ + struct xilinx_dpdma_sw_desc *sw_desc; + dma_addr_t dma_addr; + + sw_desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &dma_addr); + if (!sw_desc) + return NULL; + + sw_desc->dma_addr = dma_addr; + + return sw_desc; +} + +/** + * xilinx_dpdma_chan_free_sw_desc - Free a software descriptor + * @chan: DPDMA channel + * @sw_desc: software descriptor to free + * + * Free a software descriptor from the channel's descriptor pool. + */ +static void +xilinx_dpdma_chan_free_sw_desc(struct xilinx_dpdma_chan *chan, + struct xilinx_dpdma_sw_desc *sw_desc) +{ + dma_pool_free(chan->desc_pool, sw_desc, sw_desc->dma_addr); +} + +/** + * xilinx_dpdma_chan_dump_tx_desc - Dump a tx descriptor + * @chan: DPDMA channel + * @tx_desc: tx descriptor to dump + * + * Dump contents of a tx descriptor + */ +static void xilinx_dpdma_chan_dump_tx_desc(struct xilinx_dpdma_chan *chan, + struct xilinx_dpdma_tx_desc *tx_desc) +{ + struct xilinx_dpdma_sw_desc *sw_desc; + struct device *dev = chan->xdev->dev; + unsigned int i = 0; + + dev_dbg(dev, "------- TX descriptor dump start -------\n"); + dev_dbg(dev, "------- channel ID = %d -------\n", chan->id); + + list_for_each_entry(sw_desc, &tx_desc->descriptors, node) { + struct xilinx_dpdma_hw_desc *hw_desc = &sw_desc->hw; + + dev_dbg(dev, "------- HW descriptor %d -------\n", i++); + dev_dbg(dev, "descriptor DMA addr: %pad\n", &sw_desc->dma_addr); + dev_dbg(dev, "control: 0x%08x\n", hw_desc->control); + dev_dbg(dev, "desc_id: 0x%08x\n", hw_desc->desc_id); + dev_dbg(dev, "xfer_size: 0x%08x\n", hw_desc->xfer_size); + dev_dbg(dev, "hsize_stride: 0x%08x\n", hw_desc->hsize_stride); + dev_dbg(dev, "timestamp_lsb: 0x%08x\n", hw_desc->timestamp_lsb); + dev_dbg(dev, "timestamp_msb: 0x%08x\n", hw_desc->timestamp_msb); + dev_dbg(dev, "addr_ext: 0x%08x\n", hw_desc->addr_ext); + dev_dbg(dev, "next_desc: 0x%08x\n", hw_desc->next_desc); + dev_dbg(dev, "src_addr: 0x%08x\n", hw_desc->src_addr); + dev_dbg(dev, "addr_ext_23: 0x%08x\n", hw_desc->addr_ext_23); + dev_dbg(dev, "addr_ext_45: 0x%08x\n", hw_desc->addr_ext_45); + dev_dbg(dev, "src_addr2: 0x%08x\n", hw_desc->src_addr2); + dev_dbg(dev, "src_addr3: 0x%08x\n", hw_desc->src_addr3); + dev_dbg(dev, "src_addr4: 0x%08x\n", hw_desc->src_addr4); + dev_dbg(dev, "src_addr5: 0x%08x\n", hw_desc->src_addr5); + dev_dbg(dev, "crc: 0x%08x\n", hw_desc->crc); + } + + dev_dbg(dev, "------- TX descriptor dump end -------\n"); +} + +/** + * xilinx_dpdma_chan_alloc_tx_desc - Allocate a transaction descriptor + * @chan: DPDMA channel + * + * Allocate a tx descriptor. + * + * Return: a tx descriptor or NULL. + */ +static struct xilinx_dpdma_tx_desc * +xilinx_dpdma_chan_alloc_tx_desc(struct xilinx_dpdma_chan *chan) +{ + struct xilinx_dpdma_tx_desc *tx_desc; + + tx_desc = kzalloc(sizeof(*tx_desc), GFP_NOWAIT); + if (!tx_desc) + return NULL; + + INIT_LIST_HEAD(&tx_desc->descriptors); + tx_desc->chan = chan; + tx_desc->error = false; + + return tx_desc; +} + +/** + * xilinx_dpdma_chan_free_tx_desc - Free a virtual DMA descriptor + * @vdesc: virtual DMA descriptor + * + * Free the virtual DMA descriptor @vdesc including its software descriptors. + */ +static void xilinx_dpdma_chan_free_tx_desc(struct virt_dma_desc *vdesc) +{ + struct xilinx_dpdma_sw_desc *sw_desc, *next; + struct xilinx_dpdma_tx_desc *desc; + + if (!vdesc) + return; + + desc = to_dpdma_tx_desc(vdesc); + + list_for_each_entry_safe(sw_desc, next, &desc->descriptors, node) { + list_del(&sw_desc->node); + xilinx_dpdma_chan_free_sw_desc(desc->chan, sw_desc); + } + + kfree(desc); +} + +/** + * xilinx_dpdma_chan_prep_interleaved_dma - Prepare an interleaved dma + * descriptor + * @chan: DPDMA channel + * @xt: dma interleaved template + * + * Prepare a tx descriptor including internal software/hardware descriptors + * based on @xt. + * + * Return: A DPDMA TX descriptor on success, or NULL. + */ +static struct xilinx_dpdma_tx_desc * +xilinx_dpdma_chan_prep_interleaved_dma(struct xilinx_dpdma_chan *chan, + struct dma_interleaved_template *xt) +{ + struct xilinx_dpdma_tx_desc *tx_desc; + struct xilinx_dpdma_sw_desc *sw_desc; + struct xilinx_dpdma_hw_desc *hw_desc; + size_t hsize = xt->sgl[0].size; + size_t stride = hsize + xt->sgl[0].icg; + + if (!IS_ALIGNED(xt->src_start, XILINX_DPDMA_ALIGN_BYTES)) { + dev_err(chan->xdev->dev, + "chan%u: buffer should be aligned at %d B\n", + chan->id, XILINX_DPDMA_ALIGN_BYTES); + return NULL; + } + + tx_desc = xilinx_dpdma_chan_alloc_tx_desc(chan); + if (!tx_desc) + return NULL; + + sw_desc = xilinx_dpdma_chan_alloc_sw_desc(chan); + if (!sw_desc) { + xilinx_dpdma_chan_free_tx_desc(&tx_desc->vdesc); + return NULL; + } + + xilinx_dpdma_sw_desc_set_dma_addrs(chan->xdev, sw_desc, sw_desc, + &xt->src_start, 1); + + hw_desc = &sw_desc->hw; + hsize = ALIGN(hsize, XILINX_DPDMA_LINESIZE_ALIGN_BITS / 8); + hw_desc->xfer_size = hsize * xt->numf; + hw_desc->hsize_stride = + FIELD_PREP(XILINX_DPDMA_DESC_HSIZE_STRIDE_HSIZE_MASK, hsize) | + FIELD_PREP(XILINX_DPDMA_DESC_HSIZE_STRIDE_STRIDE_MASK, + stride / 16); + hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_PREEMBLE; + hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_COMPLETE_INTR; + hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_IGNORE_DONE; + hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_LAST_OF_FRAME; + + list_add_tail(&sw_desc->node, &tx_desc->descriptors); + + return tx_desc; +} + +/* ----------------------------------------------------------------------------- + * DPDMA Channel Operations + */ + +/** + * xilinx_dpdma_chan_enable - Enable the channel + * @chan: DPDMA channel + * + * Enable the channel and its interrupts. Set the QoS values for video class. + */ +static void xilinx_dpdma_chan_enable(struct xilinx_dpdma_chan *chan) +{ + u32 reg; + + reg = (XILINX_DPDMA_INTR_CHAN_MASK << chan->id) + | XILINX_DPDMA_INTR_GLOBAL_MASK; + dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN, reg); + reg = (XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id) + | XILINX_DPDMA_INTR_GLOBAL_ERR; + dpdma_write(chan->xdev->reg, XILINX_DPDMA_EIEN, reg); + + reg = XILINX_DPDMA_CH_CNTL_ENABLE + | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DSCR_WR_MASK, + XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS) + | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DSCR_RD_MASK, + XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS) + | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DATA_RD_MASK, + XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS); + dpdma_set(chan->reg, XILINX_DPDMA_CH_CNTL, reg); +} + +/** + * xilinx_dpdma_chan_disable - Disable the channel + * @chan: DPDMA channel + * + * Disable the channel and its interrupts. + */ +static void xilinx_dpdma_chan_disable(struct xilinx_dpdma_chan *chan) +{ + u32 reg; + + reg = XILINX_DPDMA_INTR_CHAN_MASK << chan->id; + dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN, reg); + reg = XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id; + dpdma_write(chan->xdev->reg, XILINX_DPDMA_EIEN, reg); + + dpdma_clr(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE); +} + +/** + * xilinx_dpdma_chan_pause - Pause the channel + * @chan: DPDMA channel + * + * Pause the channel. + */ +static void xilinx_dpdma_chan_pause(struct xilinx_dpdma_chan *chan) +{ + dpdma_set(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_PAUSE); +} + +/** + * xilinx_dpdma_chan_unpause - Unpause the channel + * @chan: DPDMA channel + * + * Unpause the channel. + */ +static void xilinx_dpdma_chan_unpause(struct xilinx_dpdma_chan *chan) +{ + dpdma_clr(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_PAUSE); +} + +static u32 xilinx_dpdma_chan_video_group_ready(struct xilinx_dpdma_chan *chan) +{ + struct xilinx_dpdma_device *xdev = chan->xdev; + u32 channels = 0; + unsigned int i; + + for (i = ZYNQMP_DPDMA_VIDEO0; i <= ZYNQMP_DPDMA_VIDEO2; i++) { + if (xdev->chan[i]->video_group && !xdev->chan[i]->running) + return 0; + + if (xdev->chan[i]->video_group) + channels |= BIT(i); + } + + return channels; +} + +/** + * xilinx_dpdma_chan_queue_transfer - Queue the next transfer + * @chan: DPDMA channel + * + * Queue the next descriptor, if any, to the hardware. If the channel is + * stopped, start it first. Otherwise retrigger it with the next descriptor. + */ +static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) +{ + struct xilinx_dpdma_device *xdev = chan->xdev; + struct xilinx_dpdma_sw_desc *sw_desc; + struct xilinx_dpdma_tx_desc *desc; + struct virt_dma_desc *vdesc; + u32 reg, channels; + bool first_frame; + + lockdep_assert_held(&chan->lock); + + if (chan->desc.pending) + return; + + if (!chan->running) { + xilinx_dpdma_chan_unpause(chan); + xilinx_dpdma_chan_enable(chan); + chan->first_frame = true; + chan->running = true; + } + + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) + return; + + desc = to_dpdma_tx_desc(vdesc); + chan->desc.pending = desc; + list_del(&desc->vdesc.node); + + /* + * Assign the cookie to descriptors in this transaction. Only 16 bit + * will be used, but it should be enough. + */ + list_for_each_entry(sw_desc, &desc->descriptors, node) + sw_desc->hw.desc_id = desc->vdesc.tx.cookie + & XILINX_DPDMA_CH_DESC_ID_MASK; + + sw_desc = list_first_entry(&desc->descriptors, + struct xilinx_dpdma_sw_desc, node); + dpdma_write(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDR, + lower_32_bits(sw_desc->dma_addr)); + if (xdev->ext_addr) + dpdma_write(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDRE, + FIELD_PREP(XILINX_DPDMA_CH_DESC_START_ADDRE_MASK, + upper_32_bits(sw_desc->dma_addr))); + + first_frame = chan->first_frame; + chan->first_frame = false; + + if (chan->video_group) { + channels = xilinx_dpdma_chan_video_group_ready(chan); + /* + * Trigger the transfer only when all channels in the group are + * ready. + */ + if (!channels) + return; + } else { + channels = BIT(chan->id); + } + + if (first_frame) + reg = XILINX_DPDMA_GBL_TRIG_MASK(channels); + else + reg = XILINX_DPDMA_GBL_RETRIG_MASK(channels); + + dpdma_write(xdev->reg, XILINX_DPDMA_GBL, reg); +} + +/** + * xilinx_dpdma_chan_ostand - Number of outstanding transactions + * @chan: DPDMA channel + * + * Read and return the number of outstanding transactions from register. + * + * Return: Number of outstanding transactions from the status register. + */ +static u32 xilinx_dpdma_chan_ostand(struct xilinx_dpdma_chan *chan) +{ + return FIELD_GET(XILINX_DPDMA_CH_STATUS_OTRAN_CNT_MASK, + dpdma_read(chan->reg, XILINX_DPDMA_CH_STATUS)); +} + +/** + * xilinx_dpdma_chan_notify_no_ostand - Notify no outstanding transaction event + * @chan: DPDMA channel + * + * Notify waiters for no outstanding event, so waiters can stop the channel + * safely. This function is supposed to be called when 'no outstanding' + * interrupt is generated. The 'no outstanding' interrupt is disabled and + * should be re-enabled when this event is handled. If the channel status + * register still shows some number of outstanding transactions, the interrupt + * remains enabled. + * + * Return: 0 on success. On failure, -EWOULDBLOCK if there's still outstanding + * transaction(s). + */ +static int xilinx_dpdma_chan_notify_no_ostand(struct xilinx_dpdma_chan *chan) +{ + u32 cnt; + + cnt = xilinx_dpdma_chan_ostand(chan); + if (cnt) { + dev_dbg(chan->xdev->dev, + "chan%u: %d outstanding transactions\n", + chan->id, cnt); + return -EWOULDBLOCK; + } + + /* Disable 'no outstanding' interrupt */ + dpdma_write(chan->xdev->reg, XILINX_DPDMA_IDS, + XILINX_DPDMA_INTR_NO_OSTAND(chan->id)); + wake_up(&chan->wait_to_stop); + + return 0; +} + +/** + * xilinx_dpdma_chan_wait_no_ostand - Wait for the no outstanding irq + * @chan: DPDMA channel + * + * Wait for the no outstanding transaction interrupt. This functions can sleep + * for 50ms. + * + * Return: 0 on success. On failure, -ETIMEOUT for time out, or the error code + * from wait_event_interruptible_timeout(). + */ +static int xilinx_dpdma_chan_wait_no_ostand(struct xilinx_dpdma_chan *chan) +{ + int ret; + + /* Wait for a no outstanding transaction interrupt upto 50msec */ + ret = wait_event_interruptible_timeout(chan->wait_to_stop, + !xilinx_dpdma_chan_ostand(chan), + msecs_to_jiffies(50)); + if (ret > 0) { + dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN, + XILINX_DPDMA_INTR_NO_OSTAND(chan->id)); + return 0; + } + + dev_err(chan->xdev->dev, "chan%u: not ready to stop: %d trans\n", + chan->id, xilinx_dpdma_chan_ostand(chan)); + + if (ret == 0) + return -ETIMEDOUT; + + return ret; +} + +/** + * xilinx_dpdma_chan_poll_no_ostand - Poll the outstanding transaction status + * @chan: DPDMA channel + * + * Poll the outstanding transaction status, and return when there's no + * outstanding transaction. This functions can be used in the interrupt context + * or where the atomicity is required. Calling thread may wait more than 50ms. + * + * Return: 0 on success, or -ETIMEDOUT. + */ +static int xilinx_dpdma_chan_poll_no_ostand(struct xilinx_dpdma_chan *chan) +{ + u32 cnt, loop = 50000; + + /* Poll at least for 50ms (20 fps). */ + do { + cnt = xilinx_dpdma_chan_ostand(chan); + udelay(1); + } while (loop-- > 0 && cnt); + + if (loop) { + dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN, + XILINX_DPDMA_INTR_NO_OSTAND(chan->id)); + return 0; + } + + dev_err(chan->xdev->dev, "chan%u: not ready to stop: %d trans\n", + chan->id, xilinx_dpdma_chan_ostand(chan)); + + return -ETIMEDOUT; +} + +/** + * xilinx_dpdma_chan_stop - Stop the channel + * @chan: DPDMA channel + * + * Stop a previously paused channel by first waiting for completion of all + * outstanding transaction and then disabling the channel. + * + * Return: 0 on success, or -ETIMEDOUT if the channel failed to stop. + */ +static int xilinx_dpdma_chan_stop(struct xilinx_dpdma_chan *chan) +{ + unsigned long flags; + int ret; + + ret = xilinx_dpdma_chan_wait_no_ostand(chan); + if (ret) + return ret; + + spin_lock_irqsave(&chan->lock, flags); + xilinx_dpdma_chan_disable(chan); + chan->running = false; + spin_unlock_irqrestore(&chan->lock, flags); + + return 0; +} + +/** + * xilinx_dpdma_chan_done_irq - Handle hardware descriptor completion + * @chan: DPDMA channel + * + * Handle completion of the currently active descriptor (@chan->desc.active). As + * we currently support cyclic transfers only, this just invokes the cyclic + * callback. The descriptor will be completed at the VSYNC interrupt when a new + * descriptor replaces it. + */ +static void xilinx_dpdma_chan_done_irq(struct xilinx_dpdma_chan *chan) +{ + struct xilinx_dpdma_tx_desc *active; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + xilinx_dpdma_debugfs_desc_done_irq(chan); + + active = chan->desc.active; + if (active) + vchan_cyclic_callback(&active->vdesc); + else + dev_warn(chan->xdev->dev, + "chan%u: DONE IRQ with no active descriptor!\n", + chan->id); + + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_dpdma_chan_vsync_irq - Handle hardware descriptor scheduling + * @chan: DPDMA channel + * + * At VSYNC the active descriptor may have been replaced by the pending + * descriptor. Detect this through the DESC_ID and perform appropriate + * bookkeeping. + */ +static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) +{ + struct xilinx_dpdma_tx_desc *pending; + struct xilinx_dpdma_sw_desc *sw_desc; + unsigned long flags; + u32 desc_id; + + spin_lock_irqsave(&chan->lock, flags); + + pending = chan->desc.pending; + if (!chan->running || !pending) + goto out; + + desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID) + & XILINX_DPDMA_CH_DESC_ID_MASK; + + /* If the retrigger raced with vsync, retry at the next frame. */ + sw_desc = list_first_entry(&pending->descriptors, + struct xilinx_dpdma_sw_desc, node); + if (sw_desc->hw.desc_id != desc_id) { + dev_dbg(chan->xdev->dev, + "chan%u: vsync race lost (%u != %u), retrying\n", + chan->id, sw_desc->hw.desc_id, desc_id); + goto out; + } + + /* + * Complete the active descriptor, if any, promote the pending + * descriptor to active, and queue the next transfer, if any. + */ + if (chan->desc.active) + vchan_cookie_complete(&chan->desc.active->vdesc); + chan->desc.active = pending; + chan->desc.pending = NULL; + + xilinx_dpdma_chan_queue_transfer(chan); + +out: + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_dpdma_chan_err - Detect any channel error + * @chan: DPDMA channel + * @isr: masked Interrupt Status Register + * @eisr: Error Interrupt Status Register + * + * Return: true if any channel error occurs, or false otherwise. + */ +static bool +xilinx_dpdma_chan_err(struct xilinx_dpdma_chan *chan, u32 isr, u32 eisr) +{ + if (!chan) + return false; + + if (chan->running && + ((isr & (XILINX_DPDMA_INTR_CHAN_ERR_MASK << chan->id)) || + (eisr & (XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id)))) + return true; + + return false; +} + +/** + * xilinx_dpdma_chan_handle_err - DPDMA channel error handling + * @chan: DPDMA channel + * + * This function is called when any channel error or any global error occurs. + * The function disables the paused channel by errors and determines + * if the current active descriptor can be rescheduled depending on + * the descriptor status. + */ +static void xilinx_dpdma_chan_handle_err(struct xilinx_dpdma_chan *chan) +{ + struct xilinx_dpdma_device *xdev = chan->xdev; + struct xilinx_dpdma_tx_desc *active; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + dev_dbg(xdev->dev, "chan%u: cur desc addr = 0x%04x%08x\n", + chan->id, + dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDRE), + dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDR)); + dev_dbg(xdev->dev, "chan%u: cur payload addr = 0x%04x%08x\n", + chan->id, + dpdma_read(chan->reg, XILINX_DPDMA_CH_PYLD_CUR_ADDRE), + dpdma_read(chan->reg, XILINX_DPDMA_CH_PYLD_CUR_ADDR)); + + xilinx_dpdma_chan_disable(chan); + chan->running = false; + + if (!chan->desc.active) + goto out_unlock; + + active = chan->desc.active; + chan->desc.active = NULL; + + xilinx_dpdma_chan_dump_tx_desc(chan, active); + + if (active->error) + dev_dbg(xdev->dev, "chan%u: repeated error on desc\n", + chan->id); + + /* Reschedule if there's no new descriptor */ + if (!chan->desc.pending && + list_empty(&chan->vchan.desc_issued)) { + active->error = true; + list_add_tail(&active->vdesc.node, + &chan->vchan.desc_issued); + } else { + xilinx_dpdma_chan_free_tx_desc(&active->vdesc); + } + +out_unlock: + spin_unlock_irqrestore(&chan->lock, flags); +} + +/* ----------------------------------------------------------------------------- + * DMA Engine Operations + */ + +static struct dma_async_tx_descriptor * +xilinx_dpdma_prep_interleaved_dma(struct dma_chan *dchan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dpdma_tx_desc *desc; + + if (xt->dir != DMA_MEM_TO_DEV) + return NULL; + + if (!xt->numf || !xt->sgl[0].size) + return NULL; + + if (!(flags & DMA_PREP_REPEAT) || !(flags & DMA_PREP_LOAD_EOT)) + return NULL; + + desc = xilinx_dpdma_chan_prep_interleaved_dma(chan, xt); + if (!desc) + return NULL; + + vchan_tx_prep(&chan->vchan, &desc->vdesc, flags | DMA_CTRL_ACK); + + return &desc->vdesc.tx; +} + +/** + * xilinx_dpdma_alloc_chan_resources - Allocate resources for the channel + * @dchan: DMA channel + * + * Allocate a descriptor pool for the channel. + * + * Return: 0 on success, or -ENOMEM if failed to allocate a pool. + */ +static int xilinx_dpdma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); + size_t align = __alignof__(struct xilinx_dpdma_sw_desc); + + chan->desc_pool = dma_pool_create(dev_name(chan->xdev->dev), + chan->xdev->dev, + sizeof(struct xilinx_dpdma_sw_desc), + align, 0); + if (!chan->desc_pool) { + dev_err(chan->xdev->dev, + "chan%u: failed to allocate a descriptor pool\n", + chan->id); + return -ENOMEM; + } + + return 0; +} + +/** + * xilinx_dpdma_free_chan_resources - Free all resources for the channel + * @dchan: DMA channel + * + * Free resources associated with the virtual DMA channel, and destroy the + * descriptor pool. + */ +static void xilinx_dpdma_free_chan_resources(struct dma_chan *dchan) +{ + struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); + + vchan_free_chan_resources(&chan->vchan); + + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; +} + +static void xilinx_dpdma_issue_pending(struct dma_chan *dchan) +{ + struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (vchan_issue_pending(&chan->vchan)) + xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static int xilinx_dpdma_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dpdma_peripheral_config *pconfig; + unsigned long flags; + + /* + * The destination address doesn't need to be specified as the DPDMA is + * hardwired to the destination (the DP controller). The transfer + * width, burst size and port window size are thus meaningless, they're + * fixed both on the DPDMA side and on the DP controller side. + */ + + /* + * Use the peripheral_config to indicate that the channel is part + * of a video group. This requires matching use of the custom + * structure in each driver. + */ + pconfig = config->peripheral_config; + if (WARN_ON(pconfig && config->peripheral_size != sizeof(*pconfig))) + return -EINVAL; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->id <= ZYNQMP_DPDMA_VIDEO2 && pconfig) + chan->video_group = pconfig->video_group; + spin_unlock_irqrestore(&chan->lock, flags); + + return 0; +} + +static int xilinx_dpdma_pause(struct dma_chan *dchan) +{ + xilinx_dpdma_chan_pause(to_xilinx_chan(dchan)); + + return 0; +} + +static int xilinx_dpdma_resume(struct dma_chan *dchan) +{ + xilinx_dpdma_chan_unpause(to_xilinx_chan(dchan)); + + return 0; +} + +/** + * xilinx_dpdma_terminate_all - Terminate the channel and descriptors + * @dchan: DMA channel + * + * Pause the channel without waiting for ongoing transfers to complete. Waiting + * for completion is performed by xilinx_dpdma_synchronize() that will disable + * the channel to complete the stop. + * + * All the descriptors associated with the channel that are guaranteed not to + * be touched by the hardware. The pending and active descriptor are not + * touched, and will be freed either upon completion, or by + * xilinx_dpdma_synchronize(). + * + * Return: 0 on success, or -ETIMEDOUT if the channel failed to stop. + */ +static int xilinx_dpdma_terminate_all(struct dma_chan *dchan) +{ + struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dpdma_device *xdev = chan->xdev; + LIST_HEAD(descriptors); + unsigned long flags; + unsigned int i; + + /* Pause the channel (including the whole video group if applicable). */ + if (chan->video_group) { + for (i = ZYNQMP_DPDMA_VIDEO0; i <= ZYNQMP_DPDMA_VIDEO2; i++) { + if (xdev->chan[i]->video_group && + xdev->chan[i]->running) { + xilinx_dpdma_chan_pause(xdev->chan[i]); + xdev->chan[i]->video_group = false; + } + } + } else { + xilinx_dpdma_chan_pause(chan); + } + + /* Gather all the descriptors we can free and free them. */ + spin_lock_irqsave(&chan->vchan.lock, flags); + vchan_get_all_descriptors(&chan->vchan, &descriptors); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&chan->vchan, &descriptors); + + return 0; +} + +/** + * xilinx_dpdma_synchronize - Synchronize callback execution + * @dchan: DMA channel + * + * Synchronizing callback execution ensures that all previously issued + * transfers have completed and all associated callbacks have been called and + * have returned. + * + * This function waits for the DMA channel to stop. It assumes it has been + * paused by a previous call to dmaengine_terminate_async(), and that no new + * pending descriptors have been issued with dma_async_issue_pending(). The + * behaviour is undefined otherwise. + */ +static void xilinx_dpdma_synchronize(struct dma_chan *dchan) +{ + struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); + unsigned long flags; + + xilinx_dpdma_chan_stop(chan); + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (chan->desc.pending) { + vchan_terminate_vdesc(&chan->desc.pending->vdesc); + chan->desc.pending = NULL; + } + if (chan->desc.active) { + vchan_terminate_vdesc(&chan->desc.active->vdesc); + chan->desc.active = NULL; + } + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + vchan_synchronize(&chan->vchan); +} + +/* ----------------------------------------------------------------------------- + * Interrupt and Tasklet Handling + */ + +/** + * xilinx_dpdma_err - Detect any global error + * @isr: Interrupt Status Register + * @eisr: Error Interrupt Status Register + * + * Return: True if any global error occurs, or false otherwise. + */ +static bool xilinx_dpdma_err(u32 isr, u32 eisr) +{ + if (isr & XILINX_DPDMA_INTR_GLOBAL_ERR || + eisr & XILINX_DPDMA_EINTR_GLOBAL_ERR) + return true; + + return false; +} + +/** + * xilinx_dpdma_handle_err_irq - Handle DPDMA error interrupt + * @xdev: DPDMA device + * @isr: masked Interrupt Status Register + * @eisr: Error Interrupt Status Register + * + * Handle if any error occurs based on @isr and @eisr. This function disables + * corresponding error interrupts, and those should be re-enabled once handling + * is done. + */ +static void xilinx_dpdma_handle_err_irq(struct xilinx_dpdma_device *xdev, + u32 isr, u32 eisr) +{ + bool err = xilinx_dpdma_err(isr, eisr); + unsigned int i; + + dev_dbg_ratelimited(xdev->dev, + "error irq: isr = 0x%08x, eisr = 0x%08x\n", + isr, eisr); + + /* Disable channel error interrupts until errors are handled. */ + dpdma_write(xdev->reg, XILINX_DPDMA_IDS, + isr & ~XILINX_DPDMA_INTR_GLOBAL_ERR); + dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, + eisr & ~XILINX_DPDMA_EINTR_GLOBAL_ERR); + + for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) + if (err || xilinx_dpdma_chan_err(xdev->chan[i], isr, eisr)) + tasklet_schedule(&xdev->chan[i]->err_task); +} + +/** + * xilinx_dpdma_enable_irq - Enable interrupts + * @xdev: DPDMA device + * + * Enable interrupts. + */ +static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev) +{ + dpdma_write(xdev->reg, XILINX_DPDMA_IEN, XILINX_DPDMA_INTR_ALL); + dpdma_write(xdev->reg, XILINX_DPDMA_EIEN, XILINX_DPDMA_EINTR_ALL); +} + +/** + * xilinx_dpdma_disable_irq - Disable interrupts + * @xdev: DPDMA device + * + * Disable interrupts. + */ +static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev) +{ + dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL); + dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL); +} + +/** + * xilinx_dpdma_chan_err_task - Per channel tasklet for error handling + * @t: pointer to the tasklet associated with this handler + * + * Per channel error handling tasklet. This function waits for the outstanding + * transaction to complete and triggers error handling. After error handling, + * re-enable channel error interrupts, and restart the channel if needed. + */ +static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t) +{ + struct xilinx_dpdma_chan *chan = from_tasklet(chan, t, err_task); + struct xilinx_dpdma_device *xdev = chan->xdev; + unsigned long flags; + + /* Proceed error handling even when polling fails. */ + xilinx_dpdma_chan_poll_no_ostand(chan); + + xilinx_dpdma_chan_handle_err(chan); + + dpdma_write(xdev->reg, XILINX_DPDMA_IEN, + XILINX_DPDMA_INTR_CHAN_ERR_MASK << chan->id); + dpdma_write(xdev->reg, XILINX_DPDMA_EIEN, + XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id); + + spin_lock_irqsave(&chan->lock, flags); + xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock_irqrestore(&chan->lock, flags); +} + +static irqreturn_t xilinx_dpdma_irq_handler(int irq, void *data) +{ + struct xilinx_dpdma_device *xdev = data; + unsigned long mask; + unsigned int i; + u32 status; + u32 error; + + status = dpdma_read(xdev->reg, XILINX_DPDMA_ISR); + error = dpdma_read(xdev->reg, XILINX_DPDMA_EISR); + if (!status && !error) + return IRQ_NONE; + + dpdma_write(xdev->reg, XILINX_DPDMA_ISR, status); + dpdma_write(xdev->reg, XILINX_DPDMA_EISR, error); + + if (status & XILINX_DPDMA_INTR_VSYNC) { + /* + * There's a single VSYNC interrupt that needs to be processed + * by each running channel to update the active descriptor. + */ + for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) { + struct xilinx_dpdma_chan *chan = xdev->chan[i]; + + if (chan) + xilinx_dpdma_chan_vsync_irq(chan); + } + } + + mask = FIELD_GET(XILINX_DPDMA_INTR_DESC_DONE_MASK, status); + if (mask) { + for_each_set_bit(i, &mask, ARRAY_SIZE(xdev->chan)) + xilinx_dpdma_chan_done_irq(xdev->chan[i]); + } + + mask = FIELD_GET(XILINX_DPDMA_INTR_NO_OSTAND_MASK, status); + if (mask) { + for_each_set_bit(i, &mask, ARRAY_SIZE(xdev->chan)) + xilinx_dpdma_chan_notify_no_ostand(xdev->chan[i]); + } + + mask = status & XILINX_DPDMA_INTR_ERR_ALL; + if (mask || error) + xilinx_dpdma_handle_err_irq(xdev, mask, error); + + return IRQ_HANDLED; +} + +/* ----------------------------------------------------------------------------- + * Initialization & Cleanup + */ + +static int xilinx_dpdma_chan_init(struct xilinx_dpdma_device *xdev, + unsigned int chan_id) +{ + struct xilinx_dpdma_chan *chan; + + chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL); + if (!chan) + return -ENOMEM; + + chan->id = chan_id; + chan->reg = xdev->reg + XILINX_DPDMA_CH_BASE + + XILINX_DPDMA_CH_OFFSET * chan->id; + chan->running = false; + chan->xdev = xdev; + + spin_lock_init(&chan->lock); + init_waitqueue_head(&chan->wait_to_stop); + + tasklet_setup(&chan->err_task, xilinx_dpdma_chan_err_task); + + chan->vchan.desc_free = xilinx_dpdma_chan_free_tx_desc; + vchan_init(&chan->vchan, &xdev->common); + + xdev->chan[chan->id] = chan; + + return 0; +} + +static void xilinx_dpdma_chan_remove(struct xilinx_dpdma_chan *chan) +{ + if (!chan) + return; + + tasklet_kill(&chan->err_task); + list_del(&chan->vchan.chan.device_node); +} + +static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct xilinx_dpdma_device *xdev = ofdma->of_dma_data; + u32 chan_id = dma_spec->args[0]; + + if (chan_id >= ARRAY_SIZE(xdev->chan)) + return NULL; + + if (!xdev->chan[chan_id]) + return NULL; + + return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan); +} + +static void dpdma_hw_init(struct xilinx_dpdma_device *xdev) +{ + unsigned int i; + void __iomem *reg; + + /* Disable all interrupts */ + xilinx_dpdma_disable_irq(xdev); + + /* Stop all channels */ + for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) { + reg = xdev->reg + XILINX_DPDMA_CH_BASE + + XILINX_DPDMA_CH_OFFSET * i; + dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE); + } + + /* Clear the interrupt status registers */ + dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL); + dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL); +} + +static int xilinx_dpdma_probe(struct platform_device *pdev) +{ + struct xilinx_dpdma_device *xdev; + struct dma_device *ddev; + unsigned int i; + int ret; + + xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL); + if (!xdev) + return -ENOMEM; + + xdev->dev = &pdev->dev; + xdev->ext_addr = sizeof(dma_addr_t) > 4; + + INIT_LIST_HEAD(&xdev->common.channels); + + platform_set_drvdata(pdev, xdev); + + xdev->axi_clk = devm_clk_get(xdev->dev, "axi_clk"); + if (IS_ERR(xdev->axi_clk)) + return PTR_ERR(xdev->axi_clk); + + xdev->reg = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(xdev->reg)) + return PTR_ERR(xdev->reg); + + dpdma_hw_init(xdev); + + xdev->irq = platform_get_irq(pdev, 0); + if (xdev->irq < 0) + return xdev->irq; + + ret = request_irq(xdev->irq, xilinx_dpdma_irq_handler, IRQF_SHARED, + dev_name(xdev->dev), xdev); + if (ret) { + dev_err(xdev->dev, "failed to request IRQ\n"); + return ret; + } + + ddev = &xdev->common; + ddev->dev = &pdev->dev; + + dma_cap_set(DMA_SLAVE, ddev->cap_mask); + dma_cap_set(DMA_PRIVATE, ddev->cap_mask); + dma_cap_set(DMA_INTERLEAVE, ddev->cap_mask); + dma_cap_set(DMA_REPEAT, ddev->cap_mask); + dma_cap_set(DMA_LOAD_EOT, ddev->cap_mask); + ddev->copy_align = fls(XILINX_DPDMA_ALIGN_BYTES - 1); + + ddev->device_alloc_chan_resources = xilinx_dpdma_alloc_chan_resources; + ddev->device_free_chan_resources = xilinx_dpdma_free_chan_resources; + ddev->device_prep_interleaved_dma = xilinx_dpdma_prep_interleaved_dma; + /* TODO: Can we achieve better granularity ? */ + ddev->device_tx_status = dma_cookie_status; + ddev->device_issue_pending = xilinx_dpdma_issue_pending; + ddev->device_config = xilinx_dpdma_config; + ddev->device_pause = xilinx_dpdma_pause; + ddev->device_resume = xilinx_dpdma_resume; + ddev->device_terminate_all = xilinx_dpdma_terminate_all; + ddev->device_synchronize = xilinx_dpdma_synchronize; + ddev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED); + ddev->directions = BIT(DMA_MEM_TO_DEV); + ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + for (i = 0; i < ARRAY_SIZE(xdev->chan); ++i) { + ret = xilinx_dpdma_chan_init(xdev, i); + if (ret < 0) { + dev_err(xdev->dev, "failed to initialize channel %u\n", + i); + goto error; + } + } + + ret = clk_prepare_enable(xdev->axi_clk); + if (ret) { + dev_err(xdev->dev, "failed to enable the axi clock\n"); + goto error; + } + + ret = dma_async_device_register(ddev); + if (ret) { + dev_err(xdev->dev, "failed to register the dma device\n"); + goto error_dma_async; + } + + ret = of_dma_controller_register(xdev->dev->of_node, + of_dma_xilinx_xlate, ddev); + if (ret) { + dev_err(xdev->dev, "failed to register DMA to DT DMA helper\n"); + goto error_of_dma; + } + + xilinx_dpdma_enable_irq(xdev); + + xilinx_dpdma_debugfs_init(xdev); + + dev_info(&pdev->dev, "Xilinx DPDMA engine is probed\n"); + + return 0; + +error_of_dma: + dma_async_device_unregister(ddev); +error_dma_async: + clk_disable_unprepare(xdev->axi_clk); +error: + for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) + xilinx_dpdma_chan_remove(xdev->chan[i]); + + free_irq(xdev->irq, xdev); + + return ret; +} + +static int xilinx_dpdma_remove(struct platform_device *pdev) +{ + struct xilinx_dpdma_device *xdev = platform_get_drvdata(pdev); + unsigned int i; + + /* Start by disabling the IRQ to avoid races during cleanup. */ + free_irq(xdev->irq, xdev); + + xilinx_dpdma_disable_irq(xdev); + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&xdev->common); + clk_disable_unprepare(xdev->axi_clk); + + for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) + xilinx_dpdma_chan_remove(xdev->chan[i]); + + return 0; +} + +static const struct of_device_id xilinx_dpdma_of_match[] = { + { .compatible = "xlnx,zynqmp-dpdma",}, + { /* end of table */ }, +}; +MODULE_DEVICE_TABLE(of, xilinx_dpdma_of_match); + +static struct platform_driver xilinx_dpdma_driver = { + .probe = xilinx_dpdma_probe, + .remove = xilinx_dpdma_remove, + .driver = { + .name = "xilinx-zynqmp-dpdma", + .of_match_table = xilinx_dpdma_of_match, + }, +}; + +module_platform_driver(xilinx_dpdma_driver); + +MODULE_AUTHOR("Xilinx, Inc."); +MODULE_DESCRIPTION("Xilinx ZynqMP DPDMA driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c new file mode 100644 index 0000000000..bd8c3cc2ea --- /dev/null +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -0,0 +1,1185 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DMA driver for Xilinx ZynqMP DMA Engine + * + * Copyright (C) 2016 Xilinx, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" + +/* Register Offsets */ +#define ZYNQMP_DMA_ISR 0x100 +#define ZYNQMP_DMA_IMR 0x104 +#define ZYNQMP_DMA_IER 0x108 +#define ZYNQMP_DMA_IDS 0x10C +#define ZYNQMP_DMA_CTRL0 0x110 +#define ZYNQMP_DMA_CTRL1 0x114 +#define ZYNQMP_DMA_DATA_ATTR 0x120 +#define ZYNQMP_DMA_DSCR_ATTR 0x124 +#define ZYNQMP_DMA_SRC_DSCR_WRD0 0x128 +#define ZYNQMP_DMA_SRC_DSCR_WRD1 0x12C +#define ZYNQMP_DMA_SRC_DSCR_WRD2 0x130 +#define ZYNQMP_DMA_SRC_DSCR_WRD3 0x134 +#define ZYNQMP_DMA_DST_DSCR_WRD0 0x138 +#define ZYNQMP_DMA_DST_DSCR_WRD1 0x13C +#define ZYNQMP_DMA_DST_DSCR_WRD2 0x140 +#define ZYNQMP_DMA_DST_DSCR_WRD3 0x144 +#define ZYNQMP_DMA_SRC_START_LSB 0x158 +#define ZYNQMP_DMA_SRC_START_MSB 0x15C +#define ZYNQMP_DMA_DST_START_LSB 0x160 +#define ZYNQMP_DMA_DST_START_MSB 0x164 +#define ZYNQMP_DMA_TOTAL_BYTE 0x188 +#define ZYNQMP_DMA_RATE_CTRL 0x18C +#define ZYNQMP_DMA_IRQ_SRC_ACCT 0x190 +#define ZYNQMP_DMA_IRQ_DST_ACCT 0x194 +#define ZYNQMP_DMA_CTRL2 0x200 + +/* Interrupt registers bit field definitions */ +#define ZYNQMP_DMA_DONE BIT(10) +#define ZYNQMP_DMA_AXI_WR_DATA BIT(9) +#define ZYNQMP_DMA_AXI_RD_DATA BIT(8) +#define ZYNQMP_DMA_AXI_RD_DST_DSCR BIT(7) +#define ZYNQMP_DMA_AXI_RD_SRC_DSCR BIT(6) +#define ZYNQMP_DMA_IRQ_DST_ACCT_ERR BIT(5) +#define ZYNQMP_DMA_IRQ_SRC_ACCT_ERR BIT(4) +#define ZYNQMP_DMA_BYTE_CNT_OVRFL BIT(3) +#define ZYNQMP_DMA_DST_DSCR_DONE BIT(2) +#define ZYNQMP_DMA_INV_APB BIT(0) + +/* Control 0 register bit field definitions */ +#define ZYNQMP_DMA_OVR_FETCH BIT(7) +#define ZYNQMP_DMA_POINT_TYPE_SG BIT(6) +#define ZYNQMP_DMA_RATE_CTRL_EN BIT(3) + +/* Control 1 register bit field definitions */ +#define ZYNQMP_DMA_SRC_ISSUE GENMASK(4, 0) + +/* Data Attribute register bit field definitions */ +#define ZYNQMP_DMA_ARBURST GENMASK(27, 26) +#define ZYNQMP_DMA_ARCACHE GENMASK(25, 22) +#define ZYNQMP_DMA_ARCACHE_OFST 22 +#define ZYNQMP_DMA_ARQOS GENMASK(21, 18) +#define ZYNQMP_DMA_ARQOS_OFST 18 +#define ZYNQMP_DMA_ARLEN GENMASK(17, 14) +#define ZYNQMP_DMA_ARLEN_OFST 14 +#define ZYNQMP_DMA_AWBURST GENMASK(13, 12) +#define ZYNQMP_DMA_AWCACHE GENMASK(11, 8) +#define ZYNQMP_DMA_AWCACHE_OFST 8 +#define ZYNQMP_DMA_AWQOS GENMASK(7, 4) +#define ZYNQMP_DMA_AWQOS_OFST 4 +#define ZYNQMP_DMA_AWLEN GENMASK(3, 0) +#define ZYNQMP_DMA_AWLEN_OFST 0 + +/* Descriptor Attribute register bit field definitions */ +#define ZYNQMP_DMA_AXCOHRNT BIT(8) +#define ZYNQMP_DMA_AXCACHE GENMASK(7, 4) +#define ZYNQMP_DMA_AXCACHE_OFST 4 +#define ZYNQMP_DMA_AXQOS GENMASK(3, 0) +#define ZYNQMP_DMA_AXQOS_OFST 0 + +/* Control register 2 bit field definitions */ +#define ZYNQMP_DMA_ENABLE BIT(0) + +/* Buffer Descriptor definitions */ +#define ZYNQMP_DMA_DESC_CTRL_STOP 0x10 +#define ZYNQMP_DMA_DESC_CTRL_COMP_INT 0x4 +#define ZYNQMP_DMA_DESC_CTRL_SIZE_256 0x2 +#define ZYNQMP_DMA_DESC_CTRL_COHRNT 0x1 + +/* Interrupt Mask specific definitions */ +#define ZYNQMP_DMA_INT_ERR (ZYNQMP_DMA_AXI_RD_DATA | \ + ZYNQMP_DMA_AXI_WR_DATA | \ + ZYNQMP_DMA_AXI_RD_DST_DSCR | \ + ZYNQMP_DMA_AXI_RD_SRC_DSCR | \ + ZYNQMP_DMA_INV_APB) +#define ZYNQMP_DMA_INT_OVRFL (ZYNQMP_DMA_BYTE_CNT_OVRFL | \ + ZYNQMP_DMA_IRQ_SRC_ACCT_ERR | \ + ZYNQMP_DMA_IRQ_DST_ACCT_ERR) +#define ZYNQMP_DMA_INT_DONE (ZYNQMP_DMA_DONE | ZYNQMP_DMA_DST_DSCR_DONE) +#define ZYNQMP_DMA_INT_EN_DEFAULT_MASK (ZYNQMP_DMA_INT_DONE | \ + ZYNQMP_DMA_INT_ERR | \ + ZYNQMP_DMA_INT_OVRFL | \ + ZYNQMP_DMA_DST_DSCR_DONE) + +/* Max number of descriptors per channel */ +#define ZYNQMP_DMA_NUM_DESCS 32 + +/* Max transfer size per descriptor */ +#define ZYNQMP_DMA_MAX_TRANS_LEN 0x40000000 + +/* Max burst lengths */ +#define ZYNQMP_DMA_MAX_DST_BURST_LEN 32768U +#define ZYNQMP_DMA_MAX_SRC_BURST_LEN 32768U + +/* Reset values for data attributes */ +#define ZYNQMP_DMA_AXCACHE_VAL 0xF + +#define ZYNQMP_DMA_SRC_ISSUE_RST_VAL 0x1F + +#define ZYNQMP_DMA_IDS_DEFAULT_MASK 0xFFF + +/* Bus width in bits */ +#define ZYNQMP_DMA_BUS_WIDTH_64 64 +#define ZYNQMP_DMA_BUS_WIDTH_128 128 + +#define ZDMA_PM_TIMEOUT 100 + +#define ZYNQMP_DMA_DESC_SIZE(chan) (chan->desc_size) + +#define to_chan(chan) container_of(chan, struct zynqmp_dma_chan, \ + common) +#define tx_to_desc(tx) container_of(tx, struct zynqmp_dma_desc_sw, \ + async_tx) + +/** + * struct zynqmp_dma_desc_ll - Hw linked list descriptor + * @addr: Buffer address + * @size: Size of the buffer + * @ctrl: Control word + * @nxtdscraddr: Next descriptor base address + * @rsvd: Reserved field and for Hw internal use. + */ +struct zynqmp_dma_desc_ll { + u64 addr; + u32 size; + u32 ctrl; + u64 nxtdscraddr; + u64 rsvd; +}; + +/** + * struct zynqmp_dma_desc_sw - Per Transaction structure + * @src: Source address for simple mode dma + * @dst: Destination address for simple mode dma + * @len: Transfer length for simple mode dma + * @node: Node in the channel descriptor list + * @tx_list: List head for the current transfer + * @async_tx: Async transaction descriptor + * @src_v: Virtual address of the src descriptor + * @src_p: Physical address of the src descriptor + * @dst_v: Virtual address of the dst descriptor + * @dst_p: Physical address of the dst descriptor + */ +struct zynqmp_dma_desc_sw { + u64 src; + u64 dst; + u32 len; + struct list_head node; + struct list_head tx_list; + struct dma_async_tx_descriptor async_tx; + struct zynqmp_dma_desc_ll *src_v; + dma_addr_t src_p; + struct zynqmp_dma_desc_ll *dst_v; + dma_addr_t dst_p; +}; + +/** + * struct zynqmp_dma_chan - Driver specific DMA channel structure + * @zdev: Driver specific device structure + * @regs: Control registers offset + * @lock: Descriptor operation lock + * @pending_list: Descriptors waiting + * @free_list: Descriptors free + * @active_list: Descriptors active + * @sw_desc_pool: SW descriptor pool + * @done_list: Complete descriptors + * @common: DMA common channel + * @desc_pool_v: Statically allocated descriptor base + * @desc_pool_p: Physical allocated descriptor base + * @desc_free_cnt: Descriptor available count + * @dev: The dma device + * @irq: Channel IRQ + * @is_dmacoherent: Tells whether dma operations are coherent or not + * @tasklet: Cleanup work after irq + * @idle : Channel status; + * @desc_size: Size of the low level descriptor + * @err: Channel has errors + * @bus_width: Bus width + * @src_burst_len: Source burst length + * @dst_burst_len: Dest burst length + */ +struct zynqmp_dma_chan { + struct zynqmp_dma_device *zdev; + void __iomem *regs; + spinlock_t lock; + struct list_head pending_list; + struct list_head free_list; + struct list_head active_list; + struct zynqmp_dma_desc_sw *sw_desc_pool; + struct list_head done_list; + struct dma_chan common; + void *desc_pool_v; + dma_addr_t desc_pool_p; + u32 desc_free_cnt; + struct device *dev; + int irq; + bool is_dmacoherent; + struct tasklet_struct tasklet; + bool idle; + size_t desc_size; + bool err; + u32 bus_width; + u32 src_burst_len; + u32 dst_burst_len; +}; + +/** + * struct zynqmp_dma_device - DMA device structure + * @dev: Device Structure + * @common: DMA device structure + * @chan: Driver specific DMA channel + * @clk_main: Pointer to main clock + * @clk_apb: Pointer to apb clock + */ +struct zynqmp_dma_device { + struct device *dev; + struct dma_device common; + struct zynqmp_dma_chan *chan; + struct clk *clk_main; + struct clk *clk_apb; +}; + +static inline void zynqmp_dma_writeq(struct zynqmp_dma_chan *chan, u32 reg, + u64 value) +{ + lo_hi_writeq(value, chan->regs + reg); +} + +/** + * zynqmp_dma_update_desc_to_ctrlr - Updates descriptor to the controller + * @chan: ZynqMP DMA DMA channel pointer + * @desc: Transaction descriptor pointer + */ +static void zynqmp_dma_update_desc_to_ctrlr(struct zynqmp_dma_chan *chan, + struct zynqmp_dma_desc_sw *desc) +{ + dma_addr_t addr; + + addr = desc->src_p; + zynqmp_dma_writeq(chan, ZYNQMP_DMA_SRC_START_LSB, addr); + addr = desc->dst_p; + zynqmp_dma_writeq(chan, ZYNQMP_DMA_DST_START_LSB, addr); +} + +/** + * zynqmp_dma_desc_config_eod - Mark the descriptor as end descriptor + * @chan: ZynqMP DMA channel pointer + * @desc: Hw descriptor pointer + */ +static void zynqmp_dma_desc_config_eod(struct zynqmp_dma_chan *chan, + void *desc) +{ + struct zynqmp_dma_desc_ll *hw = (struct zynqmp_dma_desc_ll *)desc; + + hw->ctrl |= ZYNQMP_DMA_DESC_CTRL_STOP; + hw++; + hw->ctrl |= ZYNQMP_DMA_DESC_CTRL_COMP_INT | ZYNQMP_DMA_DESC_CTRL_STOP; +} + +/** + * zynqmp_dma_config_sg_ll_desc - Configure the linked list descriptor + * @chan: ZynqMP DMA channel pointer + * @sdesc: Hw descriptor pointer + * @src: Source buffer address + * @dst: Destination buffer address + * @len: Transfer length + * @prev: Previous hw descriptor pointer + */ +static void zynqmp_dma_config_sg_ll_desc(struct zynqmp_dma_chan *chan, + struct zynqmp_dma_desc_ll *sdesc, + dma_addr_t src, dma_addr_t dst, size_t len, + struct zynqmp_dma_desc_ll *prev) +{ + struct zynqmp_dma_desc_ll *ddesc = sdesc + 1; + + sdesc->size = ddesc->size = len; + sdesc->addr = src; + ddesc->addr = dst; + + sdesc->ctrl = ddesc->ctrl = ZYNQMP_DMA_DESC_CTRL_SIZE_256; + if (chan->is_dmacoherent) { + sdesc->ctrl |= ZYNQMP_DMA_DESC_CTRL_COHRNT; + ddesc->ctrl |= ZYNQMP_DMA_DESC_CTRL_COHRNT; + } + + if (prev) { + dma_addr_t addr = chan->desc_pool_p + + ((uintptr_t)sdesc - (uintptr_t)chan->desc_pool_v); + ddesc = prev + 1; + prev->nxtdscraddr = addr; + ddesc->nxtdscraddr = addr + ZYNQMP_DMA_DESC_SIZE(chan); + } +} + +/** + * zynqmp_dma_init - Initialize the channel + * @chan: ZynqMP DMA channel pointer + */ +static void zynqmp_dma_init(struct zynqmp_dma_chan *chan) +{ + u32 val; + + writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS); + val = readl(chan->regs + ZYNQMP_DMA_ISR); + writel(val, chan->regs + ZYNQMP_DMA_ISR); + + if (chan->is_dmacoherent) { + val = ZYNQMP_DMA_AXCOHRNT; + val = (val & ~ZYNQMP_DMA_AXCACHE) | + (ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_AXCACHE_OFST); + writel(val, chan->regs + ZYNQMP_DMA_DSCR_ATTR); + } + + val = readl(chan->regs + ZYNQMP_DMA_DATA_ATTR); + if (chan->is_dmacoherent) { + val = (val & ~ZYNQMP_DMA_ARCACHE) | + (ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_ARCACHE_OFST); + val = (val & ~ZYNQMP_DMA_AWCACHE) | + (ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_AWCACHE_OFST); + } + writel(val, chan->regs + ZYNQMP_DMA_DATA_ATTR); + + /* Clearing the interrupt account rgisters */ + val = readl(chan->regs + ZYNQMP_DMA_IRQ_SRC_ACCT); + val = readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT); + + chan->idle = true; +} + +/** + * zynqmp_dma_tx_submit - Submit DMA transaction + * @tx: Async transaction descriptor pointer + * + * Return: cookie value + */ +static dma_cookie_t zynqmp_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct zynqmp_dma_chan *chan = to_chan(tx->chan); + struct zynqmp_dma_desc_sw *desc, *new; + dma_cookie_t cookie; + unsigned long irqflags; + + new = tx_to_desc(tx); + spin_lock_irqsave(&chan->lock, irqflags); + cookie = dma_cookie_assign(tx); + + if (!list_empty(&chan->pending_list)) { + desc = list_last_entry(&chan->pending_list, + struct zynqmp_dma_desc_sw, node); + if (!list_empty(&desc->tx_list)) + desc = list_last_entry(&desc->tx_list, + struct zynqmp_dma_desc_sw, node); + desc->src_v->nxtdscraddr = new->src_p; + desc->src_v->ctrl &= ~ZYNQMP_DMA_DESC_CTRL_STOP; + desc->dst_v->nxtdscraddr = new->dst_p; + desc->dst_v->ctrl &= ~ZYNQMP_DMA_DESC_CTRL_STOP; + } + + list_add_tail(&new->node, &chan->pending_list); + spin_unlock_irqrestore(&chan->lock, irqflags); + + return cookie; +} + +/** + * zynqmp_dma_get_descriptor - Get the sw descriptor from the pool + * @chan: ZynqMP DMA channel pointer + * + * Return: The sw descriptor + */ +static struct zynqmp_dma_desc_sw * +zynqmp_dma_get_descriptor(struct zynqmp_dma_chan *chan) +{ + struct zynqmp_dma_desc_sw *desc; + unsigned long irqflags; + + spin_lock_irqsave(&chan->lock, irqflags); + desc = list_first_entry(&chan->free_list, + struct zynqmp_dma_desc_sw, node); + list_del(&desc->node); + spin_unlock_irqrestore(&chan->lock, irqflags); + + INIT_LIST_HEAD(&desc->tx_list); + /* Clear the src and dst descriptor memory */ + memset((void *)desc->src_v, 0, ZYNQMP_DMA_DESC_SIZE(chan)); + memset((void *)desc->dst_v, 0, ZYNQMP_DMA_DESC_SIZE(chan)); + + return desc; +} + +/** + * zynqmp_dma_free_descriptor - Issue pending transactions + * @chan: ZynqMP DMA channel pointer + * @sdesc: Transaction descriptor pointer + */ +static void zynqmp_dma_free_descriptor(struct zynqmp_dma_chan *chan, + struct zynqmp_dma_desc_sw *sdesc) +{ + struct zynqmp_dma_desc_sw *child, *next; + + chan->desc_free_cnt++; + list_move_tail(&sdesc->node, &chan->free_list); + list_for_each_entry_safe(child, next, &sdesc->tx_list, node) { + chan->desc_free_cnt++; + list_move_tail(&child->node, &chan->free_list); + } +} + +/** + * zynqmp_dma_free_desc_list - Free descriptors list + * @chan: ZynqMP DMA channel pointer + * @list: List to parse and delete the descriptor + */ +static void zynqmp_dma_free_desc_list(struct zynqmp_dma_chan *chan, + struct list_head *list) +{ + struct zynqmp_dma_desc_sw *desc, *next; + + list_for_each_entry_safe(desc, next, list, node) + zynqmp_dma_free_descriptor(chan, desc); +} + +/** + * zynqmp_dma_alloc_chan_resources - Allocate channel resources + * @dchan: DMA channel + * + * Return: Number of descriptors on success and failure value on error + */ +static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct zynqmp_dma_chan *chan = to_chan(dchan); + struct zynqmp_dma_desc_sw *desc; + int i, ret; + + ret = pm_runtime_resume_and_get(chan->dev); + if (ret < 0) + return ret; + + chan->sw_desc_pool = kcalloc(ZYNQMP_DMA_NUM_DESCS, sizeof(*desc), + GFP_KERNEL); + if (!chan->sw_desc_pool) + return -ENOMEM; + + chan->idle = true; + chan->desc_free_cnt = ZYNQMP_DMA_NUM_DESCS; + + INIT_LIST_HEAD(&chan->free_list); + + for (i = 0; i < ZYNQMP_DMA_NUM_DESCS; i++) { + desc = chan->sw_desc_pool + i; + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = zynqmp_dma_tx_submit; + list_add_tail(&desc->node, &chan->free_list); + } + + chan->desc_pool_v = dma_alloc_coherent(chan->dev, + (2 * ZYNQMP_DMA_DESC_SIZE(chan) * + ZYNQMP_DMA_NUM_DESCS), + &chan->desc_pool_p, GFP_KERNEL); + if (!chan->desc_pool_v) + return -ENOMEM; + + for (i = 0; i < ZYNQMP_DMA_NUM_DESCS; i++) { + desc = chan->sw_desc_pool + i; + desc->src_v = (struct zynqmp_dma_desc_ll *) (chan->desc_pool_v + + (i * ZYNQMP_DMA_DESC_SIZE(chan) * 2)); + desc->dst_v = (struct zynqmp_dma_desc_ll *) (desc->src_v + 1); + desc->src_p = chan->desc_pool_p + + (i * ZYNQMP_DMA_DESC_SIZE(chan) * 2); + desc->dst_p = desc->src_p + ZYNQMP_DMA_DESC_SIZE(chan); + } + + return ZYNQMP_DMA_NUM_DESCS; +} + +/** + * zynqmp_dma_start - Start DMA channel + * @chan: ZynqMP DMA channel pointer + */ +static void zynqmp_dma_start(struct zynqmp_dma_chan *chan) +{ + writel(ZYNQMP_DMA_INT_EN_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IER); + writel(0, chan->regs + ZYNQMP_DMA_TOTAL_BYTE); + chan->idle = false; + writel(ZYNQMP_DMA_ENABLE, chan->regs + ZYNQMP_DMA_CTRL2); +} + +/** + * zynqmp_dma_handle_ovfl_int - Process the overflow interrupt + * @chan: ZynqMP DMA channel pointer + * @status: Interrupt status value + */ +static void zynqmp_dma_handle_ovfl_int(struct zynqmp_dma_chan *chan, u32 status) +{ + if (status & ZYNQMP_DMA_BYTE_CNT_OVRFL) + writel(0, chan->regs + ZYNQMP_DMA_TOTAL_BYTE); + if (status & ZYNQMP_DMA_IRQ_DST_ACCT_ERR) + readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT); + if (status & ZYNQMP_DMA_IRQ_SRC_ACCT_ERR) + readl(chan->regs + ZYNQMP_DMA_IRQ_SRC_ACCT); +} + +static void zynqmp_dma_config(struct zynqmp_dma_chan *chan) +{ + u32 val, burst_val; + + val = readl(chan->regs + ZYNQMP_DMA_CTRL0); + val |= ZYNQMP_DMA_POINT_TYPE_SG; + writel(val, chan->regs + ZYNQMP_DMA_CTRL0); + + val = readl(chan->regs + ZYNQMP_DMA_DATA_ATTR); + burst_val = __ilog2_u32(chan->src_burst_len); + val = (val & ~ZYNQMP_DMA_ARLEN) | + ((burst_val << ZYNQMP_DMA_ARLEN_OFST) & ZYNQMP_DMA_ARLEN); + burst_val = __ilog2_u32(chan->dst_burst_len); + val = (val & ~ZYNQMP_DMA_AWLEN) | + ((burst_val << ZYNQMP_DMA_AWLEN_OFST) & ZYNQMP_DMA_AWLEN); + writel(val, chan->regs + ZYNQMP_DMA_DATA_ATTR); +} + +/** + * zynqmp_dma_device_config - Zynqmp dma device configuration + * @dchan: DMA channel + * @config: DMA device config + * + * Return: 0 always + */ +static int zynqmp_dma_device_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct zynqmp_dma_chan *chan = to_chan(dchan); + + chan->src_burst_len = clamp(config->src_maxburst, 1U, + ZYNQMP_DMA_MAX_SRC_BURST_LEN); + chan->dst_burst_len = clamp(config->dst_maxburst, 1U, + ZYNQMP_DMA_MAX_DST_BURST_LEN); + + return 0; +} + +/** + * zynqmp_dma_start_transfer - Initiate the new transfer + * @chan: ZynqMP DMA channel pointer + */ +static void zynqmp_dma_start_transfer(struct zynqmp_dma_chan *chan) +{ + struct zynqmp_dma_desc_sw *desc; + + if (!chan->idle) + return; + + zynqmp_dma_config(chan); + + desc = list_first_entry_or_null(&chan->pending_list, + struct zynqmp_dma_desc_sw, node); + if (!desc) + return; + + list_splice_tail_init(&chan->pending_list, &chan->active_list); + zynqmp_dma_update_desc_to_ctrlr(chan, desc); + zynqmp_dma_start(chan); +} + + +/** + * zynqmp_dma_chan_desc_cleanup - Cleanup the completed descriptors + * @chan: ZynqMP DMA channel + */ +static void zynqmp_dma_chan_desc_cleanup(struct zynqmp_dma_chan *chan) +{ + struct zynqmp_dma_desc_sw *desc, *next; + unsigned long irqflags; + + spin_lock_irqsave(&chan->lock, irqflags); + + list_for_each_entry_safe(desc, next, &chan->done_list, node) { + struct dmaengine_desc_callback cb; + + dmaengine_desc_get_callback(&desc->async_tx, &cb); + if (dmaengine_desc_callback_valid(&cb)) { + spin_unlock_irqrestore(&chan->lock, irqflags); + dmaengine_desc_callback_invoke(&cb, NULL); + spin_lock_irqsave(&chan->lock, irqflags); + } + + /* Run any dependencies, then free the descriptor */ + zynqmp_dma_free_descriptor(chan, desc); + } + + spin_unlock_irqrestore(&chan->lock, irqflags); +} + +/** + * zynqmp_dma_complete_descriptor - Mark the active descriptor as complete + * @chan: ZynqMP DMA channel pointer + */ +static void zynqmp_dma_complete_descriptor(struct zynqmp_dma_chan *chan) +{ + struct zynqmp_dma_desc_sw *desc; + + desc = list_first_entry_or_null(&chan->active_list, + struct zynqmp_dma_desc_sw, node); + if (!desc) + return; + list_del(&desc->node); + dma_cookie_complete(&desc->async_tx); + list_add_tail(&desc->node, &chan->done_list); +} + +/** + * zynqmp_dma_issue_pending - Issue pending transactions + * @dchan: DMA channel pointer + */ +static void zynqmp_dma_issue_pending(struct dma_chan *dchan) +{ + struct zynqmp_dma_chan *chan = to_chan(dchan); + unsigned long irqflags; + + spin_lock_irqsave(&chan->lock, irqflags); + zynqmp_dma_start_transfer(chan); + spin_unlock_irqrestore(&chan->lock, irqflags); +} + +/** + * zynqmp_dma_free_descriptors - Free channel descriptors + * @chan: ZynqMP DMA channel pointer + */ +static void zynqmp_dma_free_descriptors(struct zynqmp_dma_chan *chan) +{ + unsigned long irqflags; + + spin_lock_irqsave(&chan->lock, irqflags); + zynqmp_dma_free_desc_list(chan, &chan->active_list); + zynqmp_dma_free_desc_list(chan, &chan->pending_list); + zynqmp_dma_free_desc_list(chan, &chan->done_list); + spin_unlock_irqrestore(&chan->lock, irqflags); +} + +/** + * zynqmp_dma_free_chan_resources - Free channel resources + * @dchan: DMA channel pointer + */ +static void zynqmp_dma_free_chan_resources(struct dma_chan *dchan) +{ + struct zynqmp_dma_chan *chan = to_chan(dchan); + + zynqmp_dma_free_descriptors(chan); + dma_free_coherent(chan->dev, + (2 * ZYNQMP_DMA_DESC_SIZE(chan) * ZYNQMP_DMA_NUM_DESCS), + chan->desc_pool_v, chan->desc_pool_p); + kfree(chan->sw_desc_pool); + pm_runtime_mark_last_busy(chan->dev); + pm_runtime_put_autosuspend(chan->dev); +} + +/** + * zynqmp_dma_reset - Reset the channel + * @chan: ZynqMP DMA channel pointer + */ +static void zynqmp_dma_reset(struct zynqmp_dma_chan *chan) +{ + unsigned long irqflags; + + writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS); + + spin_lock_irqsave(&chan->lock, irqflags); + zynqmp_dma_complete_descriptor(chan); + spin_unlock_irqrestore(&chan->lock, irqflags); + zynqmp_dma_chan_desc_cleanup(chan); + zynqmp_dma_free_descriptors(chan); + + zynqmp_dma_init(chan); +} + +/** + * zynqmp_dma_irq_handler - ZynqMP DMA Interrupt handler + * @irq: IRQ number + * @data: Pointer to the ZynqMP DMA channel structure + * + * Return: IRQ_HANDLED/IRQ_NONE + */ +static irqreturn_t zynqmp_dma_irq_handler(int irq, void *data) +{ + struct zynqmp_dma_chan *chan = (struct zynqmp_dma_chan *)data; + u32 isr, imr, status; + irqreturn_t ret = IRQ_NONE; + + isr = readl(chan->regs + ZYNQMP_DMA_ISR); + imr = readl(chan->regs + ZYNQMP_DMA_IMR); + status = isr & ~imr; + + writel(isr, chan->regs + ZYNQMP_DMA_ISR); + if (status & ZYNQMP_DMA_INT_DONE) { + tasklet_schedule(&chan->tasklet); + ret = IRQ_HANDLED; + } + + if (status & ZYNQMP_DMA_DONE) + chan->idle = true; + + if (status & ZYNQMP_DMA_INT_ERR) { + chan->err = true; + tasklet_schedule(&chan->tasklet); + dev_err(chan->dev, "Channel %p has errors\n", chan); + ret = IRQ_HANDLED; + } + + if (status & ZYNQMP_DMA_INT_OVRFL) { + zynqmp_dma_handle_ovfl_int(chan, status); + dev_dbg(chan->dev, "Channel %p overflow interrupt\n", chan); + ret = IRQ_HANDLED; + } + + return ret; +} + +/** + * zynqmp_dma_do_tasklet - Schedule completion tasklet + * @t: Pointer to the ZynqMP DMA channel structure + */ +static void zynqmp_dma_do_tasklet(struct tasklet_struct *t) +{ + struct zynqmp_dma_chan *chan = from_tasklet(chan, t, tasklet); + u32 count; + unsigned long irqflags; + + if (chan->err) { + zynqmp_dma_reset(chan); + chan->err = false; + return; + } + + spin_lock_irqsave(&chan->lock, irqflags); + count = readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT); + while (count) { + zynqmp_dma_complete_descriptor(chan); + count--; + } + spin_unlock_irqrestore(&chan->lock, irqflags); + + zynqmp_dma_chan_desc_cleanup(chan); + + if (chan->idle) { + spin_lock_irqsave(&chan->lock, irqflags); + zynqmp_dma_start_transfer(chan); + spin_unlock_irqrestore(&chan->lock, irqflags); + } +} + +/** + * zynqmp_dma_device_terminate_all - Aborts all transfers on a channel + * @dchan: DMA channel pointer + * + * Return: Always '0' + */ +static int zynqmp_dma_device_terminate_all(struct dma_chan *dchan) +{ + struct zynqmp_dma_chan *chan = to_chan(dchan); + + writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS); + zynqmp_dma_free_descriptors(chan); + + return 0; +} + +/** + * zynqmp_dma_synchronize - Synchronizes the termination of a transfers to the current context. + * @dchan: DMA channel pointer + */ +static void zynqmp_dma_synchronize(struct dma_chan *dchan) +{ + struct zynqmp_dma_chan *chan = to_chan(dchan); + + tasklet_kill(&chan->tasklet); +} + +/** + * zynqmp_dma_prep_memcpy - prepare descriptors for memcpy transaction + * @dchan: DMA channel + * @dma_dst: Destination buffer address + * @dma_src: Source buffer address + * @len: Transfer length + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *zynqmp_dma_prep_memcpy( + struct dma_chan *dchan, dma_addr_t dma_dst, + dma_addr_t dma_src, size_t len, ulong flags) +{ + struct zynqmp_dma_chan *chan; + struct zynqmp_dma_desc_sw *new, *first = NULL; + void *desc = NULL, *prev = NULL; + size_t copy; + u32 desc_cnt; + unsigned long irqflags; + + chan = to_chan(dchan); + + desc_cnt = DIV_ROUND_UP(len, ZYNQMP_DMA_MAX_TRANS_LEN); + + spin_lock_irqsave(&chan->lock, irqflags); + if (desc_cnt > chan->desc_free_cnt) { + spin_unlock_irqrestore(&chan->lock, irqflags); + dev_dbg(chan->dev, "chan %p descs are not available\n", chan); + return NULL; + } + chan->desc_free_cnt = chan->desc_free_cnt - desc_cnt; + spin_unlock_irqrestore(&chan->lock, irqflags); + + do { + /* Allocate and populate the descriptor */ + new = zynqmp_dma_get_descriptor(chan); + + copy = min_t(size_t, len, ZYNQMP_DMA_MAX_TRANS_LEN); + desc = (struct zynqmp_dma_desc_ll *)new->src_v; + zynqmp_dma_config_sg_ll_desc(chan, desc, dma_src, + dma_dst, copy, prev); + prev = desc; + len -= copy; + dma_src += copy; + dma_dst += copy; + if (!first) + first = new; + else + list_add_tail(&new->node, &first->tx_list); + } while (len); + + zynqmp_dma_desc_config_eod(chan, desc); + async_tx_ack(&first->async_tx); + first->async_tx.flags = (enum dma_ctrl_flags)flags; + return &first->async_tx; +} + +/** + * zynqmp_dma_chan_remove - Channel remove function + * @chan: ZynqMP DMA channel pointer + */ +static void zynqmp_dma_chan_remove(struct zynqmp_dma_chan *chan) +{ + if (!chan) + return; + + if (chan->irq) + devm_free_irq(chan->zdev->dev, chan->irq, chan); + tasklet_kill(&chan->tasklet); + list_del(&chan->common.device_node); +} + +/** + * zynqmp_dma_chan_probe - Per Channel Probing + * @zdev: Driver specific device structure + * @pdev: Pointer to the platform_device structure + * + * Return: '0' on success and failure value on error + */ +static int zynqmp_dma_chan_probe(struct zynqmp_dma_device *zdev, + struct platform_device *pdev) +{ + struct zynqmp_dma_chan *chan; + struct device_node *node = pdev->dev.of_node; + int err; + + chan = devm_kzalloc(zdev->dev, sizeof(*chan), GFP_KERNEL); + if (!chan) + return -ENOMEM; + chan->dev = zdev->dev; + chan->zdev = zdev; + + chan->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(chan->regs)) + return PTR_ERR(chan->regs); + + chan->bus_width = ZYNQMP_DMA_BUS_WIDTH_64; + chan->dst_burst_len = ZYNQMP_DMA_MAX_DST_BURST_LEN; + chan->src_burst_len = ZYNQMP_DMA_MAX_SRC_BURST_LEN; + err = of_property_read_u32(node, "xlnx,bus-width", &chan->bus_width); + if (err < 0) { + dev_err(&pdev->dev, "missing xlnx,bus-width property\n"); + return err; + } + + if (chan->bus_width != ZYNQMP_DMA_BUS_WIDTH_64 && + chan->bus_width != ZYNQMP_DMA_BUS_WIDTH_128) { + dev_err(zdev->dev, "invalid bus-width value"); + return -EINVAL; + } + + chan->is_dmacoherent = of_property_read_bool(node, "dma-coherent"); + zdev->chan = chan; + tasklet_setup(&chan->tasklet, zynqmp_dma_do_tasklet); + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->active_list); + INIT_LIST_HEAD(&chan->pending_list); + INIT_LIST_HEAD(&chan->done_list); + INIT_LIST_HEAD(&chan->free_list); + + dma_cookie_init(&chan->common); + chan->common.device = &zdev->common; + list_add_tail(&chan->common.device_node, &zdev->common.channels); + + zynqmp_dma_init(chan); + chan->irq = platform_get_irq(pdev, 0); + if (chan->irq < 0) + return -ENXIO; + err = devm_request_irq(&pdev->dev, chan->irq, zynqmp_dma_irq_handler, 0, + "zynqmp-dma", chan); + if (err) + return err; + + chan->desc_size = sizeof(struct zynqmp_dma_desc_ll); + chan->idle = true; + return 0; +} + +/** + * of_zynqmp_dma_xlate - Translation function + * @dma_spec: Pointer to DMA specifier as found in the device tree + * @ofdma: Pointer to DMA controller data + * + * Return: DMA channel pointer on success and NULL on error + */ +static struct dma_chan *of_zynqmp_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct zynqmp_dma_device *zdev = ofdma->of_dma_data; + + return dma_get_slave_channel(&zdev->chan->common); +} + +/** + * zynqmp_dma_suspend - Suspend method for the driver + * @dev: Address of the device structure + * + * Put the driver into low power mode. + * Return: 0 on success and failure value on error + */ +static int __maybe_unused zynqmp_dma_suspend(struct device *dev) +{ + if (!device_may_wakeup(dev)) + return pm_runtime_force_suspend(dev); + + return 0; +} + +/** + * zynqmp_dma_resume - Resume from suspend + * @dev: Address of the device structure + * + * Resume operation after suspend. + * Return: 0 on success and failure value on error + */ +static int __maybe_unused zynqmp_dma_resume(struct device *dev) +{ + if (!device_may_wakeup(dev)) + return pm_runtime_force_resume(dev); + + return 0; +} + +/** + * zynqmp_dma_runtime_suspend - Runtime suspend method for the driver + * @dev: Address of the device structure + * + * Put the driver into low power mode. + * Return: 0 always + */ +static int __maybe_unused zynqmp_dma_runtime_suspend(struct device *dev) +{ + struct zynqmp_dma_device *zdev = dev_get_drvdata(dev); + + clk_disable_unprepare(zdev->clk_main); + clk_disable_unprepare(zdev->clk_apb); + + return 0; +} + +/** + * zynqmp_dma_runtime_resume - Runtime suspend method for the driver + * @dev: Address of the device structure + * + * Put the driver into low power mode. + * Return: 0 always + */ +static int __maybe_unused zynqmp_dma_runtime_resume(struct device *dev) +{ + struct zynqmp_dma_device *zdev = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(zdev->clk_main); + if (err) { + dev_err(dev, "Unable to enable main clock.\n"); + return err; + } + + err = clk_prepare_enable(zdev->clk_apb); + if (err) { + dev_err(dev, "Unable to enable apb clock.\n"); + clk_disable_unprepare(zdev->clk_main); + return err; + } + + return 0; +} + +static const struct dev_pm_ops zynqmp_dma_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(zynqmp_dma_suspend, zynqmp_dma_resume) + SET_RUNTIME_PM_OPS(zynqmp_dma_runtime_suspend, + zynqmp_dma_runtime_resume, NULL) +}; + +/** + * zynqmp_dma_probe - Driver probe function + * @pdev: Pointer to the platform_device structure + * + * Return: '0' on success and failure value on error + */ +static int zynqmp_dma_probe(struct platform_device *pdev) +{ + struct zynqmp_dma_device *zdev; + struct dma_device *p; + int ret; + + zdev = devm_kzalloc(&pdev->dev, sizeof(*zdev), GFP_KERNEL); + if (!zdev) + return -ENOMEM; + + zdev->dev = &pdev->dev; + INIT_LIST_HEAD(&zdev->common.channels); + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); + if (ret) { + dev_err(&pdev->dev, "DMA not available for address range\n"); + return ret; + } + dma_cap_set(DMA_MEMCPY, zdev->common.cap_mask); + + p = &zdev->common; + p->device_prep_dma_memcpy = zynqmp_dma_prep_memcpy; + p->device_terminate_all = zynqmp_dma_device_terminate_all; + p->device_synchronize = zynqmp_dma_synchronize; + p->device_issue_pending = zynqmp_dma_issue_pending; + p->device_alloc_chan_resources = zynqmp_dma_alloc_chan_resources; + p->device_free_chan_resources = zynqmp_dma_free_chan_resources; + p->device_tx_status = dma_cookie_status; + p->device_config = zynqmp_dma_device_config; + p->dev = &pdev->dev; + + zdev->clk_main = devm_clk_get(&pdev->dev, "clk_main"); + if (IS_ERR(zdev->clk_main)) + return dev_err_probe(&pdev->dev, PTR_ERR(zdev->clk_main), + "main clock not found.\n"); + + zdev->clk_apb = devm_clk_get(&pdev->dev, "clk_apb"); + if (IS_ERR(zdev->clk_apb)) + return dev_err_probe(&pdev->dev, PTR_ERR(zdev->clk_apb), + "apb clock not found.\n"); + + platform_set_drvdata(pdev, zdev); + pm_runtime_set_autosuspend_delay(zdev->dev, ZDMA_PM_TIMEOUT); + pm_runtime_use_autosuspend(zdev->dev); + pm_runtime_enable(zdev->dev); + ret = pm_runtime_resume_and_get(zdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "device wakeup failed.\n"); + pm_runtime_disable(zdev->dev); + } + if (!pm_runtime_enabled(zdev->dev)) { + ret = zynqmp_dma_runtime_resume(zdev->dev); + if (ret) + return ret; + } + + ret = zynqmp_dma_chan_probe(zdev, pdev); + if (ret) { + dev_err_probe(&pdev->dev, ret, "Probing channel failed\n"); + goto err_disable_pm; + } + + p->dst_addr_widths = BIT(zdev->chan->bus_width / 8); + p->src_addr_widths = BIT(zdev->chan->bus_width / 8); + + ret = dma_async_device_register(&zdev->common); + if (ret) { + dev_err(zdev->dev, "failed to register the dma device\n"); + goto free_chan_resources; + } + + ret = of_dma_controller_register(pdev->dev.of_node, + of_zynqmp_dma_xlate, zdev); + if (ret) { + dev_err_probe(&pdev->dev, ret, "Unable to register DMA to DT\n"); + dma_async_device_unregister(&zdev->common); + goto free_chan_resources; + } + + pm_runtime_mark_last_busy(zdev->dev); + pm_runtime_put_sync_autosuspend(zdev->dev); + + return 0; + +free_chan_resources: + zynqmp_dma_chan_remove(zdev->chan); +err_disable_pm: + if (!pm_runtime_enabled(zdev->dev)) + zynqmp_dma_runtime_suspend(zdev->dev); + pm_runtime_disable(zdev->dev); + return ret; +} + +/** + * zynqmp_dma_remove - Driver remove function + * @pdev: Pointer to the platform_device structure + * + * Return: Always '0' + */ +static int zynqmp_dma_remove(struct platform_device *pdev) +{ + struct zynqmp_dma_device *zdev = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&zdev->common); + + zynqmp_dma_chan_remove(zdev->chan); + pm_runtime_disable(zdev->dev); + if (!pm_runtime_enabled(zdev->dev)) + zynqmp_dma_runtime_suspend(zdev->dev); + + return 0; +} + +static const struct of_device_id zynqmp_dma_of_match[] = { + { .compatible = "xlnx,zynqmp-dma-1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, zynqmp_dma_of_match); + +static struct platform_driver zynqmp_dma_driver = { + .driver = { + .name = "xilinx-zynqmp-dma", + .of_match_table = zynqmp_dma_of_match, + .pm = &zynqmp_dma_dev_pm_ops, + }, + .probe = zynqmp_dma_probe, + .remove = zynqmp_dma_remove, +}; + +module_platform_driver(zynqmp_dma_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Xilinx, Inc."); +MODULE_DESCRIPTION("Xilinx ZynqMP DMA driver"); -- cgit v1.2.3